HiveBrain v1.2.0
Get Started
← Back to all entries
patternpythonMinor

Random playlist builder is slow

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
randomplaylistslowbuilder

Problem

I wrote a Python script to scan my music library, randomly select 50 songs, and play them in VLC media player. It works great, just a little slow (takes several seconds to launch).

I know that walking through my entire music library (thousands of songs) is a brute force (slow) way to do it. What would be a faster approach?

I would also appreciate any suggestions on syntax or Pythonic improvements.

from subprocess import Popen
import os, sys
import random

# http://ubuntuforums.org/showthread.php?t=2136340
# http://stackoverflow.com/questions/19859840/excluding-directories-in-os-walk
def scanfolder(rootpath, exclude_dirs, file_ext):
    """Return the paths of all files under `rootpath` ending in `file_ext`.

    `exclude_dirs` is a container of directory names; any directory whose
    name is in it is skipped together with everything below it.
    `file_ext` is handed to `str.endswith` unchanged.
    """
    found = []
    for folder, subfolders, filenames in os.walk(rootpath):
        # Slice assignment edits the list os.walk iterates over, which is
        # what makes it skip the excluded directories.
        subfolders[:] = [name for name in subfolders
                         if name not in exclude_dirs]
        found.extend(os.path.join(folder, name)
                     for name in filenames
                     if name.endswith(file_ext))
    return found

# input variables
vlcpath = r'C:\Program Files (x86)\VideoLAN\VLC\vlc.exe'
musicpath = r'G:\Music Collection'
excludedirectories = ['_baby music', 'Buble, Michael', 'ballroom dancing']
filetype = r'.mp3'
len_playlist = 50

# run script
print('party on... filling glasses!')
mp3_list = scanfolder(musicpath, excludedirectories, filetype)
# random.sample refuses to draw more items than the library holds.
playlist_size = min(len_playlist, len(mp3_list))
playlist = random.sample(mp3_list, playlist_size)
# Launch VLC with the chosen tracks as its command-line arguments.
Popen([vlcpath] + playlist)
summary = "done: added %i items from library containing %i '%s' files"
print(summary % (len(playlist), len(mp3_list), filetype))

Solution

This is a different approach to the problem. Here is the idea:

I am making the assumption that your music collection does not change too much between start-ups of your script and that all your songs are longer than it takes to scan your entire music collection.

-
Introduce a function to save the mp3_list to a file

-
On start-up, look for that file. If it exists, pick a random song from it (checking that the song file itself still exists) and start playing it.

-
Meanwhile or if the file does not exist yet, (re-)scan the music collection in the background, while the first song is playing and write it back to the file once it is finished.

-
After the first song, just play n - 1 songs from the now up-to-date music database (if the scanning is finished, otherwise use the old file).

This approach might be slower than your approach, if many music files disappeared since the last start. It might fail if all of them disappeared (but we can check for that).

This approach has a different random distribution for the first song. (Suppose you had only one song in your directory the first time the script ran, and have 1000 now: on the next start-up it will still choose that one song.)

And here is the code. I used the python bindings of vlc, which are available here from their official github. Just download vlc.py and save it in the same directory as the script.

The functions read_db and save_db should be self-explanatory. get_existing_file gives one random track from mp3_list, making sure it actually exists.

When calling media_player.play(), this starts the playback in the background, so we are free to scan the folders now and save the new state.

After that I included some rudimentary playback controls.

By default, the player will continue looping through the playlist. Type stop, followed by Enter to stop it and exit the script.

This could use some more encapsulation of the methods, but it is a start.

import os
import sys
import random
import vlc
import time

# http://ubuntuforums.org/showthread.php?t=2136340
# http://stackoverflow.com/questions/19859840/excluding-directories-in-os-walk

def scan_folder(rootpath, exclude_dirs, file_ext):
    """Return the paths of all files under `rootpath` ending in `file_ext`.

    `exclude_dirs` is a container of directory names; any directory whose
    name is in it is skipped together with everything below it.
    `file_ext` is handed to `str.endswith` unchanged.
    """
    found = []
    for folder, subfolders, filenames in os.walk(rootpath):
        # Slice assignment edits the list os.walk iterates over, which is
        # what makes it skip the excluded directories.
        subfolders[:] = [name for name in subfolders
                         if name not in exclude_dirs]
        found.extend(os.path.join(folder, name)
                     for name in filenames
                     if name.endswith(file_ext))
    return found

def save_db(mp3_list, file_name):
    """Store `mp3_list` in `file_name`, one entry per line.

    The file is overwritten; no newline is written after the last entry.
    """
    with open(file_name, "w") as out:
        joined = "\n".join(mp3_list)
        out.write(joined)

def read_db(file_name):
    """Load the track list stored in `file_name`.

    Returns one list entry per line of the file, each with surrounding
    whitespace stripped. Errors from `open` are passed on to the caller.
    """
    with open(file_name) as source:
        return [entry.strip() for entry in source]

def get_existing_file(mp3_list):
    """Return a random entry of `mp3_list` that exists as a file on disk.

    Every existing entry is equally likely to be returned.

    Raises:
        IndexError: if `mp3_list` is empty or none of its entries exists.
    """
    # Walk the entries in random order instead of re-drawing until one
    # exists: re-drawing never terminates when every file has disappeared.
    for candidate in random.sample(mp3_list, len(mp3_list)):
        if os.path.isfile(candidate):
            return candidate
    raise IndexError("no existing file in mp3_list")

if __name__ == "__main__":
    # input variables
    vlcpath = r'/usr/bin/vlc'  # not used below; playback goes through the vlc bindings
    musicpath = r'/home/andreas/Musik'
    excludedirectories = {'_baby music', 'Buble, Michael', 'ballroom dancing'}
    # expanduser also works when the HOME environment variable is not set,
    # where os.getenv('HOME') would return None and break os.path.join.
    db_file = os.path.join(os.path.expanduser('~'), ".music.db")
    filetype = r'.mp3'
    len_playlist = 50

    # get mp3_list: prefer the cached list so playback can start right away
    print('party on... filling glasses!')
    try:
        mp3_list = read_db(db_file)
    except IOError:
        mp3_list = []
    if not mp3_list:
        # No usable cache yet: scan now and create it.
        mp3_list = scan_folder(musicpath, excludedirectories, filetype)
        save_db(mp3_list, db_file)
    if not mp3_list:
        sys.exit("no '%s' files found under %s" % (filetype, musicpath))

    # setup vlc
    vlc_instance = vlc.Instance()
    media_player = vlc_instance.media_list_player_new()

    # Keep the first track in a name of its own: it is needed again below
    # when the full playlist is assembled.
    first_song = get_existing_file(mp3_list)
    playlist = vlc_instance.media_list_new([first_song])
    media_player.set_media_list(playlist)
    media_player.play()

    # Rescan library while the first track is playing
    print("Scanning music library for changes...")
    mp3_list = scan_folder(musicpath, excludedirectories, filetype)
    save_db(mp3_list, db_file)
    print("done")

    # add songs to playlist; leave out the first track so it is not queued twice
    remaining = [song for song in mp3_list if song != first_song]
    n_more = max(0, min(len(remaining), len_playlist - 1))
    all_songs = [first_song] + random.sample(remaining, n_more)
    playlist = vlc_instance.media_list_new(all_songs)
    media_player.set_media_list(playlist)

    # add rudimentary playback controls
    actions = {"next": media_player.next,
               "previous": media_player.previous,
               "pause": media_player.pause,
               "stop": sys.exit}
    # keep alive: the loop only ends when "stop" calls sys.exit
    while True:
        key_pressed = raw_input("enter next, previous, pause or stop: ")
        if key_pressed in actions:
            actions[key_pressed]()


As a next iteration, I encapsulated all media_player functionality into a VLCPlayer class and all functionality surrounding the mp3_list into a MediaLibrary class.

```
import os
import sys
import random
import vlc
import time

# http://ubuntuforums.org/showthrea

Code Snippets

import os
import sys
import random
import vlc
import time

# http://ubuntuforums.org/showthread.php?t=2136340
# http://stackoverflow.com/questions/19859840/excluding-directories-in-os-walk


def scan_folder(rootpath, exclude_dirs, file_ext):
    """Return the paths of all files under `rootpath` ending in `file_ext`.

    `exclude_dirs` is a container of directory names; any directory whose
    name is in it is skipped together with everything below it.
    `file_ext` is handed to `str.endswith` unchanged.
    """
    found = []
    for folder, subfolders, filenames in os.walk(rootpath):
        # Slice assignment edits the list os.walk iterates over, which is
        # what makes it skip the excluded directories.
        subfolders[:] = [name for name in subfolders
                         if name not in exclude_dirs]
        found.extend(os.path.join(folder, name)
                     for name in filenames
                     if name.endswith(file_ext))
    return found


def save_db(mp3_list, file_name):
    """Store `mp3_list` in `file_name`, one entry per line.

    The file is overwritten; no newline is written after the last entry.
    """
    with open(file_name, "w") as out:
        joined = "\n".join(mp3_list)
        out.write(joined)


def read_db(file_name):
    """Load the track list stored in `file_name`.

    Returns one list entry per line of the file, each with surrounding
    whitespace stripped. Errors from `open` are passed on to the caller.
    """
    with open(file_name) as source:
        return [entry.strip() for entry in source]


def get_existing_file(mp3_list):
    """Return a random entry of `mp3_list` that exists as a file on disk.

    Every existing entry is equally likely to be returned.

    Raises:
        IndexError: if `mp3_list` is empty or none of its entries exists.
    """
    # Walk the entries in random order instead of re-drawing until one
    # exists: re-drawing never terminates when every file has disappeared.
    for candidate in random.sample(mp3_list, len(mp3_list)):
        if os.path.isfile(candidate):
            return candidate
    raise IndexError("no existing file in mp3_list")

if __name__ == "__main__":
    # input variables
    vlcpath = r'/usr/bin/vlc'  # not used below; playback goes through the vlc bindings
    musicpath = r'/home/andreas/Musik'
    excludedirectories = {'_baby music', 'Buble, Michael', 'ballroom dancing'}
    # expanduser also works when the HOME environment variable is not set,
    # where os.getenv('HOME') would return None and break os.path.join.
    db_file = os.path.join(os.path.expanduser('~'), ".music.db")
    filetype = r'.mp3'
    len_playlist = 50

    # get mp3_list: prefer the cached list so playback can start right away
    print('party on... filling glasses!')
    try:
        mp3_list = read_db(db_file)
    except IOError:
        mp3_list = []
    if not mp3_list:
        # No usable cache yet: scan now and create it.
        mp3_list = scan_folder(musicpath, excludedirectories, filetype)
        save_db(mp3_list, db_file)
    if not mp3_list:
        sys.exit("no '%s' files found under %s" % (filetype, musicpath))

    # setup vlc
    vlc_instance = vlc.Instance()
    media_player = vlc_instance.media_list_player_new()

    # Keep the first track in a name of its own: it is needed again below
    # when the full playlist is assembled.
    first_song = get_existing_file(mp3_list)
    playlist = vlc_instance.media_list_new([first_song])
    media_player.set_media_list(playlist)
    media_player.play()

    # Rescan library while the first track is playing
    print("Scanning music library for changes...")
    mp3_list = scan_folder(musicpath, excludedirectories, filetype)
    save_db(mp3_list, db_file)
    print("done")

    # add songs to playlist; leave out the first track so it is not queued twice
    remaining = [song for song in mp3_list if song != first_song]
    n_more = max(0, min(len(remaining), len_playlist - 1))
    all_songs = [first_song] + random.sample(remaining, n_more)
    playlist = vlc_instance.media_list_new(all_songs)
    media_player.set_media_list(playlist)

    # add rudimentary playback controls
    actions = {"next": media_player.next,
               "previous": media_player.previous,
               "pause": media_player.pause,
               "stop": sys.exit}
    # keep alive: the loop only ends when "stop" calls sys.exit
    while True:
        key_pressed = raw_input("enter next, previous, pause or stop: ")
        if key_pressed in actions:
            actions[key_pressed]()
import os
import sys
import random
import vlc
import time

# http://ubuntuforums.org/showthread.php?t=2136340
# http://stackoverflow.com/questions/19859840/excluding-directories-in-os-walk


class VLCPlayer():
    """Wrap a VLC media-list player and map typed commands onto it."""

    def __init__(self):
        """Create the VLC instance, its list player and the command table."""
        self.vlc_instance = vlc.Instance()
        self.media_player = self.vlc_instance.media_list_player_new()
        player = self.media_player
        # Command word -> callable taking no arguments; "stop" ends the
        # whole script through sys.exit.
        self.actions = dict(
            next=player.next,
            previous=player.previous,
            pause=player.pause,
            play=player.play,
            stop=sys.exit,
        )

    def set_media_list(self, playlist):
        """Build a VLC media list from `playlist` and hand it to the player."""
        media_list = self.vlc_instance.media_list_new(playlist)
        self.playlist = media_list
        self.media_player.set_media_list(media_list)

    def play(self):
        """Start playback on the wrapped player."""
        self.media_player.play()

    def playback_control(self):
        """Prompt for one command and run it, or report it as unknown."""
        command = raw_input("enter next, previous, pause or stop: ")
        if command not in self.actions:
            print("Unrecognized command")
            return
        self.actions[command]()


class MediaLibrary(list):
    """A list of track paths that is cached in a plain-text file.

    The list items are the track paths. On construction the paths are read
    from `file_name`; when that file cannot be opened, `path` is scanned
    instead and the result is written to `file_name`.
    """

    def __init__(self, file_name, path, excluded=None, extensions=None):
        """Load the library from the cache file, scanning if it is missing.

        Args:
            file_name: path of the cache file (one track path per line).
            path: root directory of the music collection.
            excluded: container of directory names to skip while scanning;
                defaults to an empty set.
            extensions: file-name suffixes that count as tracks, either an
                iterable of strings or a single string; defaults to
                {".mp3"}.
        """
        self.file_name = file_name
        self.path = path
        # None replaces the former mutable default arguments, which were
        # shared between all instances created without these arguments.
        self.excluded = set() if excluded is None else excluded
        self.extensions = {".mp3"} if extensions is None else extensions
        list.__init__(self)
        try:
            self.read_db()
        except IOError:
            self.scan()

    def scan(self):
        """Rescan the music directory and rewrite the cache file."""
        self.scan_folder()
        self.save_db()

    def scan_folder(self):
        """Replace the list contents with the tracks found under `self.path`."""
        suffixes = self.extensions
        if isinstance(suffixes, (str, type(u""))):
            # A bare string would otherwise be split into single characters.
            suffixes = (suffixes,)
        # One endswith call with a tuple adds a file only once even when
        # several suffixes match it.
        suffixes = tuple(suffixes)
        tracks = []
        for path, dirs, files in os.walk(self.path):
            # In-place edit so os.walk does not descend into excluded dirs.
            dirs[:] = [d for d in dirs if d not in self.excluded]
            # Join with the directory being visited, not the library root,
            # so tracks in subdirectories get their real path.
            tracks.extend(os.path.join(path, f)
                          for f in files if f.endswith(suffixes))
        self[:] = tracks

    def save_db(self):
        """Write self to `file_name`, one track path per line."""
        with open(self.file_name, "w") as db_file:
            db_file.write("\n".join(self))

    def read_db(self):
        """Replace the list contents with the paths stored in `file_name`.

        Only the line ending is removed from each line, so paths keep any
        leading or trailing spaces; blank lines are ignored. If the file
        cannot be opened the error is passed on and the list is left
        unchanged.
        """
        with open(self.file_name) as db_file:
            self[:] = [line.rstrip("\r\n") for line in db_file
                       if line.strip()]

    def get_existing_file(self):
        """Return a random track of the library that exists on disk.

        Raises:
            IndexError: if the library is empty or no track exists on disk.
        """
        # Try the tracks in random order rather than re-drawing until one
        # exists, which never terminates when all files have disappeared.
        for candidate in random.sample(self, len(self)):
            if os.path.isfile(candidate):
                return candidate
        raise IndexError("no existing file in media library")

    def get_songs(self, n):
        """Return up to `n - 1` distinct random tracks.

        A warning is printed and every track is returned (in random order)
        when `n` exceeds the library size. For `n` below 1 the result is an
        empty list.
        NOTE(review): the "- 1" presumably leaves room for a first track
        the caller picks separately - confirm against the caller.
        """
        if n > len(self):
            print("Warning: less files in library than requested")
        # Clamp at 0: random.sample rejects a negative sample size.
        count = max(0, min(len(self), n - 1))
        return random.sample(self, count)

if __name__ == "__main__":
    # input variables
    vlcpath = r'/usr/bin/vlc'
    musicpath = r'/home/andreas/Musik'
    excluded = {'_baby music', 'Buble, Michael', 'ballroom dancing'}
    db_file = os.path.join(os.getenv('HOME'), ".music.db")
    filetype = r'.mp3'
    len_playlist = 50
    print 'party on... filling glasses!'

    # get music list
    li

Context

StackExchange Code Review Q#146188, answer score: 3

Revisions (0)

No revisions yet.