libretime/python_apps/airtime_analyzer/airtime_analyzer/metadata_analyzer.py

import time
import datetime
import mutagen
import magic
import wave
import logging
import os
import hashlib
from analyzer import Analyzer

class MetadataAnalyzer(Analyzer):

    @staticmethod
    def analyze(filename, metadata):
        ''' Extract audio metadata from tags embedded in the file (eg. ID3 tags)

            Keyword arguments:
                filename: The path to the audio file to extract metadata from.
                metadata: A dictionary that the extracted metadata will be added to.
        '''
        if not isinstance(filename, unicode):
            raise TypeError("filename must be unicode. Was of type " + type(filename).__name__)
        if not isinstance(metadata, dict):
            raise TypeError("metadata must be a dict. Was of type " + type(metadata).__name__)

        #Airtime <= 2.5.x nonsense:
        metadata["ftype"] = "audioclip"
        #Other fields we'll want to set for Airtime:
        metadata["hidden"] = False

        # Get file size and md5 hash of the file
        metadata["filesize"] = os.path.getsize(filename)

        with open(filename, 'rb') as fh:
            m = hashlib.md5()
            while True:
                data = fh.read(8192)
                if not data:
                    break
                m.update(data)
            metadata["md5"] = m.hexdigest()

        # Mutagen doesn't handle WAVE files so we use a different package
        ms = magic.open(magic.MIME_TYPE)
        ms.load()
        with open(filename, 'rb') as fh:
            mime_check = ms.buffer(fh.read(2014))
        metadata["mime"] = mime_check
        if mime_check == 'audio/x-wav':
            return MetadataAnalyzer._analyze_wave(filename, metadata)

        #Extract metadata from an audio file using mutagen
        audio_file = mutagen.File(filename, easy=True)

        #Bail if the file couldn't be parsed. The title should stay as the filename
        #inside Airtime.
        if audio_file == None:  # Don't use "if not" here. It is wrong due to mutagen's design.
            return metadata
        # Note that audio_file can equal {} if the file is valid but there's no metadata tags.
        # We can still try to grab the info variables below.

        #Grab other file information that isn't encoded in a tag, but instead usually
        #in the file header. Mutagen breaks that out into a separate "info" object:
        info = audio_file.info
        if hasattr(info, "sample_rate"): # Mutagen is annoying and inconsistent
            metadata["sample_rate"] = info.sample_rate
        if hasattr(info, "length"):
            metadata["length_seconds"] = info.length
            #Converting the length in seconds (float) to a formatted time string
            track_length = datetime.timedelta(seconds=info.length)
            metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)
            # Other fields for Airtime
            metadata["cueout"] = metadata["length"]

        # Set a default cue in time in seconds
        metadata["cuein"] = 0.0;

        if hasattr(info, "bitrate"):
            metadata["bit_rate"] = info.bitrate

        # Use the mutagen to get the MIME type, if it has one. This is more reliable and
        # consistent for certain types of MP3s or MPEG files than the MIMEs returned by magic.
        if audio_file.mime:
            metadata["mime"] = audio_file.mime[0]

        #Try to get the number of channels if mutagen can...
        try:
            #Special handling for getting the # of channels from MP3s. It's in the "mode" field
            #which is 0=Stereo, 1=Joint Stereo, 2=Dual Channel, 3=Mono. Part of the ID3 spec...
            if metadata["mime"] in ["audio/mpeg", 'audio/mp3']:
                if info.mode == 3:
                    metadata["channels"] = 1
                else:
                    metadata["channels"] = 2
            else:
                metadata["channels"] = info.channels
        except (AttributeError, KeyError):
            #If mutagen can't figure out the number of channels, we'll just leave it out...
            pass

        #Try to extract the number of tracks on the album if we can (the "track total")
        try:
            track_number = audio_file["tracknumber"]
            if isinstance(track_number, list): # Sometimes tracknumber is a list, ugh
                track_number = track_number[0]
            track_number_tokens = track_number
            if u'/' in track_number:
                track_number_tokens = track_number.split(u'/')
                track_number = track_number_tokens[0]
            elif u'-' in track_number:
                track_number_tokens = track_number.split(u'-')
                track_number = track_number_tokens[0]
            metadata["track_number"] = track_number
            track_total = track_number_tokens[1]
            metadata["track_total"] = track_total
        except (AttributeError, KeyError, IndexError):
            #If we couldn't figure out the track_number or track_total, just ignore it...
            pass

        #We normalize the mutagen tags slightly here, so in case mutagen changes,
        #we find the
        mutagen_to_airtime_mapping = {
            'title':        'track_title',
            'artist':       'artist_name',
            'album':        'album_title',
            'bpm':          'bpm',
            'composer':     'composer',
            'conductor':    'conductor',
            'copyright':    'copyright',
            'comment':      'comment',
            'encoded_by':   'encoder',
            'genre':        'genre',
            'isrc':         'isrc',
            'label':        'label',
            'organization': 'label',
            #'length':       'length',
            'language':     'language',
            'last_modified':'last_modified',
            'mood':         'mood',
            'bit_rate':     'bit_rate',
            'replay_gain':  'replaygain',
            #'tracknumber':  'track_number',
            #'track_total':  'track_total',
            'website':      'website',
            'date':         'year',
            #'mime_type':    'mime',
        }

        for mutagen_tag, airtime_tag in mutagen_to_airtime_mapping.iteritems():
            try:
                metadata[airtime_tag] = audio_file[mutagen_tag]

                # Some tags are returned as lists because there could be multiple values.
                # This is unusual so we're going to always just take the first item in the list.
                if isinstance(metadata[airtime_tag], list):
                    if metadata[airtime_tag]:
                        metadata[airtime_tag] = metadata[airtime_tag][0]
                    else: # Handle empty lists
                        metadata[airtime_tag] = ""

            except KeyError:
                continue

        return metadata

    @staticmethod
    def _analyze_wave(filename, metadata):
        try:
            reader = wave.open(filename, 'rb')
            metadata["channels"] = reader.getnchannels()
            metadata["sample_rate"] = reader.getframerate()
            length_seconds = float(reader.getnframes()) / float(metadata["sample_rate"])
            #Converting the length in seconds (float) to a formatted time string
            track_length = datetime.timedelta(seconds=length_seconds)
            metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)
            metadata["length_seconds"] = length_seconds
            metadata["cueout"] = metadata["length"]
        except wave.Error:
            logging.error("Invalid WAVE file.")
            raise
        return metadata