# FLAC comment support for Mutagen
# Copyright 2005 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.

"""Read and write FLAC Vorbis comments and stream information.

Read more about FLAC at http://flac.sourceforge.net.

FLAC supports arbitrary metadata blocks. The two most interesting ones
are the FLAC stream information block, and the Vorbis comment block.
Mutagen can also read and write seek tables, cue sheets, embedded
pictures and padding blocks; blocks of any other type are kept as
opaque binary data.

This module does not handle Ogg FLAC files.

Based off documentation available at
http://flac.sourceforge.net/format.html
"""

__all__ = ["FLAC", "Open", "delete"]

import struct
from cStringIO import StringIO
from _vorbis import VCommentDict
from mutagen import FileType
from mutagen._util import insert_bytes
from mutagen.id3 import BitPaddedInt

class error(IOError):
    """Base class for the errors raised by this module."""

class FLACNoHeaderError(error):
    """Raised when the FLAC marker or the stream info block is missing."""

class FLACVorbisError(ValueError, error):
    """Raised for Vorbis comment problems, such as a duplicate block."""

def to_int_be(string):
    """Convert an arbitrarily-long string to a long using big-endian
    byte order."""
    return reduce(lambda a, b: (a << 8) + ord(b), string, 0L)

class MetadataBlock(object):
    """A generic block of FLAC metadata.

    This class is used as an ancestor for more specific blocks, and
    also as a container for data blobs of unknown blocks.

    Attributes:
    data -- raw binary data for this block
    """

    def __init__(self, data):
        """Parse the given data string or file-like as a metadata block.
        The metadata header should not be included.

        Passing None skips parsing entirely. Anything else that is
        neither a str nor an object with a 'read' attribute raises
        TypeError."""
        if data is not None:
            if isinstance(data, str): data = StringIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError(
                    "StreamInfo requires string data or a file-like")
            self.load(data)

    def load(self, data):
        """Store everything readable from the file-like as raw data."""
        self.data = data.read()

    def write(self):
        """Return the raw data of this block, without a header."""
        return self.data

    def writeblocks(blocks):
        """Render metadata blocks as a byte string.

        Each block is written as a one byte type code, a three byte
        big-endian length and then the block's own data. The type
        byte of the final block gets its high bit set, which is what
        the readers in this module use to detect the last block.

        blocks must not be empty. Raises error if a block is too
        large for the three byte length field."""
        data = []
        codes = [[block.code, block.write()] for block in blocks]
        codes[-1][0] |= 128
        for code, datum in codes:
            byte = chr(code)
            # Only the low three bytes of the packed length are kept,
            # so the largest length that survives is 2**24 - 1. A
            # block of exactly 2**24 bytes would be written as length 0.
            if len(datum) >= 2**24:
                raise error("block is too long to write")
            length = struct.pack(">I", len(datum))[-3:]
            data.append(byte + length + datum)
        return "".join(data)
    writeblocks = staticmethod(writeblocks)

    def group_padding(blocks):
        """Consolidate FLAC padding metadata blocks.

        All Padding blocks are removed from the list (in place) and a
        single new Padding block is appended at the end.

        The overall size of the rendered blocks does not change, so
        this adds several bytes of padding for each merged block. If
        there were no padding blocks at all, an empty one is added."""
        paddings = [block for block in blocks if isinstance(block, Padding)]
        blocks[:] = [block for block in blocks
                     if not isinstance(block, Padding)]
        # total padding size is the sum of padding sizes plus 4 bytes
        # per removed header.
        size = sum([old.length for old in paddings])
        # Use names distinct from the merged block: Python 2 list
        # comprehensions leak their loop variable into this scope.
        merged = Padding()
        # One header is still needed for the merged block itself; with
        # no paddings to merge there is nothing to reclaim, so never
        # go below zero.
        merged.length = size + 4 * max(len(paddings) - 1, 0)
        blocks.append(merged)
    group_padding = staticmethod(group_padding)

class StreamInfo(MetadataBlock):
    """FLAC stream information.

    This contains information about the audio data in the FLAC file.
    Unlike most stream information objects in Mutagen, changes to this
    one will rewritten to the file when it is saved. Unless you are
    actually changing the audio stream itself, don't change any
    attributes of this block.

    Attributes:
    min_blocksize -- minimum audio block size
    max_blocksize -- maximum audio block size
    sample_rate -- audio sample rate in Hz
    channels -- audio channels (1 for mono, 2 for stereo)
    bits_per_sample -- bits per sample
    total_samples -- total samples in file
    length -- audio length in seconds
    """

    code = 0

    def __eq__(self, other):
        try: return (self.min_blocksize == other.min_blocksize and
                     self.max_blocksize == other.max_blocksize and
                     self.sample_rate == other.sample_rate and
                     self.channels == other.channels and
                     self.bits_per_sample == other.bits_per_sample and
                     self.total_samples == other.total_samples)
        except: return False

    def load(self, data):
        self.min_blocksize = int(to_int_be(data.read(2)))
        self.max_blocksize = int(to_int_be(data.read(2)))
        self.min_framesize = int(to_int_be(data.read(3)))
        self.max_framesize = int(to_int_be(data.read(3)))
        # first 16 bits of sample rate
        sample_first = to_int_be(data.read(2))
        # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample
        sample_channels_bps = to_int_be(data.read(1))
        # last 4 of bits/sample, 36 of total samples
        bps_total = to_int_be(data.read(5))

        sample_tail = sample_channels_bps >> 4
        self.sample_rate = int((sample_first << 4) + sample_tail)
        self.channels = int(((sample_channels_bps >> 1) & 7) + 1)
        bps_tail = bps_total >> 36
        bps_head = (sample_channels_bps & 1) << 4
        self.bits_per_sample = int(bps_head + bps_tail + 1)
        self.total_samples = bps_total & 0xFFFFFFFFFL
        self.length = self.total_samples / float(self.sample_rate)

        self.md5_signature = to_int_be(data.read(16))

    def write(self):
        f = StringIO()
        f.write(struct.pack(">I", self.min_blocksize)[-2:])
        f.write(struct.pack(">I", self.max_blocksize)[-2:])
        f.write(struct.pack(">I", self.min_framesize)[-3:])
        f.write(struct.pack(">I", self.max_framesize)[-3:])

        # first 16 bits of sample rate
        f.write(struct.pack(">I", self.sample_rate >> 4)[-2:])
        # 4 bits sample, 3 channel, 1 bps
        byte = (self.sample_rate & 0xF) << 4
        byte += ((self.channels - 1) & 3) << 1
        byte += ((self.bits_per_sample - 1) >> 4) & 1
        f.write(chr(byte))
        # 4 bits of bps, 4 of sample count
        byte = ((self.bits_per_sample - 1) & 0xF)  << 4
        byte += (self.total_samples >> 32) & 0xF
        f.write(chr(byte))
        # last 32 of sample count
        f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFFL))
        # MD5 signature
        sig = self.md5_signature
        f.write(struct.pack(
            ">4I", (sig >> 96) & 0xFFFFFFFFL, (sig >> 64) & 0xFFFFFFFFL,
            (sig >> 32) & 0xFFFFFFFFL, sig & 0xFFFFFFFFL))
        return f.getvalue()

    def pprint(self):
        return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate)

class SeekPoint(tuple):
    """A single seek point in a FLAC file.

    Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL,
    and byte_offset and num_samples undefined. Seek points must be
    sorted in ascending order by first_sample number. Seek points must
    be unique by first_sample number, except for placeholder
    points. Placeholder points must occur last in the table and there
    may be any number of them.

    Attributes:
    first_sample -- sample number of first sample in the target frame
    byte_offset -- offset from first frame to target frame
    num_samples -- number of samples in target frame
    """

    def __new__(cls, first_sample, byte_offset, num_samples):
        """Build the point as a (first_sample, byte_offset, num_samples)
        tuple."""
        # super() wants the class being defined first and the class
        # being instantiated second. The reverse order only works when
        # both are SeekPoint and raises TypeError for any subclass.
        return super(SeekPoint, cls).__new__(
            cls, (first_sample, byte_offset, num_samples))
    first_sample = property(lambda self: self[0])
    byte_offset = property(lambda self: self[1])
    num_samples = property(lambda self: self[2])

class SeekTable(MetadataBlock):
    """Read and write FLAC seek tables.

    Every seek point is stored as two unsigned 64 bit integers and one
    unsigned 16 bit integer, all big-endian.

    Attributes:
    seekpoints -- list of SeekPoint objects
    """

    __SEEKPOINT_FORMAT = '>QQH'
    __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT)

    code = 3

    def __init__(self, data):
        """Start with an empty table, then parse data (see
        MetadataBlock.__init__ for the accepted types)."""
        self.seekpoints = []
        super(SeekTable, self).__init__(data)

    def __eq__(self, other):
        """Two tables are equal when their seekpoints lists are equal."""
        try:
            return self.seekpoints == other.seekpoints
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Read seek points until fewer bytes than one full record
        remain; a trailing partial record is ignored."""
        record_size = self.__SEEKPOINT_SIZE
        layout = self.__SEEKPOINT_FORMAT
        points = self.seekpoints = []
        while True:
            raw = data.read(record_size)
            if len(raw) != record_size:
                break
            points.append(SeekPoint(*struct.unpack(layout, raw)))

    def write(self):
        """Render every seek point, in list order, as a byte string."""
        layout = self.__SEEKPOINT_FORMAT
        records = [
            struct.pack(layout, point.first_sample, point.byte_offset,
                        point.num_samples)
            for point in self.seekpoints]
        return "".join(records)

    def __repr__(self):
        name = type(self).__name__
        return "<%s seekpoints=%r>" % (name, self.seekpoints)

class VCFLACDict(VCommentDict):
    """Read and write FLAC Vorbis comments.

    FLACs don't use the framing bit at the end of the comment block.
    So this extends VCommentDict to not use the framing bit.
    """

    code = 4

    def load(self, data, errors='replace', framing=False):
        """Delegate to VCommentDict.load, with framing off by default."""
        parent = super(VCFLACDict, self)
        parent.load(data, errors=errors, framing=framing)

    def write(self, framing=False):
        """Delegate to VCommentDict.write, with framing off by default."""
        parent = super(VCFLACDict, self)
        return parent.write(framing=framing)

class CueSheetTrackIndex(tuple):
    """Index for a track in a cuesheet.

    For CD-DA, an index_number of 0 corresponds to the track
    pre-gap. The first index in a track must have a number of 0 or 1,
    and subsequently, index_numbers must increase by 1. Index_numbers
    must be unique within a track. And index_offset must be evenly
    divisible by 588 samples.

    Attributes:
    index_number -- index point number
    index_offset -- offset in samples from track start
    """

    def __new__(cls, index_number, index_offset):
        """Build the index as an (index_number, index_offset) tuple."""
        # super() wants the class being defined first and the class
        # being instantiated second. The reverse order only works when
        # both are CueSheetTrackIndex and raises TypeError for any
        # subclass.
        return super(CueSheetTrackIndex, cls).__new__(
            cls, (index_number, index_offset))
    index_number = property(lambda self: self[0])
    index_offset = property(lambda self: self[1])

class CueSheetTrack(object):
    """A track in a cuesheet.

    For CD-DA, track_numbers must be 1-99, or 170 for the
    lead-out. Track_numbers must be unique within a cue sheet. There
    must be at least one index in every track except the lead-out track
    which must have none.

    Attributes:
    track_number -- track number
    start_offset -- track offset in samples from start of FLAC stream
    isrc -- ISRC code
    type -- 0 for audio, 1 for digital data
    pre_emphasis -- true if the track is recorded with pre-emphasis
    indexes -- list of CueSheetTrackIndex objects
    """

    def __init__(self, track_number, start_offset, isrc='', type_=0,
                 pre_emphasis=False):
        """Store the given values; the index list starts out empty."""
        self.indexes = []
        self.track_number = track_number
        self.start_offset = start_offset
        self.isrc = isrc
        self.type = type_
        self.pre_emphasis = pre_emphasis

    def __eq__(self, other):
        """Compare all six attributes in order, stopping at the first
        difference. Objects lacking one of them are unequal."""
        compared = ("track_number", "start_offset", "isrc", "type",
                    "pre_emphasis", "indexes")
        try:
            for name in compared:
                same = getattr(self, name) == getattr(other, name)
                if not same:
                    return same
            return same
        except (AttributeError, TypeError):
            return False

    def __repr__(self):
        values = (
            type(self).__name__, self.track_number, self.start_offset,
            self.isrc, self.type, self.pre_emphasis, self.indexes)
        template = ("<%s number=%r, offset=%d, isrc=%r, type=%r, "
                    "pre_emphasis=%r, indexes=%r)>")
        return template % values

class CueSheet(MetadataBlock):
    """Read and write FLAC embedded cue sheets.

    Number of tracks should be from 1 to 100. There should always be
    exactly one lead-out track and that track must be the last track
    in the cue sheet.

    Attributes:
    media_catalog_number -- media catalog number in ASCII
    lead_in_samples -- number of lead-in samples
    compact_disc -- true if the cuesheet corresponds to a compact disc
    tracks -- list of CueSheetTrack objects
    lead_out -- lead-out as CueSheetTrack or None if lead-out was not found
                (NOTE(review): nothing in this class sets lead_out;
                confirm whether anything relies on it)
    """

    # Sheet header: 128 byte catalog number, 64 bit lead-in, a flags
    # byte, 258 pad bytes and a one byte track count.
    __CUESHEET_FORMAT = '>128sQB258xB'
    __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT)
    # Track header: 64 bit offset, track number byte, 12 byte ISRC, a
    # flags byte, 13 pad bytes and a one byte index count.
    __CUESHEET_TRACK_FORMAT = '>QB12sB13xB'
    __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT)
    # Track index: 64 bit offset, index number byte and 3 pad bytes.
    __CUESHEET_TRACKINDEX_FORMAT = '>QB3x'
    __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT)

    code = 5

    media_catalog_number = ''
    lead_in_samples = 88200
    compact_disc = True

    def __init__(self, data):
        """Start with no tracks, then parse data (see
        MetadataBlock.__init__ for the accepted types)."""
        self.tracks = []
        super(CueSheet, self).__init__(data)

    def __eq__(self, other):
        """Compare catalog number, lead-in, disc flag and tracks in
        order. Objects lacking one of them are unequal."""
        compared = ("media_catalog_number", "lead_in_samples",
                    "compact_disc", "tracks")
        try:
            for name in compared:
                same = getattr(self, name) == getattr(other, name)
                if not same:
                    return same
            return same
        except (AttributeError, TypeError):
            return False

    def __read_track(self, data):
        """Read one track header and its indexes from the file-like and
        return them as a CueSheetTrack."""
        raw = data.read(self.__CUESHEET_TRACK_SIZE)
        offset, number, padded_isrc, flags, index_count = struct.unpack(
            self.__CUESHEET_TRACK_FORMAT, raw)
        # Top flag bit is the track type, the next one pre-emphasis.
        track = CueSheetTrack(
            number, offset, padded_isrc.rstrip('\0'), (flags & 0x80) >> 7,
            bool(flags & 0x40))
        for _ in range(index_count):
            raw_index = data.read(self.__CUESHEET_TRACKINDEX_SIZE)
            index_offset, index_number = struct.unpack(
                self.__CUESHEET_TRACKINDEX_FORMAT, raw_index)
            track.indexes.append(
                CueSheetTrackIndex(index_number, index_offset))
        return track

    def load(self, data):
        """Parse the sheet header, then as many tracks as it declares."""
        raw = data.read(self.__CUESHEET_SIZE)
        catalog, lead_in, sheet_flags, track_count = struct.unpack(
            self.__CUESHEET_FORMAT, raw)
        self.media_catalog_number = catalog.rstrip('\0')
        self.lead_in_samples = lead_in
        self.compact_disc = bool(sheet_flags & 0x80)
        self.tracks = []
        for _ in range(track_count):
            self.tracks.append(self.__read_track(data))

    def write(self):
        """Render the header, tracks and indexes as a byte string."""
        if self.compact_disc:
            sheet_flags = 0x80
        else:
            sheet_flags = 0
        parts = [struct.pack(
            self.__CUESHEET_FORMAT, self.media_catalog_number,
            self.lead_in_samples, sheet_flags, len(self.tracks))]
        for track in self.tracks:
            track_flags = (track.type & 1) << 7
            if track.pre_emphasis:
                track_flags |= 0x40
            parts.append(struct.pack(
                self.__CUESHEET_TRACK_FORMAT, track.start_offset,
                track.track_number, track.isrc, track_flags,
                len(track.indexes)))
            for index in track.indexes:
                parts.append(struct.pack(
                    self.__CUESHEET_TRACKINDEX_FORMAT,
                    index.index_offset, index.index_number))
        return "".join(parts)

    def __repr__(self):
        values = (
            type(self).__name__, self.media_catalog_number,
            self.lead_in_samples, self.compact_disc, self.tracks)
        template = ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, "
                    "tracks=%r>")
        return template % values

class Picture(MetadataBlock):
    """Read and write FLAC embedded pictures.

    Attributes:
    type -- picture type (same as types for ID3 APIC frames)
    mime -- MIME type of the picture
    desc -- picture's description
    width -- width in pixels
    height -- height in pixels
    depth -- color depth in bits-per-pixel
    colors -- number of colors for indexed palettes (like GIF),
              0 for non-indexed
    data -- picture data
    """

    code = 6

    def __init__(self, data=None):
        """Start from an empty picture, then parse data if it is not
        None (see MetadataBlock.__init__ for the accepted types)."""
        self.type = 0
        self.mime = self.desc = u''
        self.width = self.height = 0
        self.depth = self.colors = 0
        self.data = ''
        super(Picture, self).__init__(data)

    def __eq__(self, other):
        """Compare all eight attributes in order, stopping at the first
        difference. Objects lacking one of them are unequal."""
        compared = ("type", "mime", "desc", "width", "height", "depth",
                    "colors", "data")
        try:
            for name in compared:
                same = getattr(self, name) == getattr(other, name)
                if not same:
                    return same
            return same
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Parse the picture fields. Each variable-length field is
        preceded by its 32 bit big-endian length."""
        self.type, mime_length = struct.unpack('>2I', data.read(8))
        self.mime = data.read(mime_length).decode('UTF-8', 'replace')
        desc_length, = struct.unpack('>I', data.read(4))
        self.desc = data.read(desc_length).decode('UTF-8', 'replace')
        geometry = struct.unpack('>5I', data.read(20))
        (self.width, self.height, self.depth,
         self.colors, data_length) = geometry
        self.data = data.read(data_length)

    def write(self):
        """Render the picture fields as a byte string, with the text
        fields encoded as UTF-8."""
        out = StringIO()
        encoded_mime = self.mime.encode('UTF-8')
        out.write(struct.pack('>2I', self.type, len(encoded_mime)))
        out.write(encoded_mime)
        encoded_desc = self.desc.encode('UTF-8')
        out.write(struct.pack('>I', len(encoded_desc)))
        out.write(encoded_desc)
        geometry = struct.pack(
            '>5I', self.width, self.height, self.depth, self.colors,
            len(self.data))
        out.write(geometry)
        out.write(self.data)
        return out.getvalue()

    def __repr__(self):
        values = (type(self).__name__, self.mime, len(self.data))
        return "<%s '%s' (%d bytes)>" % values

class Padding(MetadataBlock):
    """Empty padding space for metadata blocks.

    To avoid rewriting the entire FLAC file when editing comments,
    metadata is often padded. Padding should occur at the end, and no
    more than one padding block should be in any FLAC file. Mutagen
    handles this with MetadataBlock.group_padding.

    Attributes:
    length -- number of padding bytes
    """

    code = 1

    def __init__(self, data=""):
        """Create padding as long as data; none by default."""
        super(Padding, self).__init__(data)

    def load(self, data):
        """Only the amount of data is kept, not its content."""
        self.length = len(data.read())

    def write(self):
        """Return length null bytes; raise error if the string cannot
        be built."""
        try:
            return "\x00" * self.length
        # On some 64 bit platforms this won't generate a MemoryError
        # or OverflowError since you might have enough RAM, but it
        # still generates a ValueError. On other 64 bit platforms,
        # this will still succeed for extremely large values.
        # Those should never happen in the real world, and if they
        # do, writeblocks will catch it.
        except (OverflowError, ValueError, MemoryError):
            raise error("cannot write %d bytes" % self.length)

    def __eq__(self, other):
        """Equal to any other Padding of the same length."""
        if not isinstance(other, Padding):
            return False
        return self.length == other.length

    def __repr__(self):
        name = type(self).__name__
        return "<%s (%d bytes)>" % (name, self.length)

class FLAC(FileType):
    """A FLAC audio file.

    Attributes:
    info -- stream information (length, sample rate, channels)
    tags -- metadata tags, if any
    cuesheet -- CueSheet object, if any
    seektable -- SeekTable object, if any
    pictures -- list of embedded pictures
    metadata_blocks -- every metadata block of the file, in file order
    """

    _mimes = ["audio/x-flac", "application/x-flac"]

    METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict,
        CueSheet, Picture]
    """Known metadata block types, indexed by ID."""

    def score(filename, fileobj, header):
        """Return a true value if header starts with the FLAC marker."""
        return header.startswith("fLaC")
    score = staticmethod(score)

    def __read_metadata_block(self, file):
        """Read one metadata block and append it to metadata_blocks.

        Returns 1 if more blocks follow and 0 after the last block.
        Raises error if the file ends early or if a second cue sheet
        or seek table is found, and FLACVorbisError for a second
        Vorbis comment block."""
        header = file.read(4)
        if len(header) != 4:
            # Without this check, ord('') would raise a TypeError that
            # callers catching this module's errors never see.
            raise error("file ended while reading a metadata block header")
        byte = ord(header[0])
        size = to_int_be(header[1:])
        data = file.read(size)
        if len(data) != size:
            raise error(
                "file said %d bytes, read %d bytes" % (size, len(data)))
        try:
            block = self.METADATA_BLOCKS[byte & 0x7F](data)
        except (IndexError, TypeError):
            # Either the ID is beyond the table (IndexError) or the
            # table holds None for it (calling None is a TypeError).
            # Keep the block as an opaque blob so it is written back.
            block = MetadataBlock(data)
            block.code = byte & 0x7F
            self.metadata_blocks.append(block)
        else:
            self.metadata_blocks.append(block)
            if block.code == VCFLACDict.code:
                if self.tags is None: self.tags = block
                else: raise FLACVorbisError("> 1 Vorbis comment block found")
            elif block.code == CueSheet.code:
                if self.cuesheet is None: self.cuesheet = block
                else: raise error("> 1 CueSheet block found")
            elif block.code == SeekTable.code:
                if self.seektable is None: self.seektable = block
                else: raise error("> 1 SeekTable block found")
        # The high bit of the type byte marks the last block.
        return (byte >> 7) ^ 1

    def add_tags(self):
        """Add a Vorbis comment block to the file.

        Raises FLACVorbisError if there already is one."""
        if self.tags is None:
            self.tags = VCFLACDict()
            self.metadata_blocks.append(self.tags)
        else: raise FLACVorbisError("a Vorbis comment already exists")
    add_vorbiscomment = add_tags

    def delete(self, filename=None):
        """Remove Vorbis comments from a file.

        If no filename is given, the one most recently loaded is used.
        """
        if filename is None: filename = self.filename
        for s in list(self.metadata_blocks):
            if isinstance(s, VCFLACDict):
                self.metadata_blocks.remove(s)
                self.tags = None
                # Pass the name on, otherwise an explicitly given
                # filename is silently ignored.
                self.save(filename)
                break

    vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.")

    def load(self, filename):
        """Load file information from a filename.

        Raises FLACNoHeaderError if the file has no FLAC marker or
        does not start with a stream information block."""

        self.metadata_blocks = []
        self.tags = None
        self.cuesheet = None
        self.seektable = None
        self.filename = filename
        fileobj = open(filename, "rb")
        try:
            self.__check_header(fileobj)
            while self.__read_metadata_block(fileobj): pass
        finally:
            fileobj.close()

        try:
            first = self.metadata_blocks[0]
            first.length
        except (AttributeError, IndexError):
            raise FLACNoHeaderError("Stream info block not found")
        # Padding blocks have a 'length' attribute too, so also check
        # the block type before exposing the block as 'info'.
        if first.code != StreamInfo.code:
            raise FLACNoHeaderError("Stream info block not found")

    info = property(lambda s: s.metadata_blocks[0])

    def add_picture(self, picture):
        """Add a new picture to the file."""
        self.metadata_blocks.append(picture)

    def clear_pictures(self):
        """Delete all pictures from the file."""
        self.metadata_blocks = [
            b for b in self.metadata_blocks if b.code != Picture.code]

    def __get_pictures(self):
        return [b for b in self.metadata_blocks if b.code == Picture.code]
    pictures = property(__get_pictures, doc="List of embedded pictures")

    def save(self, filename=None, deleteid3=False):
        """Save metadata blocks to a file.

        If no filename is given, the one most recently loaded is used.
        If deleteid3 is true, an ID3v2 tag in front of the FLAC marker
        and an ID3v1 tag at the end of the file are removed.
        """

        if filename is None: filename = self.filename
        f = open(filename, 'rb+')
        try:
            # Ensure we've got padding at the end, and only at the end.
            # If adding makes it too large, we'll scale it down later.
            self.metadata_blocks.append(Padding('\x00' * 1020))
            MetadataBlock.group_padding(self.metadata_blocks)

            header = self.__check_header(f)
            # "fLaC" and maybe ID3
            available = self.__find_audio_offset(f) - header
            data = MetadataBlock.writeblocks(self.metadata_blocks)

            # Delete ID3v2
            if deleteid3 and header > 4:
                available += header - 4
                header = 4

            if len(data) > available:
                # If we have too much data, see if we can reduce padding.
                padding = self.metadata_blocks[-1]
                newlength = padding.length - (len(data) - available)
                if newlength > 0:
                    padding.length = newlength
                    data = MetadataBlock.writeblocks(self.metadata_blocks)
                    assert len(data) == available

            elif len(data) < available:
                # If we have too little data, increase padding.
                self.metadata_blocks[-1].length += (available - len(data))
                data = MetadataBlock.writeblocks(self.metadata_blocks)
                assert len(data) == available

            if len(data) != available:
                # We couldn't reduce the padding enough.
                diff = (len(data) - available)
                insert_bytes(f, diff, header)

            f.seek(header - 4)
            f.write("fLaC" + data)

            # Delete ID3v1
            if deleteid3:
                try: f.seek(-128, 2)
                except IOError: pass
                else:
                    if f.read(3) == "TAG":
                        f.seek(-128, 2)
                        f.truncate()
        finally:
            # Always release the handle, also when a step above fails.
            f.close()

    def __find_audio_offset(self, fileobj):
        """Skip all metadata blocks, starting at the current position,
        and return the offset right after the last one.

        Raises error if the file ends inside a block header."""
        byte = 0x00
        while not (byte >> 7) & 1:
            header = fileobj.read(4)
            if len(header) != 4:
                raise error(
                    "file ended while reading a metadata block header")
            byte = ord(header[0])
            size = to_int_be(header[1:])
            fileobj.read(size)
        return fileobj.tell()

    def __check_header(self, fileobj):
        """Return the offset just past the "fLaC" marker.

        This is 4 for a plain FLAC file and larger if an ID3v2 tag
        comes first. The file is left positioned at that offset.
        Raises FLACNoHeaderError if no marker is found."""
        size = 4
        header = fileobj.read(4)
        if header != "fLaC":
            size = None
            if header[:3] == "ID3":
                # 4 bytes are consumed above and 6 here; the last 4
                # of those 6 hold the tag size. The extra 4 bytes in
                # the constant account for the "fLaC" marker itself.
                size = 14 + BitPaddedInt(fileobj.read(6)[2:])
                fileobj.seek(size - 4)
                if fileobj.read(4) != "fLaC": size = None
        if size is None:
            raise FLACNoHeaderError(
                "%r is not a valid FLAC file" % fileobj.name)
        return size

# Open is an alias for the FLAC class; Open(filename) is FLAC(filename).
Open = FLAC

def delete(filename):
    """Remove tags from a file.

    The file is opened as a FLAC object and its delete method is
    called."""
    flac_file = FLAC(filename)
    flac_file.delete()