import numpy as np
from pims.base_frames import FramesSequence
from pims.frame import Frame
try:
import av
except ImportError:
av = None
def available():
    """Report whether PyAV can be used (the ``av`` import succeeded)."""
    # ``av`` is bound to None at module import time when PyAV is missing.
    has_pyav = av is not None
    return has_pyav
def _next_video_packet(container_iter):
for packet in container_iter:
if packet.stream.type == 'video':
decoded = packet.decode()
if len(decoded) > 0:
return decoded
raise ValueError("Could not find any video packets.")
class WrapPyAvFrame(object):
    """Lazy wrapper around an av video frame.

    Holds the decoded frame together with its computed frame number and
    metadata. Conversion to a pims ``Frame`` ndarray is deferred to the
    first ``to_frame()`` call and cached in ``self.arr``.
    """
    def __init__(self, frame, frame_no, metadata=None):
        self.frame_no = frame_no
        self.metadata = metadata
        self.arr = None  # filled lazily by to_frame()
        # Force a copy so that ffmpeg cannot reuse the underlying buffer:
        # reformatting to rgb24 copies, except when the source already is
        # rgb24 — then bounce through bgr24 first (two cheap conversions;
        # no direct "just copy" call is known).
        needs_bounce = frame.format.name == 'rgb24'
        source = frame.reformat(format="bgr24") if needs_bounce else frame
        self.frame = source.reformat(format="rgb24")

    def to_frame(self):
        """Return the frame as a pims Frame (cached after the first call)."""
        if self.arr is None:
            ndarr = self.frame.to_ndarray(format='rgb24')
            self.arr = Frame(ndarr, frame_no=self.frame_no,
                             metadata=self.metadata)
        return self.arr
def _gen_frames(demuxer, time_base, frame_rate=1., first_pts=0):
    """Yield a WrapPyAvFrame for every frame decoded from ``demuxer``.

    The frame number is derived from the first available timestamp, probed
    in the order frame.pts, packet.pts, frame.dts, packet.dts, via
    ``i = round((timestamp - first_pts) * time_base * frame_rate)``.

    Raises IOError when a frame carries no timestamp at all.
    """
    for packet in demuxer:
        for frame in packet.decode():
            # learn timestamp: first non-None candidate wins
            candidates = (frame.pts, packet.pts, frame.dts, packet.dts)
            timestamp = next((ts for ts in candidates if ts is not None),
                             None)
            if timestamp is None:
                raise IOError(
                    "Unable to read video: frames contain no timestamps. "
                    "Please use PyAVReaderIndexed.")
            t = (timestamp - first_pts) * time_base
            frame_no = int(round(t * frame_rate))
            yield WrapPyAvFrame(frame, frame_no=frame_no,
                                metadata=dict(timestamp=timestamp,
                                              t=float(t)))
class PyAVReaderTimed(FramesSequence):
    """Read images from a video file via a direct FFmpeg/AVbin interface.

    The frames are indexed according to their 'timestamp', starting at 0 at
    the timestamp of the first non-empty frame. Missing frames are filled in
    with empty frames. The number of frames in the video is estimated from
    the movie duration and the average frame rate.

    Parameters
    ----------
    filename : string
    cache_size : integer, optional
        the number of frames that are kept in memory. Default 16.
    fast_forward_thresh : integer, optional
        the reader will proceed through the frames if forwarding below this
        number. If forwarding above this number, it will use seek(). Default
        32.
    stream_index : integer, optional
        the index of the video stream inside the file. rarely other than 0.

    Examples
    --------
    >>> video = PyAVVideoReader('video.avi')  # or .mov, etc.
    >>> video[0] # Show the first frame.
    >>> video[-1] # Show the last frame.
    >>> video[1][0:10, 0:10] # Show one corner of the second frame.

    >>> for frame in video[:]:
    ...    # Do something with every frame.

    >>> for frame in video[10:20]:
    ...    # Do something with frames 10-20.

    >>> for frame in video[[5, 7, 13]]:
    ...    # Do something with frames 5, 7, and 13.

    >>> frame_count = len(video) # Number of frames in video
    >>> frame_shape = video.frame_shape # Pixel dimensions of video
    """
    class_priority = 9

    @classmethod
    def class_exts(cls):
        return {'mov', 'avi',
                'mp4'} | super(PyAVReaderTimed, cls).class_exts()

    def __init__(self, file, cache_size=16, fast_forward_thresh=32,
                 stream_index=0, format=None):
        if not hasattr(file, 'read'):
            file = str(file)
        self.file = file
        self.format = format
        self._container = av.open(self.file, format=self.format)

        if len(self._container.streams.video) == 0:
            raise IOError("No valid video stream found in {}".format(file))
        self._stream = self._container.streams.video[stream_index]

        # Prefer the per-stream duration; fall back to the container-level
        # duration when the stream does not report one (the multiplication
        # with None raises TypeError).
        try:
            self._duration = self._stream.duration * self._stream.time_base
        except TypeError:
            self._duration = self._container.duration / av.time_base

        self._frame_rate = self._stream.average_rate
        if self.duration <= 0 or len(self) <= 0:
            raise IOError("Video stream {} in {} has zero "
                          "length.".format(stream_index, file))

        self._cache = [None] * cache_size
        self._fast_forward_thresh = fast_forward_thresh

        demuxer = self._container.demux(self._stream)

        # obtain first frame to get first time point
        # also tests for the presence of timestamps
        frame = next(_gen_frames(demuxer, self._stream.time_base))
        self._first_pts = frame.metadata['timestamp']
        # re-wrap with frame number 0: the first frame defines t == 0
        frame = WrapPyAvFrame(frame.frame, 0, frame.metadata)
        self._cache[0] = frame

        self._frame_shape = (self._stream.height, self._stream.width, 3)

        self._last_frame = 0
        self._reset_demuxer()

    def __len__(self):
        return int(self._duration * self._frame_rate)

    def _reset_demuxer(self):
        """Restart the frame generator from the top of the stream."""
        demuxer = self._container.demux(self._stream)
        self._frame_generator = _gen_frames(demuxer, self._stream.time_base,
                                            self._frame_rate, self._first_pts)

    @property
    def duration(self):
        """The video duration in seconds."""
        return float(self._duration)

    @property
    def frame_shape(self):
        """Pixel dimensions of a frame: (height, width, 3)."""
        return self._frame_shape

    @property
    def frame_rate(self):
        """Average frame rate in frames per second."""
        return float(self._frame_rate)

    def get_frame(self, i):
        """Return frame ``i`` as a pims Frame, using the cache if possible."""
        cached_frame = self._cache[i % len(self._cache)]
        if cached_frame is None:
            cached_i = -1
        else:
            cached_i = cached_frame.frame_no

        # return directly if the frame is in cache
        if cached_i == i:
            return cached_frame.to_frame()

        # check if we will have to seek to the frame
        if self._last_frame >= i or \
           self._last_frame < i - self._fast_forward_thresh:
            frame = self.seek(i)
            # return directly if the seek was perfect (happens rarely)
            if frame is not None:
                if frame.frame_no == i:
                    return frame.to_frame()

        # proceed through the frames
        result = None
        for frame in self._frame_generator:
            # first cache the frame
            self._cache[frame.frame_no % len(self._cache)] = frame
            self._last_frame = frame.frame_no
            if frame.frno < i if False else frame.frame_no < i:
                continue  # go on towards the frame
            elif frame.frame_no == i:
                result = frame
                break
            else:  # the frame was not inside the reader
                break
        else:
            # always restart the frame generator when it ends
            self._reset_demuxer()

        if result is None:
            # the requested frame actually does not exist. Can occur due to
            # a bad file, or due to inaccuracy of reader length __len__.
            # find it in the cache
            for other_i in range(i - 1, i - len(self._cache), -1):
                result = self._cache[other_i % len(self._cache)]
                if result is None:
                    continue
                if result.frame_no < i:
                    break
            else:  # cache is empty: return an empty frame
                return Frame(np.zeros(self.frame_shape,
                                      dtype=self.pixel_type),
                             frame_no=i)

        return result.to_frame()

    def seek(self, i):
        """Seek to a frame before i and return the first frame."""
        # flush the cache
        self._cache = [None] * len(self._cache)
        # the ffmpeg decode cache is flushed automatically

        timestamp = int(i / (self._frame_rate * self._stream.time_base))
        # NOTE(review): seek() is called without stream=; presumably the
        # offset is interpreted against this stream's time base — confirm
        # against the PyAV version in use.
        self._stream.container.seek(timestamp + self._first_pts)

        # check the first frame
        try:
            frame = next(self._frame_generator)
        except StopIteration:
            self._reset_demuxer()
            try:
                frame = next(self._frame_generator)
            except StopIteration:
                return None

        if i == 0:  # security measure to avoid infinite recursion
            return frame
        if frame.frame_no > i:
            # recurse with an additional offset of 16 frames
            return self.seek(i - 16)

        # add the frame to the cache if successful
        self._cache[frame.frame_no % len(self._cache)] = frame
        self._last_frame = frame.frame_no
        return frame

    @property
    def pixel_type(self):
        # PyAV frames are converted to rgb24, hence uint8.
        return np.uint8

    def __repr__(self):
        # May be overwritten by subclasses
        return """<Frames>
Format: {format}
Source: {filename}
Duration: {duration:.3f} seconds
Frame rate: {frame_rate:.3f} fps
Length: {count} frames
Frame Shape: {frame_shape!r}
""".format(frame_shape=self.frame_shape,
           format=self._stream.long_name,
           duration=self.duration,
           frame_rate=self.frame_rate,
           count=len(self),
           filename=self.file)
class PyAVReaderIndexed(FramesSequence):
    """Read images from the frames of a standard video file into an
    iterable object that returns images as numpy arrays.

    Parameters
    ----------
    filename : string

    Examples
    --------
    >>> video = Video('video.avi')  # or .mov, etc.
    >>> imshow(video[0]) # Show the first frame.
    >>> imshow(video[-1]) # Show the last frame.
    >>> imshow(video[1][0:10, 0:10]) # Show one corner of the second frame.

    >>> for frame in video[:]:
    ...    # Do something with every frame.

    >>> for frame in video[10:20]:
    ...    # Do something with frames 10-20.

    >>> for frame in video[[5, 7, 13]]:
    ...    # Do something with frames 5, 7, and 13.

    >>> frame_count = len(video) # Number of frames in video
    >>> frame_shape = video.frame_shape # Pixel dimensions of video
    """
    class_priority = 8

    @classmethod
    def class_exts(cls):
        return {'mov', 'avi',
                'mp4'} | super(PyAVReaderIndexed, cls).class_exts()

    def __init__(self, file, toc=None, format=None):
        if not hasattr(file, 'read'):
            file = str(file)
        self.file = file
        self.format = format
        self._container = None

        with av.open(self.file, format=self.format) as container:
            stream = [s for s in container.streams if s.type == 'video'][0]

            # Build a toc
            if toc is None:
                packet_lengths = []
                packet_ts = []
                for packet in container.demux(stream):
                    if packet.stream.type == 'video':
                        decoded = packet.decode()
                        if len(decoded) > 0:
                            packet_lengths.append(len(decoded))
                            packet_ts.append(decoded[0].pts)
                self._toc = {
                    'lengths': packet_lengths,
                    'ts': packet_ts,
                }
            else:
                self._toc = toc

            self._toc_cumsum = np.cumsum(self.toc['lengths'])
            # cast: cumsum yields a numpy integer; __len__ should return int
            self._len = int(self._toc_cumsum[-1])

            # PyAV always returns frames in color, and we make that
            # assumption in get_frame() later below, so 3 is hardcoded here:
            self._im_sz = stream.height, stream.width, 3
            self._time_base = stream.time_base

        self._load_fresh_file()

    def _load_fresh_file(self):
        """(Re)open the container and position at the first video packet."""
        if self._container is not None:
            self._container.close()

        if hasattr(self.file, 'seek'):
            self.file.seek(0)

        self._container = av.open(self.file, format=self.format)
        demux = self._container.demux(self._video_stream)
        self._current_packet = _next_video_packet(demux)
        self._current_packet_no = 0

    @property
    def _video_stream(self):
        return [s for s in self._container.streams if s.type == 'video'][0]

    def __len__(self):
        return self._len

    def __del__(self):
        # __init__ may have failed before a container was opened; guard so
        # that garbage collection does not raise AttributeError.
        container = getattr(self, '_container', None)
        if container is not None:
            container.close()

    @property
    def frame_shape(self):
        """Pixel dimensions of a frame: (height, width, 3)."""
        return self._im_sz

    @property
    def toc(self):
        """The table of contents: packet lengths and timestamps."""
        return self._toc

    def get_frame(self, j):
        """Return frame ``j`` as a pims Frame (rgb24 ndarray)."""
        # Find the packet this frame is in.
        packet_no = self._toc_cumsum.searchsorted(j, side='right')
        self._seek_packet(packet_no)
        # Find the location of the frame within the packet.
        if packet_no == 0:
            loc = j
        else:
            loc = j - self._toc_cumsum[packet_no - 1]
        frame = self._current_packet[loc]  # av.VideoFrame
        return Frame(frame.to_ndarray(format='rgb24'), frame_no=j)

    def _seek_packet(self, packet_no):
        """Advance through the container generator until we get the packet
        we want. Store that packet in self._current_packet."""
        packet_ts = self.toc['ts'][packet_no]

        # Only seek when needed.
        if packet_no == self._current_packet_no:
            return
        elif (packet_no < self._current_packet_no
              or packet_no > self._current_packet_no + 1):
            self._container.seek(packet_ts, stream=self._video_stream)

        demux = self._container.demux(self._video_stream)
        self._current_packet = _next_video_packet(demux)
        while self._current_packet[0].pts < packet_ts:
            self._current_packet = _next_video_packet(demux)

        self._current_packet_no = packet_no

    @property
    def pixel_type(self):
        # No need to detect dtype: PyAV always returns uint8.
        return np.uint8

    def __repr__(self):
        # May be overwritten by subclasses
        return """<Frames>
Source: {filename}
Length: {count} frames
Frame Shape: {frame_shape!r}
""".format(frame_shape=self.frame_shape,
           count=len(self),
           filename=self.file)