diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4a30385f9..e65107428 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,7 +17,16 @@ We are operating with `semantic versioning <https://semver.org>`_. are merged into the "default" branch. +v14.1.0 (Unreleased) +-------------------- + +Features: + +- Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`). + + v14.0.1 +------- Fixes: diff --git a/av/__init__.py b/av/__init__.py index cbc3c8a2f..e2f9e5a6d 100644 --- a/av/__init__.py +++ b/av/__init__.py @@ -17,6 +17,7 @@ from av.bitstream import BitStreamFilterContext, bitstream_filters_available from av.codec.codec import Codec, codecs_available from av.codec.context import CodecContext +from av.codec.hwaccel import HWConfig from av.container import open from av.format import ContainerFormat, formats_available from av.packet import Packet diff --git a/av/__main__.py b/av/__main__.py index bc353d147..9e2b9d0ac 100644 --- a/av/__main__.py +++ b/av/__main__.py @@ -6,6 +6,8 @@ def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--codecs", action="store_true") + parser.add_argument("--hwdevices", action="store_true") + parser.add_argument("--hwconfigs", action="store_true") parser.add_argument("--version", action="store_true") args = parser.parse_args() @@ -30,6 +32,18 @@ def main() -> None: version = config["version"] print(f"{libname:<13} {version[0]:3d}.{version[1]:3d}.{version[2]:3d}") + if args.hwdevices: + from av.codec.hwaccel import hwdevices_available + + print("Hardware device types:") + for x in hwdevices_available(): + print(" ", x) + + if args.hwconfigs: + from av.codec.codec import dump_hwconfigs + + dump_hwconfigs() + if args.codecs: from av.codec.codec import dump_codecs diff --git a/av/about.py b/av/about.py index 4fcf9b8bb..217fb25cb 100644 --- a/av/about.py +++ b/av/about.py @@ -1 +1 @@ -__version__ = "14.0.1" +__version__ = "14.1.0" diff --git a/av/audio/codeccontext.pyx b/av/audio/codeccontext.pyx index 
54319ddaf..856af555c 100644 --- a/av/audio/codeccontext.pyx +++ b/av/audio/codeccontext.pyx @@ -3,13 +3,14 @@ cimport libav as lib from av.audio.format cimport AudioFormat, get_audio_format from av.audio.frame cimport AudioFrame, alloc_audio_frame from av.audio.layout cimport AudioLayout, get_audio_layout +from av.codec.hwaccel cimport HWAccel from av.frame cimport Frame from av.packet cimport Packet cdef class AudioCodecContext(CodecContext): - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): - CodecContext._init(self, ptr, codec) + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): + CodecContext._init(self, ptr, codec, hwaccel) cdef _prepare_frames_for_encode(self, Frame input_frame): diff --git a/av/codec/codec.pxd b/av/codec/codec.pxd index b9925df13..576c659b4 100644 --- a/av/codec/codec.pxd +++ b/av/codec/codec.pxd @@ -7,6 +7,8 @@ cdef class Codec: cdef const lib.AVCodecDescriptor *desc cdef readonly bint is_encoder + cdef tuple _hardware_configs + cdef _init(self, name=?) diff --git a/av/codec/codec.pyi b/av/codec/codec.pyi index 32736c080..73c1f1edb 100644 --- a/av/codec/codec.pyi +++ b/av/codec/codec.pyi @@ -108,3 +108,4 @@ class codec_descriptor: codecs_available: set[str] def dump_codecs() -> None: ... +def dump_hwconfigs() -> None: ... 
diff --git a/av/codec/codec.pyx b/av/codec/codec.pyx index 1493f0f7b..2dab166ab 100644 --- a/av/codec/codec.pyx +++ b/av/codec/codec.pyx @@ -1,4 +1,5 @@ from av.audio.format cimport get_audio_format +from av.codec.hwaccel cimport wrap_hwconfig from av.descriptor cimport wrap_avclass from av.utils cimport avrational_to_fraction from av.video.format cimport get_video_format @@ -117,6 +118,10 @@ cdef class Codec: if self.is_encoder and lib.av_codec_is_decoder(self.ptr): raise RuntimeError("%s is both encoder and decoder.") + def __repr__(self): + mode = "w" if self.is_encoder else "r" + return f"" + def create(self, kind = None): """Create a :class:`.CodecContext` for this codec. @@ -203,6 +208,23 @@ cdef class Codec: i += 1 return ret + @property + def hardware_configs(self): + if self._hardware_configs: + return self._hardware_configs + ret = [] + cdef int i = 0 + cdef lib.AVCodecHWConfig *ptr + while True: + ptr = lib.avcodec_get_hw_config(self.ptr, i) + if not ptr: + break + ret.append(wrap_hwconfig(ptr)) + i += 1 + ret = tuple(ret) + self._hardware_configs = ret + return ret + @property def properties(self): return self.desc.props @@ -337,3 +359,19 @@ def dump_codecs(): ) except Exception as e: print(f"...... 
{codec.name:<18} ERROR: {e}") + +def dump_hwconfigs(): + print("Hardware configs:") + for name in sorted(codecs_available): + try: + codec = Codec(name, "r") + except ValueError: + continue + + configs = codec.hardware_configs + if not configs: + continue + + print(" ", codec.name) + for config in configs: + print(" ", config) diff --git a/av/codec/context.pxd b/av/codec/context.pxd index 42b2d63e7..7ba89dab7 100644 --- a/av/codec/context.pxd +++ b/av/codec/context.pxd @@ -3,6 +3,7 @@ from libc.stdint cimport int64_t from av.bytesource cimport ByteSource from av.codec.codec cimport Codec +from av.codec.hwaccel cimport HWAccel from av.frame cimport Frame from av.packet cimport Packet @@ -18,11 +19,12 @@ cdef class CodecContext: cdef int stream_index cdef lib.AVCodecParserContext *parser - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec) + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel) # Public API. cdef readonly bint is_open cdef readonly Codec codec + cdef readonly HWAccel hwaccel cdef public dict options cpdef open(self, bint strict=?) @@ -31,6 +33,9 @@ cdef class CodecContext: cpdef decode(self, Packet packet=?) cpdef flush_buffers(self) + # Used by hardware-accelerated decode. + cdef HWAccel hwaccel_ctx + # Used by both transcode APIs to setup user-land objects. # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets # are bogus). It should take all info it needs from the context and/or stream. @@ -49,10 +54,11 @@ cdef class CodecContext: cdef _send_packet_and_recv(self, Packet packet) cdef _recv_frame(self) + cdef _transfer_hwframe(self, Frame frame) + # Implemented by children for the generic send/recv API, so we have the # correct subclass of Frame. 
cdef Frame _next_frame cdef Frame _alloc_next_frame(self) - -cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*) +cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*, HWAccel hwaccel) diff --git a/av/codec/context.pyi b/av/codec/context.pyi index a6ca9647e..543da208e 100644 --- a/av/codec/context.pyi +++ b/av/codec/context.pyi @@ -5,6 +5,7 @@ from typing import ClassVar, Literal from av.packet import Packet from .codec import Codec +from .hwaccel import HWAccel class ThreadType(Flag): NONE: ClassVar[ThreadType] @@ -83,10 +84,14 @@ class CodecContext: def delay(self) -> bool: ... @property def extradata_size(self) -> int: ... + @property + def is_hwaccel(self) -> bool: ... def open(self, strict: bool = True) -> None: ... @staticmethod def create( - codec: str | Codec, mode: Literal["r", "w"] | None = None + codec: str | Codec, + mode: Literal["r", "w"] | None = None, + hwaccel: HWAccel | None = None, ) -> CodecContext: ... def parse( self, raw_input: bytes | bytearray | memoryview | None = None diff --git a/av/codec/context.pyx b/av/codec/context.pyx index 29b7b80d1..e7136b86f 100644 --- a/av/codec/context.pyx +++ b/av/codec/context.pyx @@ -18,7 +18,7 @@ from av.dictionary import Dictionary cdef object _cinit_sentinel = object() -cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec): +cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec, HWAccel hwaccel): """Build an av.CodecContext for an existing AVCodecContext.""" cdef CodecContext py_ctx @@ -35,7 +35,7 @@ cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCode else: py_ctx = CodecContext(_cinit_sentinel) - py_ctx._init(c_ctx, c_codec) + py_ctx._init(c_ctx, c_codec, hwaccel) return py_ctx @@ -83,10 +83,10 @@ class Flags2(IntEnum): cdef class CodecContext: @staticmethod - def create(codec, mode=None): + def create(codec, mode=None, hwaccel=None): cdef Codec 
cy_codec = codec if isinstance(codec, Codec) else Codec(codec, mode) cdef lib.AVCodecContext *c_ctx = lib.avcodec_alloc_context3(cy_codec.ptr) - return wrap_codec_context(c_ctx, cy_codec.ptr) + return wrap_codec_context(c_ctx, cy_codec.ptr, hwaccel) def __cinit__(self, sentinel=None, *args, **kwargs): if sentinel is not _cinit_sentinel: @@ -96,11 +96,12 @@ cdef class CodecContext: self.stream_index = -1 # This is set by the container immediately. self.is_open = False - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): + cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): self.ptr = ptr if self.ptr.codec and codec and self.ptr.codec != codec: raise RuntimeError("Wrapping CodecContext with mismatched codec.") self.codec = wrap_codec(codec if codec != NULL else self.ptr.codec) + self.hwaccel = hwaccel # Set reasonable threading defaults. self.ptr.thread_count = 0 # use as many threads as there are CPUs. @@ -310,6 +311,13 @@ cdef class CodecContext: return packets + @property + def is_hwaccel(self): + """ + Returns ``True`` if this codec context is hardware accelerated, ``False`` otherwise. 
+ """ + return self.hwaccel_ctx is not None + def _send_frame_and_recv(self, Frame frame): cdef Packet packet @@ -359,10 +367,15 @@ cdef class CodecContext: return err_check(res) + frame = self._transfer_hwframe(frame) + if not res: self._next_frame = None return frame + cdef _transfer_hwframe(self, Frame frame): + return frame + cdef _recv_packet(self): cdef Packet packet = Packet() diff --git a/av/codec/hwaccel.pxd b/av/codec/hwaccel.pxd new file mode 100644 index 000000000..e68f43bb1 --- /dev/null +++ b/av/codec/hwaccel.pxd @@ -0,0 +1,20 @@ +cimport libav as lib + +from av.codec.codec cimport Codec + + +cdef class HWConfig: + cdef object __weakref__ + cdef lib.AVCodecHWConfig *ptr + cdef void _init(self, lib.AVCodecHWConfig *ptr) + +cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr) + +cdef class HWAccel: + cdef int _device_type + cdef str _device + cdef readonly Codec codec + cdef readonly HWConfig config + cdef lib.AVBufferRef *ptr + cdef public bint allow_software_fallback + cdef public dict options diff --git a/av/codec/hwaccel.pyi b/av/codec/hwaccel.pyi new file mode 100644 index 000000000..7e4748e23 --- /dev/null +++ b/av/codec/hwaccel.pyi @@ -0,0 +1,48 @@ +from enum import IntEnum + +from av.codec.codec import Codec +from av.video.format import VideoFormat + +class HWDeviceType(IntEnum): + none: int + vdpau: int + cuda: int + vaapi: int + dxva2: int + qsv: int + videotoolbox: int + d3d11va: int + drm: int + opencl: int + mediacodec: int + vulkan: int + d3d12va: int + +class HWConfigMethod(IntEnum): + none: int + hw_device_ctx: int + hw_frame_ctx: int + internal: int + ad_hoc: int + +class HWConfig: + @property + def device_type(self) -> HWDeviceType: ... + @property + def format(self) -> VideoFormat: ... + @property + def methods(self) -> HWConfigMethod: ... + @property + def is_supported(self) -> bool: ... 
+ +class HWAccel: + def __init__( + self, + device_type: str | HWDeviceType, + device: str | None = None, + allow_software_fallback: bool = False, + options: dict[str, object] | None = None, + ) -> None: ... + def create(self, codec: Codec) -> HWAccel: ... + +def hwdevices_available() -> list[str]: ... diff --git a/av/codec/hwaccel.pyx b/av/codec/hwaccel.pyx new file mode 100644 index 000000000..1c96d02e8 --- /dev/null +++ b/av/codec/hwaccel.pyx @@ -0,0 +1,151 @@ +import weakref +from enum import IntEnum + +cimport libav as lib + +from av.codec.codec cimport Codec +from av.dictionary cimport _Dictionary +from av.error cimport err_check +from av.video.format cimport get_video_format +from av.dictionary import Dictionary + + +class HWDeviceType(IntEnum): + none = lib.AV_HWDEVICE_TYPE_NONE + vdpau = lib.AV_HWDEVICE_TYPE_VDPAU + cuda = lib.AV_HWDEVICE_TYPE_CUDA + vaapi = lib.AV_HWDEVICE_TYPE_VAAPI + dxva2 = lib.AV_HWDEVICE_TYPE_DXVA2 + qsv = lib.AV_HWDEVICE_TYPE_QSV + videotoolbox = lib.AV_HWDEVICE_TYPE_VIDEOTOOLBOX + d3d11va = lib.AV_HWDEVICE_TYPE_D3D11VA + drm = lib.AV_HWDEVICE_TYPE_DRM + opencl = lib.AV_HWDEVICE_TYPE_OPENCL + mediacodec = lib.AV_HWDEVICE_TYPE_MEDIACODEC + vulkan = lib.AV_HWDEVICE_TYPE_VULKAN + d3d12va = lib.AV_HWDEVICE_TYPE_D3D12VA + +class HWConfigMethod(IntEnum): + none = 0 + hw_device_ctx = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX # This is the only one we support. 
+ hw_frame_ctx = lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX + internal = lib.AV_CODEC_HW_CONFIG_METHOD_INTERNAL + ad_hoc = lib.AV_CODEC_HW_CONFIG_METHOD_AD_HOC + + +cdef object _cinit_sentinel = object() +cdef object _singletons = weakref.WeakValueDictionary() + +cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr): + try: + return _singletons[<size_t>ptr]  # The cache is keyed on the pointer's integer address. + except KeyError: + pass + cdef HWConfig config = HWConfig(_cinit_sentinel) + config._init(ptr) + _singletons[<size_t>ptr] = config + return config + + +cdef class HWConfig: + def __init__(self, sentinel): + if sentinel is not _cinit_sentinel: + raise RuntimeError("Cannot instantiate HWConfig") + + cdef void _init(self, lib.AVCodecHWConfig *ptr): + self.ptr = ptr + + def __repr__(self): + return ( + f"<av.{self.__class__.__name__} " + f"device_type={self.device_type.name} " + f"format={self.format.name if self.format else None} " + f"is_supported={self.is_supported} at 0x{<size_t>self.ptr:x}>" + ) + + @property + def device_type(self): + return HWDeviceType(self.ptr.device_type) + + @property + def format(self): + return get_video_format(self.ptr.pix_fmt, 0, 0) + + @property + def methods(self): + return HWConfigMethod(self.ptr.methods) + + @property + def is_supported(self): + return bool(self.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) + + +cpdef hwdevices_available(): + result = [] + + cdef lib.AVHWDeviceType x = lib.AV_HWDEVICE_TYPE_NONE + while True: + x = lib.av_hwdevice_iterate_types(x) + if x == lib.AV_HWDEVICE_TYPE_NONE: + break + result.append(lib.av_hwdevice_get_type_name(HWDeviceType(x))) + + return result + + +cdef class HWAccel: + def __init__(self, device_type, device=None, codec=None, allow_software_fallback=True, options=None): + if isinstance(device_type, HWDeviceType): + self._device_type = device_type + elif isinstance(device_type, str): + self._device_type = int(lib.av_hwdevice_find_type_by_name(device_type)) + else: + raise ValueError("Unknown type for device_type") + + self._device = device + self.allow_software_fallback = allow_software_fallback + self.options = {} if not options else dict(options) + self.ptr = NULL + self.codec = codec + self.config = None + + 
if codec: + self._initialize_hw_context() + + def _initialize_hw_context(self): + cdef HWConfig config + for config in self.codec.hardware_configs: + if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX): + continue + if self._device_type and config.device_type != self._device_type: + continue + break + else: + raise NotImplementedError(f"No supported hardware config for {self.codec}") + + self.config = config + + cdef char *c_device = NULL + if self._device: + device_bytes = self._device.encode() + c_device = device_bytes + cdef _Dictionary c_options = Dictionary(self.options) + + err_check( + lib.av_hwdevice_ctx_create( + &self.ptr, config.ptr.device_type, c_device, c_options.ptr, 0 + ) + ) + + def create(self, Codec codec not None): + """Create a new hardware accelerator context with the given codec""" + if self.ptr: + raise RuntimeError("Hardware context already initialized") + + self.codec = codec + self._initialize_hw_context() + return self + + def __dealloc__(self): + if self.ptr: + lib.av_buffer_unref(&self.ptr) diff --git a/av/container/core.pxd b/av/container/core.pxd index 1aed54b90..87bb792b3 100644 --- a/av/container/core.pxd +++ b/av/container/core.pxd @@ -1,5 +1,6 @@ cimport libav as lib +from av.codec.hwaccel cimport HWAccel from av.container.pyio cimport PyIOFile from av.container.streams cimport StreamContainer from av.dictionary cimport _Dictionary @@ -33,6 +34,8 @@ cdef class Container: cdef readonly dict container_options cdef readonly list stream_options + cdef HWAccel hwaccel + cdef readonly StreamContainer streams cdef readonly dict metadata diff --git a/av/container/core.pyi b/av/container/core.pyi index 227a7d32a..7c681b18b 100644 --- a/av/container/core.pyi +++ b/av/container/core.pyi @@ -4,6 +4,7 @@ from pathlib import Path from types import TracebackType from typing import Any, Callable, ClassVar, Literal, Type, overload +from av.codec.hwaccel import HWAccel from av.format import ContainerFormat from .input import 
InputContainer @@ -73,6 +74,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer: ... @overload def open( @@ -87,6 +89,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer: ... @overload def open( @@ -101,6 +104,7 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> OutputContainer: ... @overload def open( @@ -115,4 +119,5 @@ def open( buffer_size: int = 32768, timeout: Real | None | tuple[Real | None, Real | None] = None, io_open: Callable[..., Any] | None = None, + hwaccel: HWAccel | None = None, ) -> InputContainer | OutputContainer: ... diff --git a/av/container/core.pyx b/av/container/core.pyx index 563c79d21..201570c62 100755 --- a/av/container/core.pyx +++ b/av/container/core.pyx @@ -8,6 +8,7 @@ from pathlib import Path cimport libav as lib +from av.codec.hwaccel cimport HWAccel from av.container.core cimport timeout_info from av.container.input cimport InputContainer from av.container.output cimport OutputContainer @@ -143,7 +144,7 @@ class Flags(Flag): cdef class Container: def __cinit__(self, sentinel, file_, format_name, options, - container_options, stream_options, + container_options, stream_options, hwaccel, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open): @@ -164,6 +165,8 @@ cdef class Container: self.container_options = dict(container_options or ()) self.stream_options = [dict(x) for x in stream_options or ()] + self.hwaccel = hwaccel + self.metadata_encoding = metadata_encoding self.metadata_errors = metadata_errors @@ -296,6 +299,7 @@ def open( buffer_size=32768, timeout=None, io_open=None, 
+ hwaccel=None ): """open(file, mode='r', **kwargs) @@ -322,6 +326,7 @@ def open( ``url`` is the url to open, ``flags`` is a combination of AVIO_FLAG_* and ``options`` is a dictionary of additional options. The callable should return a file-like object. + :param HWAccel hwaccel: Optional settings for hardware-accelerated decoding. :rtype: Container For devices (via ``libavdevice``), pass the name of the device to ``format``, @@ -367,7 +372,7 @@ def open( if mode.startswith("r"): return InputContainer(_cinit_sentinel, file, format, options, - container_options, stream_options, metadata_encoding, metadata_errors, + container_options, stream_options, hwaccel, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open, ) @@ -376,6 +381,6 @@ def open( "Provide stream options via Container.add_stream(..., options={})." ) return OutputContainer(_cinit_sentinel, file, format, options, - container_options, stream_options, metadata_encoding, metadata_errors, + container_options, stream_options, None, metadata_encoding, metadata_errors, buffer_size, open_timeout, read_timeout, io_open, ) diff --git a/av/container/input.pyx b/av/container/input.pyx index 7246f8245..aa9940452 100644 --- a/av/container/input.pyx +++ b/av/container/input.pyx @@ -77,7 +77,7 @@ cdef class InputContainer(Container): codec_context = lib.avcodec_alloc_context3(codec) err_check(lib.avcodec_parameters_to_context(codec_context, stream.codecpar)) codec_context.pkt_timebase = stream.time_base - py_codec_context = wrap_codec_context(codec_context, codec) + py_codec_context = wrap_codec_context(codec_context, codec, self.hwaccel) else: # no decoder is available py_codec_context = None diff --git a/av/container/output.pyx b/av/container/output.pyx index a75e47d43..e61ef2297 100644 --- a/av/container/output.pyx +++ b/av/container/output.pyx @@ -125,7 +125,7 @@ cdef class OutputContainer(Container): err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context)) # 
Construct the user-land stream - cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec) + cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) @@ -179,7 +179,7 @@ cdef class OutputContainer(Container): err_check(lib.avcodec_parameters_from_context(stream.codecpar, codec_context)) # Construct the user-land stream - cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec) + cdef CodecContext py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) @@ -237,7 +237,7 @@ cdef class OutputContainer(Container): # Construct the user-land stream cdef CodecContext py_codec_context = None if codec_context != NULL: - py_codec_context = wrap_codec_context(codec_context, codec) + py_codec_context = wrap_codec_context(codec_context, codec, None) cdef Stream py_stream = wrap_stream(self, stream, py_codec_context) self.streams.add_stream(py_stream) diff --git a/av/video/codeccontext.pxd b/av/video/codeccontext.pxd index 9693caa9b..895ba74b1 100644 --- a/av/video/codeccontext.pxd +++ b/av/video/codeccontext.pxd @@ -1,3 +1,4 @@ +cimport libav as lib from av.codec.context cimport CodecContext from av.video.format cimport VideoFormat @@ -5,8 +6,19 @@ from av.video.frame cimport VideoFrame from av.video.reformatter cimport VideoReformatter +# The get_format callback in AVCodecContext is called by the decoder to pick a format out of a list. +# When we want accelerated decoding, we need to figure out ahead of time what the format should be, +# and find a way to pass that into our callback so we can return it to the decoder. We use the 'opaque' +# user data field in AVCodecContext for that. This is the struct we store a pointer to in that field. 
+cdef struct AVCodecPrivateData: + lib.AVPixelFormat hardware_pix_fmt + bint allow_software_fallback + + cdef class VideoCodecContext(CodecContext): + cdef AVCodecPrivateData _private_data + cdef VideoFormat _format cdef _build_format(self) diff --git a/av/video/codeccontext.pyx b/av/video/codeccontext.pyx index d2f4c9e14..92470c159 100644 --- a/av/video/codeccontext.pyx +++ b/av/video/codeccontext.pyx @@ -2,6 +2,8 @@ cimport libav as lib from libc.stdint cimport int64_t from av.codec.context cimport CodecContext +from av.codec.hwaccel cimport HWAccel, HWConfig +from av.error cimport err_check from av.frame cimport Frame from av.packet cimport Packet from av.utils cimport avrational_to_fraction, to_avrational @@ -10,13 +12,51 @@ from av.video.frame cimport VideoFrame, alloc_video_frame from av.video.reformatter cimport VideoReformatter +cdef lib.AVPixelFormat _get_hw_format(lib.AVCodecContext *ctx, const lib.AVPixelFormat *pix_fmts) noexcept: + # In the case where we requested accelerated decoding, the decoder first calls this function + # with a list that includes both the hardware format and software formats. + # First we try to pick the hardware format if it's in the list. + # However, if the decoder fails to initialize the hardware, it will call this function again, + # with only software formats in pix_fmts. We return ctx->sw_pix_fmt regardless in this case, + # because that should be in the candidate list. If not, we are out of ideas anyways. + cdef AVCodecPrivateData* private_data = ctx.opaque + i = 0 + while pix_fmts[i] != -1: + if pix_fmts[i] == private_data.hardware_pix_fmt: + return pix_fmts[i] + i += 1 + return ctx.sw_pix_fmt if private_data.allow_software_fallback else lib.AV_PIX_FMT_NONE + + cdef class VideoCodecContext(CodecContext): + def __cinit__(self, *args, **kwargs): self.last_w = 0 self.last_h = 0 - cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec): - CodecContext._init(self, ptr, codec) # TODO: Can this be `super`? 
+ cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel): + CodecContext._init(self, ptr, codec, hwaccel) # TODO: Can this be `super`? + + if hwaccel is not None: + try: + self.hwaccel_ctx = hwaccel.create(self.codec) + self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr) + self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt + self.ptr.get_format = _get_hw_format + self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt + self._private_data.allow_software_fallback = self.hwaccel.allow_software_fallback + self.ptr.opaque = &self._private_data + except NotImplementedError: + # Some streams may not have a hardware decoder. For example, many action + # cam videos have a low resolution mjpeg stream, which is usually not + # compatible with hardware decoders. + # The user may have passed in a hwaccel because they want to decode the main + # stream with it, so we shouldn't abort even if we find a stream that can't + # be HW decoded. + # If the user wants to make sure hwaccel is actually used, they can check the + # ``is_hwaccel`` property on each stream's codec context. + self.hwaccel_ctx = None + + self._build_format() self.encoded_frame_count = 0 @@ -58,6 +98,26 @@ cdef class VideoCodecContext(CodecContext): cdef VideoFrame vframe = frame vframe._init_user_attributes() + cdef _transfer_hwframe(self, Frame frame): + if self.hwaccel_ctx is None: + return frame + + if frame.ptr.format != self.hwaccel_ctx.config.ptr.pix_fmt: + # If we get a software frame, that means we are in software fallback mode, and don't actually + # need to transfer. + return frame + + cdef Frame frame_sw + + frame_sw = self._alloc_next_frame() + + err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0)) + + # TODO: Is there anything else to transfer?! 
+ frame_sw.pts = frame.pts + + return frame_sw + cdef _build_format(self): self._format = get_video_format(self.ptr.pix_fmt, self.ptr.width, self.ptr.height) diff --git a/examples/basics/hw_decode.py b/examples/basics/hw_decode.py new file mode 100644 index 000000000..1ce7a11af --- /dev/null +++ b/examples/basics/hw_decode.py @@ -0,0 +1,76 @@ +import os +import time + +import av +import av.datasets + +# What accelerator to use. +# Recommendations: +# Windows: +# - d3d11va (Direct3D 11) +# * available with built-in ffmpeg in PyAV binary wheels, and gives access to +# all decoders, but performance may not be as good as vendor native interfaces. +# - cuda (NVIDIA NVDEC), qsv (Intel QuickSync) +# * may be faster than d3d11va, but requires custom ffmpeg built with those libraries. +# Linux (all options require custom FFmpeg): +# - vaapi (Intel, AMD) +# - cuda (NVIDIA) +# Mac: +# - videotoolbox +# * available with built-in ffmpeg in PyAV binary wheels, and gives access to +# all accelerators available on Macs. This is the only option on MacOS. + +HW_DEVICE = os.environ["HW_DEVICE"] if "HW_DEVICE" in os.environ else None + +if "TEST_FILE_PATH" in os.environ: + test_file_path = os.environ["TEST_FILE_PATH"] +else: + test_file_path = av.datasets.curated( + "pexels/time-lapse-video-of-night-sky-857195.mp4" + ) + +if HW_DEVICE is None: + print("Available hardware devices:", av.codec.hwaccel.hwdevices_available()) + print("Please set HW_DEVICE.") + exit() + +assert HW_DEVICE in av.codec.hwaccel.hwdevices_available(), f"{HW_DEVICE} not available." 
+ +print("Decoding in software (auto threading)...") + +container = av.open(test_file_path) + +container.streams.video[0].thread_type = "AUTO" + +start_time = time.time() +frame_count = 0 +for packet in container.demux(video=0): + for _ in packet.decode(): + frame_count += 1 + +sw_time = time.time() - start_time +sw_fps = frame_count / sw_time +assert frame_count == container.streams.video[0].frames +container.close() + +print(f"Decoded with software in {sw_time:.2f}s ({sw_fps:.2f} fps).") + +print(f"Decoding with {HW_DEVICE}") + +hwaccel = av.codec.hwaccel.HWAccel(device_type=HW_DEVICE, allow_software_fallback=False) + +# Note the additional argument here. +container = av.open(test_file_path, hwaccel=hwaccel) + +start_time = time.time() +frame_count = 0 +for packet in container.demux(video=0): + for _ in packet.decode(): + frame_count += 1 + +hw_time = time.time() - start_time +hw_fps = frame_count / hw_time +assert frame_count == container.streams.video[0].frames +container.close() + +print(f"Decoded with {HW_DEVICE} in {hw_time:.2f}s ({hw_fps:.2f} fps).") diff --git a/include/libav.pxd b/include/libav.pxd index c793b9988..e2fe323a4 100644 --- a/include/libav.pxd +++ b/include/libav.pxd @@ -4,11 +4,14 @@ include "libavutil/channel_layout.pxd" include "libavutil/dict.pxd" include "libavutil/error.pxd" include "libavutil/frame.pxd" +include "libavutil/hwcontext.pxd" include "libavutil/samplefmt.pxd" include "libavutil/motion_vector.pxd" include "libavcodec/avcodec.pxd" include "libavcodec/bsf.pxd" +include "libavcodec/hwaccel.pxd" + include "libavdevice/avdevice.pxd" include "libavformat/avformat.pxd" include "libswresample/swresample.pxd" diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd index 172c9cc65..bcb342373 100644 --- a/include/libavcodec/avcodec.pxd +++ b/include/libavcodec/avcodec.pxd @@ -213,6 +213,8 @@ cdef extern from "libavcodec/avcodec.h" nogil: AVFrame* coded_frame + void* opaque + int bit_rate int bit_rate_tolerance int 
mb_decision @@ -247,6 +249,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: int coded_height AVPixelFormat pix_fmt + AVPixelFormat sw_pix_fmt AVRational sample_aspect_ratio int gop_size # The number of pictures in a group of pictures, or 0 for intra_only. int max_b_frames @@ -266,6 +269,11 @@ cdef extern from "libavcodec/avcodec.h" nogil: int get_buffer(AVCodecContext *ctx, AVFrame *frame) void release_buffer(AVCodecContext *ctx, AVFrame *frame) + # Hardware acceleration + AVHWAccel *hwaccel + AVBufferRef *hw_device_ctx + AVPixelFormat (*get_format)(AVCodecContext *s, const AVPixelFormat *fmt) + # User Data void *opaque diff --git a/include/libavcodec/hwaccel.pxd b/include/libavcodec/hwaccel.pxd new file mode 100644 index 000000000..cb9ac41b6 --- /dev/null +++ b/include/libavcodec/hwaccel.pxd @@ -0,0 +1,19 @@ +cdef extern from "libavcodec/avcodec.h" nogil: + cdef enum: + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX, + AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, + AV_CODEC_HW_CONFIG_METHOD_INTERNAL, + AV_CODEC_HW_CONFIG_METHOD_AD_HOC, + cdef struct AVCodecHWConfig: + AVPixelFormat pix_fmt + int methods + AVHWDeviceType device_type + cdef const AVCodecHWConfig* avcodec_get_hw_config(const AVCodec *codec, int index) + cdef enum: + AV_HWACCEL_CODEC_CAP_EXPERIMENTAL + cdef struct AVHWAccel: + char *name + AVMediaType type + AVCodecID id + AVPixelFormat pix_fmt + int capabilities diff --git a/include/libavutil/buffer.pxd b/include/libavutil/buffer.pxd index daf86105b..d4ff4cd17 100644 --- a/include/libavutil/buffer.pxd +++ b/include/libavutil/buffer.pxd @@ -1,9 +1,18 @@ -from libc.stdint cimport uint8_t +from libc.stdint cimport intptr_t, uint8_t cdef extern from "libavutil/buffer.h" nogil: - AVBufferRef *av_buffer_create(uint8_t *data, size_t size, void (*free)(void *opaque, uint8_t *data), void *opaque, int flags) + AVBufferRef* av_buffer_ref(AVBufferRef *buf) void av_buffer_unref(AVBufferRef **buf) + cdef struct AVBuffer: + uint8_t *data + int size + intptr_t refcount + 
void (*free)(void *opaque, uint8_t *data) + void *opaque + int flags cdef struct AVBufferRef: + AVBuffer *buffer uint8_t *data + int size diff --git a/include/libavutil/hwcontext.pxd b/include/libavutil/hwcontext.pxd new file mode 100644 index 000000000..beda15a2c --- /dev/null +++ b/include/libavutil/hwcontext.pxd @@ -0,0 +1,24 @@ +cdef extern from "libavutil/hwcontext.h" nogil: + + enum AVHWDeviceType: + AV_HWDEVICE_TYPE_NONE + AV_HWDEVICE_TYPE_VDPAU + AV_HWDEVICE_TYPE_CUDA + AV_HWDEVICE_TYPE_VAAPI + AV_HWDEVICE_TYPE_DXVA2 + AV_HWDEVICE_TYPE_QSV + AV_HWDEVICE_TYPE_VIDEOTOOLBOX + AV_HWDEVICE_TYPE_D3D11VA + AV_HWDEVICE_TYPE_DRM + AV_HWDEVICE_TYPE_OPENCL + AV_HWDEVICE_TYPE_MEDIACODEC + AV_HWDEVICE_TYPE_VULKAN + AV_HWDEVICE_TYPE_D3D12VA + + cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags) + + cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) + cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type) + cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev) + + cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags) diff --git a/scripts/build-deps b/scripts/build-deps index 4cb90f074..de4a6e547 100755 --- a/scripts/build-deps +++ b/scripts/build-deps @@ -13,6 +13,31 @@ if [[ -e "$PYAV_LIBRARY_PREFIX/bin/ffmpeg" ]]; then exit 0 fi +# Add CUDA support if available +CONFFLAGS_NVIDIA="" +if [[ -e /usr/local/cuda ]]; then + # Get Nvidia headers for ffmpeg + cd $PYAV_LIBRARY_ROOT + if [[ ! 
-e "$PYAV_LIBRARY_ROOT/nv-codec-headers" ]]; then + git clone https://github.com/FFmpeg/nv-codec-headers.git + cd nv-codec-headers + make -j4 + make PREFIX="$PYAV_LIBRARY_PREFIX" install + fi + + PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH" + CONFFLAGS_NVIDIA="--enable-cuda \ + --enable-cuvid \ + --enable-nvenc \ + --enable-nonfree \ + --enable-libnpp \ + --extra-cflags=-I/usr/local/cuda/include \ + --extra-ldflags=-L/usr/local/cuda/lib64" +else + echo "WARNING: Did not find cuda libraries in /usr/local/cuda..." + echo " Building without NVIDIA NVENC/NVDEC support" +fi + mkdir -p "$PYAV_LIBRARY_ROOT" mkdir -p "$PYAV_LIBRARY_PREFIX" @@ -44,6 +69,7 @@ echo ./configure --enable-sse \ --enable-avx \ --enable-avx2 \ + $CONFFLAGS_NVIDIA \ --prefix="$PYAV_LIBRARY_PREFIX" \ || exit 2 echo diff --git a/tests/test_decode.py b/tests/test_decode.py index 05f636977..fc293d201 100644 --- a/tests/test_decode.py +++ b/tests/test_decode.py @@ -1,10 +1,47 @@ +import functools +import os +import pathlib from fractions import Fraction +import numpy as np +import pytest + import av from .common import TestCase, fate_suite +@functools.cache +def make_h264_test_video(path: str) -> None: + """Generates a black H264 test video for testing hardware decoding.""" + + # We generate a file here that's designed to be as compatible as possible with hardware + # decoders. Hardware decoders are sometimes very picky and the errors we get are often + # opaque, so there is nothing much we (PyAV) can do. The user needs to figure that out + # if they want to use hwaccel. We only want to test the PyAV plumbing here. + # Our video is H264, 1280x720p (note that some decoders have a minimum resolution limit), 24fps, + # 8-bit yuv420p. 
+ pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True) + output_container = av.open(path, "w") + stream = output_container.add_stream("libx264", rate=24) + assert isinstance(stream, av.VideoStream) + stream.width = 1280 + stream.height = 720 + stream.pix_fmt = "yuv420p" + + for _ in range(24): + frame = av.VideoFrame.from_ndarray( + np.zeros((720, 1280, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + output_container.mux(packet) + + for packet in stream.encode(): + output_container.mux(packet) + + output_container.close() + + class TestDecode(TestCase): def test_decoded_video_frame_count(self) -> None: container = av.open(fate_suite("h264/interlaced_crop.mp4")) @@ -165,3 +202,33 @@ def test_side_data(self) -> None: container = av.open(fate_suite("mov/displaymatrix.mov")) frame = next(container.decode(video=0)) assert frame.rotation == -90 + + def test_hardware_decode(self) -> None: + hwdevices_available = av.codec.hwaccel.hwdevices_available() + if "HWACCEL_DEVICE_TYPE" not in os.environ: + pytest.skip( + "Set the HWACCEL_DEVICE_TYPE to run this test. " + f"Options are {' '.join(hwdevices_available)}" + ) + + HWACCEL_DEVICE_TYPE = os.environ["HWACCEL_DEVICE_TYPE"] + assert ( + HWACCEL_DEVICE_TYPE in hwdevices_available + ), f"{HWACCEL_DEVICE_TYPE} not available" + + test_video_path = "tests/assets/black.mp4" + make_h264_test_video(test_video_path) + + hwaccel = av.codec.hwaccel.HWAccel( + device_type=HWACCEL_DEVICE_TYPE, allow_software_fallback=False + ) + + container = av.open(test_video_path, hwaccel=hwaccel) + video_stream = container.streams.video[0] + assert video_stream.codec_context.is_hwaccel + + frame_count = 0 + for frame in container.decode(video_stream): + frame_count += 1 + + assert frame_count == video_stream.frames