Skip to content

Commit

Permalink
Check for experimental codecs
Browse files Browse the repository at this point in the history
  • Loading branch information
WyattBlue committed Dec 3, 2024
1 parent 9fd986e commit 2fa6148
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 53 deletions.
111 changes: 62 additions & 49 deletions auto_editor/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
from auto_editor.utils.log import Log


__all__ = ("LevelError", "Levels", "iter_audio", "iter_motion")


class LevelError(Exception):
pass

Expand Down Expand Up @@ -69,45 +72,39 @@ def mut_remove_large(
active = False


def iter_audio(src, tb: Fraction, stream: int = 0) -> Iterator[np.float32]:
def iter_audio(audio_stream: av.AudioStream, tb: Fraction) -> Iterator[np.float32]:
fifo = AudioFifo()
try:
container = av.open(src.path, "r")
audio_stream = container.streams.audio[stream]
sample_rate = audio_stream.rate
sr = audio_stream.rate

exact_size = (1 / tb) * sample_rate
accumulated_error = 0
exact_size = (1 / tb) * sr
accumulated_error = Fraction(0)

# Resample so that audio data is between [-1, 1]
resampler = av.AudioResampler(
av.AudioFormat("flt"), audio_stream.layout, sample_rate
)
# Resample so that audio data is between [-1, 1]
resampler = av.AudioResampler(av.AudioFormat("flt"), audio_stream.layout, sr)

for frame in container.decode(audio=stream):
frame.pts = None # Skip time checks
container = audio_stream.container
assert isinstance(container, av.container.InputContainer)

for reframe in resampler.resample(frame):
fifo.write(reframe)
for frame in container.decode(audio_stream):
frame.pts = None # Skip time checks

while fifo.samples >= ceil(exact_size):
size_with_error = exact_size + accumulated_error
current_size = round(size_with_error)
accumulated_error = size_with_error - current_size
for reframe in resampler.resample(frame):
fifo.write(reframe)

audio_chunk = fifo.read(current_size)
assert audio_chunk is not None
arr = audio_chunk.to_ndarray().flatten()
yield np.max(np.abs(arr))

finally:
container.close()
while fifo.samples >= ceil(exact_size):
size_with_error = exact_size + accumulated_error
current_size = round(size_with_error)
accumulated_error = size_with_error - current_size

audio_chunk = fifo.read(current_size)
assert audio_chunk is not None
arr = audio_chunk.to_ndarray().flatten()
yield np.max(np.abs(arr))

def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.float32]:
container = av.open(src.path, "r")

video = container.streams.video[stream]
def iter_motion(
video: av.VideoStream, tb: Fraction, blur: int, width: int
) -> Iterator[np.float32]:
video.thread_type = "AUTO"

prev_frame = None
Expand All @@ -125,6 +122,9 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.floa
graph.add("buffersink"),
).configure()

container = video.container
assert isinstance(container, av.container.InputContainer)

for unframe in container.decode(video):
if unframe.pts is None:
continue
Expand All @@ -151,8 +151,6 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.floa
prev_frame = current_frame
prev_index = index

container.close()


def obj_tag(path: Path, kind: str, tb: Fraction, obj: Sequence[object]) -> str:
mod_time = int(path.stat().st_mtime)
Expand All @@ -175,7 +173,11 @@ def media_length(self) -> int:
if (arr := self.read_cache("audio", (0,))) is not None:
return len(arr)

result = sum(1 for _ in iter_audio(self.src, self.tb, 0))
with av.open(self.src.path, "r") as container:
audio_stream = container.streams.audio[0]
self.log.experimental(audio_stream.codec)
result = sum(1 for _ in iter_audio(audio_stream, self.tb))

self.log.debug(f"Audio Length: {result}")
return result

Expand Down Expand Up @@ -239,21 +241,26 @@ def audio(self, stream: int) -> NDArray[np.float32]:
if (arr := self.read_cache("audio", (stream,))) is not None:
return arr

with av.open(self.src.path, "r") as container:
audio = container.streams.audio[stream]
if audio.duration is not None and audio.time_base is not None:
inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
elif container.duration is not None:
inaccurate_dur = int(container.duration / av.time_base * self.tb)
else:
inaccurate_dur = 1024
container = av.open(self.src.path, "r")
audio = container.streams.audio[stream]

if audio.codec.experimental:
self.log.error(f"`{audio.codec.name}` is an experimental codec")

if audio.duration is not None and audio.time_base is not None:
inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
elif container.duration is not None:
inaccurate_dur = int(container.duration / av.time_base * self.tb)
else:
inaccurate_dur = 1024

bar = self.bar
bar.start(inaccurate_dur, "Analyzing audio volume")

result = np.zeros((inaccurate_dur), dtype=np.float32)
index = 0
for value in iter_audio(self.src, self.tb, stream):

for value in iter_audio(audio, self.tb):
if index > len(result) - 1:
result = np.concatenate(
(result, np.zeros((len(result)), dtype=np.float32))
Expand All @@ -263,6 +270,7 @@ def audio(self, stream: int) -> NDArray[np.float32]:
index += 1

bar.end()
assert len(result) > 0
return self.cache(result[:index], "audio", (stream,))

def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float32]:
Expand All @@ -273,20 +281,25 @@ def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float32]:
if (arr := self.read_cache("motion", mobj)) is not None:
return arr

with av.open(self.src.path, "r") as container:
video = container.streams.video[stream]
inaccurate_dur = (
1024
if video.duration is None or video.time_base is None
else int(video.duration * video.time_base * self.tb)
)
container = av.open(self.src.path, "r")
video = container.streams.video[stream]

if video.codec.experimental:
self.log.experimental(video.codec)

inaccurate_dur = (
1024
if video.duration is None or video.time_base is None
else int(video.duration * video.time_base * self.tb)
)

bar = self.bar
bar.start(inaccurate_dur, "Analyzing motion")

result = np.zeros((inaccurate_dur), dtype=np.float32)
index = 0
for value in iter_motion(self.src, self.tb, stream, blur, width):

for value in iter_motion(video, self.tb, blur, width):
if index > len(result) - 1:
result = np.concatenate(
(result, np.zeros((len(result)), dtype=np.float32))
Expand Down
2 changes: 2 additions & 0 deletions auto_editor/make_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def make_timeline(
has_loud = concat((has_loud, result))
src_index = concat((src_index, np.full(len(result), i, dtype=np.int32)))

assert len(has_loud) > 0

# Setup for handling custom speeds
speed_index = has_loud.astype(np.uint)
speed_map = [args.silent_speed, args.video_speed]
Expand Down
17 changes: 14 additions & 3 deletions auto_editor/subcommands/levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from fractions import Fraction
from typing import TYPE_CHECKING

import av
import numpy as np

from auto_editor.analyze import LevelError, Levels, iter_audio, iter_motion
from auto_editor.analyze import *
from auto_editor.ffwrapper import initFileInfo
from auto_editor.lang.palet import env
from auto_editor.lib.contracts import is_bool, is_nat, is_nat1, is_str, is_void, orc
Expand Down Expand Up @@ -130,9 +131,19 @@ def main(sys_args: list[str] = sys.argv[1:]) -> None:
levels = Levels(src, tb, bar, False, log, strict=True)
try:
if method == "audio":
print_arr_gen(iter_audio(src, tb, **obj))
container = av.open(src.path, "r")
audio_stream = container.streams.audio[obj["stream"]]
log.experimental(audio_stream.codec)
print_arr_gen(iter_audio(audio_stream, tb))
container.close()

elif method == "motion":
print_arr_gen(iter_motion(src, tb, **obj))
container = av.open(src.path, "r")
video_stream = container.streams.video[obj["stream"]]
log.experimental(video_stream.codec)
print_arr_gen(iter_motion(video_stream, tb, obj["blur"], obj["width"]))
container.close()

elif method == "subtitle":
print_arr(levels.subtitle(**obj))
elif method == "none":
Expand Down
6 changes: 6 additions & 0 deletions auto_editor/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from time import perf_counter, sleep
from typing import NoReturn

import av


class Log:
__slots__ = ("is_debug", "quiet", "machine", "no_color", "_temp", "_ut", "_s")
Expand Down Expand Up @@ -97,6 +99,10 @@ def stop_timer(self) -> None:

sys.stdout.write(f"Finished. took {second_len} seconds ({minute_len})\n")

def experimental(self, codec: av.Codec) -> None:
    """Report an error when `codec` is flagged as experimental.

    Reads `codec.experimental`; when it is truthy, calls `self.error`
    with a message naming the codec via `codec.name`. Otherwise does
    nothing and returns None.

    NOTE(review): `self.error` is annotated `NoReturn`, so this
    presumably never returns for an experimental codec -- confirm against
    the full body of `error`.
    """
    if codec.experimental:
        self.error(f"`{codec.name}` is an experimental codec")

def error(self, message: str | Exception) -> NoReturn:
if self.is_debug and isinstance(message, Exception):
self.cleanup()
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [{ name = "WyattBlue", email = "[email protected]" }]
requires-python = ">=3.10,<3.14"
dependencies = [
"numpy>=1.24,<3.0",
"pyav==14.0.0rc4",
"pyav==14.*",
]
keywords = [
"video", "audio", "media", "editor", "editing",
Expand Down

0 comments on commit 2fa6148

Please sign in to comment.