Check for experimental codecs #606

Merged
merged 1 commit on Dec 3, 2024
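The gist of the change: before analyzing a stream, auto-editor now opens the file with PyAV and refuses to proceed if FFmpeg marks the stream's codec as experimental. A minimal standalone sketch of that check, using only attributes the diff itself relies on (`Stream.codec`, `Codec.experimental`, `Codec.name`); the path and the error type are placeholders:

```python
import av

def bail_on_experimental(path: str) -> None:
    # Mirror of the check added in this PR: inspect each audio/video stream's
    # codec and refuse to analyze anything FFmpeg flags as experimental.
    with av.open(path, "r") as container:
        for stream in list(container.streams.audio) + list(container.streams.video):
            codec = stream.codec
            if codec.experimental:
                raise ValueError(f"`{codec.name}` is an experimental codec")

bail_on_experimental("input.mp4")  # placeholder path
```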
111 changes: 62 additions & 49 deletions auto_editor/analyze.py
@@ -27,6 +27,9 @@
 from auto_editor.utils.log import Log
 
 
+__all__ = ("LevelError", "Levels", "iter_audio", "iter_motion")
+
+
 class LevelError(Exception):
     pass
 
@@ -69,45 +72,39 @@ def mut_remove_large(
     active = False
 
 
-def iter_audio(src, tb: Fraction, stream: int = 0) -> Iterator[np.float32]:
+def iter_audio(audio_stream: av.AudioStream, tb: Fraction) -> Iterator[np.float32]:
     fifo = AudioFifo()
-    try:
-        container = av.open(src.path, "r")
-        audio_stream = container.streams.audio[stream]
-        sample_rate = audio_stream.rate
+    sr = audio_stream.rate
 
-        exact_size = (1 / tb) * sample_rate
-        accumulated_error = 0
+    exact_size = (1 / tb) * sr
+    accumulated_error = Fraction(0)
 
-        # Resample so that audio data is between [-1, 1]
-        resampler = av.AudioResampler(
-            av.AudioFormat("flt"), audio_stream.layout, sample_rate
-        )
+    # Resample so that audio data is between [-1, 1]
+    resampler = av.AudioResampler(av.AudioFormat("flt"), audio_stream.layout, sr)
 
-        for frame in container.decode(audio=stream):
-            frame.pts = None  # Skip time checks
+    container = audio_stream.container
+    assert isinstance(container, av.container.InputContainer)
 
-            for reframe in resampler.resample(frame):
-                fifo.write(reframe)
+    for frame in container.decode(audio_stream):
+        frame.pts = None  # Skip time checks
 
-            while fifo.samples >= ceil(exact_size):
-                size_with_error = exact_size + accumulated_error
-                current_size = round(size_with_error)
-                accumulated_error = size_with_error - current_size
+        for reframe in resampler.resample(frame):
+            fifo.write(reframe)
 
-                audio_chunk = fifo.read(current_size)
-                assert audio_chunk is not None
-                arr = audio_chunk.to_ndarray().flatten()
-                yield np.max(np.abs(arr))
+        while fifo.samples >= ceil(exact_size):
+            size_with_error = exact_size + accumulated_error
+            current_size = round(size_with_error)
+            accumulated_error = size_with_error - current_size
 
-    finally:
-        container.close()
+            audio_chunk = fifo.read(current_size)
+            assert audio_chunk is not None
+            arr = audio_chunk.to_ndarray().flatten()
+            yield np.max(np.abs(arr))
 
 
-def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.float32]:
-    container = av.open(src.path, "r")
-
-    video = container.streams.video[stream]
+def iter_motion(
+    video: av.VideoStream, tb: Fraction, blur: int, width: int
+) -> Iterator[np.float32]:
     video.thread_type = "AUTO"
 
     prev_frame = None
@@ -125,6 +122,9 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.float32]:
             graph.add("buffersink"),
         ).configure()
 
+    container = video.container
+    assert isinstance(container, av.container.InputContainer)
+
     for unframe in container.decode(video):
         if unframe.pts is None:
             continue
@@ -151,8 +151,6 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[np.float32]:
         prev_frame = current_frame
         prev_index = index
 
-    container.close()
-
 
 def obj_tag(path: Path, kind: str, tb: Fraction, obj: Sequence[object]) -> str:
     mod_time = int(path.stat().st_mtime)
@@ -175,7 +173,11 @@ def media_length(self) -> int:
         if (arr := self.read_cache("audio", (0,))) is not None:
             return len(arr)
 
-        result = sum(1 for _ in iter_audio(self.src, self.tb, 0))
+        with av.open(self.src.path, "r") as container:
+            audio_stream = container.streams.audio[0]
+            self.log.experimental(audio_stream.codec)
+            result = sum(1 for _ in iter_audio(audio_stream, self.tb))
+
         self.log.debug(f"Audio Length: {result}")
         return result
 
@@ -239,21 +241,26 @@ def audio(self, stream: int) -> NDArray[np.float32]:
         if (arr := self.read_cache("audio", (stream,))) is not None:
             return arr
 
-        with av.open(self.src.path, "r") as container:
-            audio = container.streams.audio[stream]
-            if audio.duration is not None and audio.time_base is not None:
-                inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
-            elif container.duration is not None:
-                inaccurate_dur = int(container.duration / av.time_base * self.tb)
-            else:
-                inaccurate_dur = 1024
+        container = av.open(self.src.path, "r")
+        audio = container.streams.audio[stream]
+
+        if audio.codec.experimental:
+            self.log.error(f"`{audio.codec.name}` is an experimental codec")
+
+        if audio.duration is not None and audio.time_base is not None:
+            inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
+        elif container.duration is not None:
+            inaccurate_dur = int(container.duration / av.time_base * self.tb)
+        else:
+            inaccurate_dur = 1024
 
         bar = self.bar
         bar.start(inaccurate_dur, "Analyzing audio volume")
 
         result = np.zeros((inaccurate_dur), dtype=np.float32)
         index = 0
-        for value in iter_audio(self.src, self.tb, stream):
+
+        for value in iter_audio(audio, self.tb):
             if index > len(result) - 1:
                 result = np.concatenate(
                     (result, np.zeros((len(result)), dtype=np.float32))
@@ -263,6 +270,7 @@ def audio(self, stream: int) -> NDArray[np.float32]:
             index += 1
 
         bar.end()
+        assert len(result) > 0
         return self.cache(result[:index], "audio", (stream,))
 
     def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float32]:
@@ -273,20 +281,25 @@ def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float32]:
         if (arr := self.read_cache("motion", mobj)) is not None:
             return arr
 
-        with av.open(self.src.path, "r") as container:
-            video = container.streams.video[stream]
-            inaccurate_dur = (
-                1024
-                if video.duration is None or video.time_base is None
-                else int(video.duration * video.time_base * self.tb)
-            )
+        container = av.open(self.src.path, "r")
+        video = container.streams.video[stream]
+
+        if video.codec.experimental:
+            self.log.experimental(video.codec)
+
+        inaccurate_dur = (
+            1024
+            if video.duration is None or video.time_base is None
+            else int(video.duration * video.time_base * self.tb)
+        )
 
         bar = self.bar
         bar.start(inaccurate_dur, "Analyzing motion")
 
         result = np.zeros((inaccurate_dur), dtype=np.float32)
         index = 0
-        for value in iter_motion(self.src, self.tb, stream, blur, width):
+
+        for value in iter_motion(video, self.tb, blur, width):
             if index > len(result) - 1:
                 result = np.concatenate(
                     (result, np.zeros((len(result)), dtype=np.float32))
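With this refactor, `iter_audio` and `iter_motion` no longer open the container themselves; the caller opens it, picks a stream, and passes the stream object in, as `Levels.audio` and `Levels.motion` now do. A minimal sketch of the new calling convention; the file name, timebase, and blur/width values below are placeholders:

```python
import av
from fractions import Fraction

from auto_editor.analyze import iter_audio, iter_motion

tb = Fraction(30)  # analysis timebase (ticks per second)

with av.open("input.mp4", "r") as container:
    audio_stream = container.streams.audio[0]
    # Yields one peak-amplitude value per timebase tick.
    volumes = list(iter_audio(audio_stream, tb))

with av.open("input.mp4", "r") as container:
    video_stream = container.streams.video[0]
    # Yields one relative-motion value per timebase tick.
    motions = list(iter_motion(video_stream, tb, blur=9, width=400))
```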
2 changes: 2 additions & 0 deletions auto_editor/make_layers.py
@@ -169,6 +169,8 @@ def make_timeline(
         has_loud = concat((has_loud, result))
         src_index = concat((src_index, np.full(len(result), i, dtype=np.int32)))
 
+    assert len(has_loud) > 0
+
     # Setup for handling custom speeds
     speed_index = has_loud.astype(np.uint)
     speed_map = [args.silent_speed, args.video_speed]
17 changes: 14 additions & 3 deletions auto_editor/subcommands/levels.py
@@ -5,9 +5,10 @@
 from fractions import Fraction
 from typing import TYPE_CHECKING
 
+import av
 import numpy as np
 
-from auto_editor.analyze import LevelError, Levels, iter_audio, iter_motion
+from auto_editor.analyze import *
 from auto_editor.ffwrapper import initFileInfo
 from auto_editor.lang.palet import env
 from auto_editor.lib.contracts import is_bool, is_nat, is_nat1, is_str, is_void, orc
@@ -130,9 +131,19 @@ def main(sys_args: list[str] = sys.argv[1:]) -> None:
     levels = Levels(src, tb, bar, False, log, strict=True)
     try:
         if method == "audio":
-            print_arr_gen(iter_audio(src, tb, **obj))
+            container = av.open(src.path, "r")
+            audio_stream = container.streams.audio[obj["stream"]]
+            log.experimental(audio_stream.codec)
+            print_arr_gen(iter_audio(audio_stream, tb))
+            container.close()
+
         elif method == "motion":
-            print_arr_gen(iter_motion(src, tb, **obj))
+            container = av.open(src.path, "r")
+            video_stream = container.streams.video[obj["stream"]]
+            log.experimental(video_stream.codec)
+            print_arr_gen(iter_motion(video_stream, tb, obj["blur"], obj["width"]))
+            container.close()
+
         elif method == "subtitle":
             print_arr(levels.subtitle(**obj))
         elif method == "none":
6 changes: 6 additions & 0 deletions auto_editor/utils/log.py
@@ -7,6 +7,8 @@
 from time import perf_counter, sleep
 from typing import NoReturn
 
+import av
+
 
 class Log:
     __slots__ = ("is_debug", "quiet", "machine", "no_color", "_temp", "_ut", "_s")
@@ -97,6 +99,10 @@ def stop_timer(self) -> None:
 
         sys.stdout.write(f"Finished. took {second_len} seconds ({minute_len})\n")
 
+    def experimental(self, codec: av.Codec) -> None:
+        if codec.experimental:
+            self.error(f"`{codec.name}` is an experimental codec")
+
     def error(self, message: str | Exception) -> NoReturn:
         if self.is_debug and isinstance(message, Exception):
             self.cleanup()
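Call sites in this diff pass the stream's codec straight to the new helper; a small sketch of that pattern (assumes an already-constructed `Log` instance named `log` and a placeholder path):

```python
import av

with av.open("input.mp4", "r") as container:
    audio = container.streams.audio[0]
    # No-op for ordinary codecs; calls log.error(...), which does not return,
    # when FFmpeg marks the codec as experimental.
    log.experimental(audio.codec)
```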
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ authors = [{ name = "WyattBlue", email = "[email protected]" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
     "numpy>=1.24,<3.0",
-    "pyav==14.0.0rc4",
+    "pyav==14.*",
 ]
 keywords = [
     "video", "audio", "media", "editor", "editing",