Skip to content

Commit

Permalink
Move iter_audio to analyze.py
Browse files Browse the repository at this point in the history
  • Loading branch information
WyattBlue committed Jul 27, 2024
1 parent f4135fa commit 5b4000a
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 167 deletions.
181 changes: 93 additions & 88 deletions auto_editor/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
import re
from dataclasses import dataclass
from fractions import Fraction
from math import ceil
from typing import TYPE_CHECKING

import av
import numpy as np
from av.audio.fifo import AudioFifo
from av.subtitles.subtitle import AssSubtitle

from auto_editor import version
from auto_editor.utils.subtitle_tools import convert_ass_to_text
from auto_editor.wavfile import read

if TYPE_CHECKING:
from collections.abc import Iterator
Expand All @@ -22,15 +23,13 @@
from numpy.typing import NDArray

from auto_editor.ffwrapper import FileInfo
from auto_editor.output import Ensure
from auto_editor.utils.bar import Bar
from auto_editor.utils.log import Log


@dataclass(slots=True)
class FileSetup:
src: FileInfo
ensure: Ensure
strict: bool
tb: Fraction
bar: Bar
Expand Down Expand Up @@ -89,6 +88,41 @@ def obj_tag(tag: str, tb: Fraction, obj: dict[str, Any]) -> str:
return key


def iter_audio(src, tb: Fraction, stream: int = 0) -> Iterator[float]:
    """Yield the peak absolute sample value for each timebase tick.

    Decodes the given audio ``stream`` of ``src``, resamples to float
    ("flt") so samples lie in [-1, 1], and yields one float per tick of
    ``tb``: the maximum absolute amplitude within that tick's window.

    Fractional samples-per-tick are handled by carrying the rounding
    error forward so chunk boundaries stay accurate over time.
    """
    fifo = AudioFifo()
    # Use a context manager: the original try/finally called
    # container.close() even when av.open() itself raised, which hit an
    # unbound local and masked the real error. `with` closes the container
    # on all paths and only after it was actually opened.
    with av.open(src.path, "r") as container:
        audio_stream = container.streams.audio[stream]
        sample_rate = audio_stream.rate

        # Exact (possibly fractional) number of samples in one tick.
        exact_size = (1 / tb) * sample_rate
        accumulated_error = 0

        # Resample so that audio data is between [-1, 1]
        resampler = av.AudioResampler(
            av.AudioFormat("flt"), audio_stream.layout, sample_rate
        )

        for frame in container.decode(audio=stream):
            frame.pts = None  # Skip time checks

            for reframe in resampler.resample(frame):
                fifo.write(reframe)

            while fifo.samples >= ceil(exact_size):
                # Spread the fractional remainder across chunks so long
                # files don't drift out of sync with the timebase.
                size_with_error = exact_size + accumulated_error
                current_size = round(size_with_error)
                accumulated_error = size_with_error - current_size

                audio_chunk = fifo.read(current_size)
                assert audio_chunk is not None
                arr = audio_chunk.to_ndarray().flatten()
                yield float(np.max(np.abs(arr)))


def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[float]:
container = av.open(src.path, "r")

Expand Down Expand Up @@ -138,7 +172,6 @@ def iter_motion(src, tb, stream: int, blur: int, width: int) -> Iterator[float]:

@dataclass(slots=True)
class Levels:
ensure: Ensure
src: FileInfo
tb: Fraction
bar: Bar
Expand All @@ -151,24 +184,16 @@ def media_length(self) -> int:
if (arr := self.read_cache("audio", {"stream": 0})) is not None:
return len(arr)

sr, samples = read(self.ensure.audio(self.src, 0))
samp_count = len(samples)
del samples

samp_per_ticks = sr / self.tb
ticks = int(samp_count / samp_per_ticks)
self.log.debug(f"Audio Length: {ticks}")
self.log.debug(
f"... without rounding: {float(samp_count / samp_per_ticks)}"
)
return ticks
result = sum(1 for _ in iter_audio(self.src, self.tb, 0))
self.log.debug(f"Audio Length: {result}")
return result

# If there's no audio, get length in video metadata.
with av.open(f"{self.src.path}") as cn:
if len(cn.streams.video) < 1:
with av.open(self.src.path) as container:
if len(container.streams.video) == 0:
self.log.error("Could not get media duration")

video = cn.streams.video[0]
video = container.streams.video[0]

if video.duration is None or video.time_base is None:
dur = 0
Expand Down Expand Up @@ -213,56 +238,70 @@ def cache(self, tag: str, obj: dict[str, Any], arr: np.ndarray) -> np.ndarray:
return arr

def audio(self, stream: int) -> NDArray[np.float64]:
if stream > len(self.src.audios) - 1:
if stream >= len(self.src.audios):
raise LevelError(f"audio: audio stream '{stream}' does not exist.")

if (arr := self.read_cache("audio", {"stream": stream})) is not None:
return arr

sr, samples = read(self.ensure.audio(self.src, stream))

if len(samples) == 0:
raise LevelError(f"audio: stream '{stream}' has no samples.")

def get_max_volume(s: np.ndarray) -> float:
return max(float(np.max(s)), -float(np.min(s)))

max_volume = get_max_volume(samples)
self.log.debug(f"Max volume: {max_volume}")
with av.open(self.src.path, "r") as container:
audio = container.streams.audio[stream]
if audio.duration is not None and audio.time_base is not None:
inaccurate_dur = int(audio.duration * audio.time_base * self.tb)
elif container.duration is not None:
inaccurate_dur = int(container.duration / av.time_base * self.tb)
else:
inaccurate_dur = 1024

samp_count = samples.shape[0]
samp_per_ticks = sr / self.tb
bar = self.bar
bar.start(inaccurate_dur, "Analyzing audio volume")

if samp_per_ticks < 1:
self.log.error(
f"audio: stream '{stream}'\n Samplerate ({sr}) must be greater than "
f"or equal to timebase ({self.tb})\n"
" Try `-fps 30` and/or `--sample-rate 48000`"
)
result = np.zeros((inaccurate_dur), dtype=np.float64)
index = 0
for value in iter_audio(self.src, self.tb, stream):
if index > len(result) - 1:
result = np.concatenate(
(result, np.zeros((len(result)), dtype=np.float64))
)
result[index] = value
bar.tick(index)
index += 1

audio_ticks = int(samp_count / samp_per_ticks)
self.log.debug(
f"analyze: audio length: {audio_ticks} ({float(samp_count / samp_per_ticks)})"
)
self.bar.start(audio_ticks, "Analyzing audio volume")
bar.end()
return self.cache("audio", {"stream": stream}, result[:index])

threshold_list = np.zeros((audio_ticks), dtype=np.float64)
def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float64]:
if stream >= len(self.src.videos):
raise LevelError(f"motion: video stream '{stream}' does not exist.")

if max_volume == 0: # Prevent dividing by zero
return threshold_list
mobj = {"stream": stream, "width": width, "blur": blur}
if (arr := self.read_cache("motion", mobj)) is not None:
return arr

# Determine when audio is silent or loud.
for i in range(audio_ticks):
if i % 500 == 0:
self.bar.tick(i)
with av.open(self.src.path, "r") as container:
video = container.streams.video[stream]
inaccurate_dur = (
1024
if video.duration is None or video.time_base is None
else int(video.duration * video.time_base * self.tb)
)

start = int(i * samp_per_ticks)
end = min(int((i + 1) * samp_per_ticks), samp_count)
bar = self.bar
bar.start(inaccurate_dur, "Analyzing motion")

threshold_list[i] = get_max_volume(samples[start:end]) / max_volume
result = np.zeros((inaccurate_dur), dtype=np.float64)
index = 0
for value in iter_motion(self.src, self.tb, stream, blur, width):
if index > len(result) - 1:
result = np.concatenate(
(result, np.zeros((len(result)), dtype=np.float64))
)
result[index] = value
bar.tick(index)
index += 1

self.bar.end()
return self.cache("audio", {"stream": stream}, threshold_list)
bar.end()
return self.cache("motion", mobj, result[:index])

def subtitle(
self,
Expand Down Expand Up @@ -336,37 +375,3 @@ def subtitle(
container.close()

return result

def motion(self, stream: int, blur: int, width: int) -> NDArray[np.float64]:
    """Return per-tick motion levels for a video stream, using the cache.

    Raises LevelError when the requested video stream does not exist.
    """
    if stream >= len(self.src.videos):
        raise LevelError(f"motion: video stream '{stream}' does not exist.")

    mobj = {"stream": stream, "width": width, "blur": blur}
    cached = self.read_cache("motion", mobj)
    if cached is not None:
        return cached

    # Estimate the tick count from container metadata for the progress
    # bar; fall back to 1024 when duration metadata is missing.
    with av.open(self.src.path, "r") as container:
        video = container.streams.video[stream]
        if video.duration is None or video.time_base is None:
            inaccurate_dur = 1024
        else:
            inaccurate_dur = int(video.duration * video.time_base * self.tb)

    bar = self.bar
    bar.start(inaccurate_dur, "Analyzing motion")

    levels = np.zeros((inaccurate_dur), dtype=np.float64)
    i = 0
    for value in iter_motion(self.src, self.tb, stream, blur, width):
        if i >= len(levels):
            # The duration estimate was short: double the buffer.
            levels = np.concatenate(
                (levels, np.zeros((len(levels)), dtype=np.float64))
            )
        levels[i] = value
        bar.tick(i)
        i += 1

    bar.end()
    return self.cache("motion", mobj, levels[:i])
8 changes: 4 additions & 4 deletions auto_editor/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,8 @@ def edit_media(
else:
samplerate = args.sample_rate

ensure = Ensure(ffmpeg, bar, samplerate, temp, log)

if tl is None:
tl = make_timeline(sources, ensure, args, samplerate, bar, temp, log)
tl = make_timeline(sources, args, samplerate, bar, temp, log)

if export["export"] == "timeline":
from auto_editor.formats.json import make_json_timeline
Expand All @@ -216,7 +214,7 @@ def edit_media(
if args.preview:
from auto_editor.preview import preview

preview(ensure, tl, temp, log)
preview(tl, temp, log)
return

if export["export"] == "json":
Expand Down Expand Up @@ -265,6 +263,8 @@ def make_media(tl: v3, output: str) -> None:
sub_output = []
apply_later = False

ensure = Ensure(ffmpeg, bar, samplerate, temp, log)

if ctr.default_sub != "none" and not args.sn:
sub_output = make_new_subtitles(tl, ensure, temp)

Expand Down
9 changes: 3 additions & 6 deletions auto_editor/make_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
if TYPE_CHECKING:
from numpy.typing import NDArray

from auto_editor.output import Ensure
from auto_editor.utils.bar import Bar
from auto_editor.utils.chunks import Chunks
from auto_editor.utils.log import Log
Expand Down Expand Up @@ -75,7 +74,6 @@ def make_av(src: FileInfo, all_clips: list[list[Clip]]) -> tuple[VSpace, ASpace]
def run_interpreter_for_edit_option(
text: str, filesetup: FileSetup
) -> NDArray[np.bool_]:
ensure = filesetup.ensure
src = filesetup.src
tb = filesetup.tb
bar = filesetup.bar
Expand All @@ -87,8 +85,8 @@ def run_interpreter_for_edit_option(
if log.is_debug:
log.debug(f"edit: {parser}")

env["timebase"] = filesetup.tb
env["@levels"] = Levels(ensure, src, tb, bar, temp, log)
env["timebase"] = tb
env["@levels"] = Levels(src, tb, bar, temp, log)
env["@filesetup"] = filesetup

results = interpret(env, parser)
Expand Down Expand Up @@ -139,7 +137,6 @@ def parse_time(val: str, arr: NDArray, tb: Fraction) -> int: # raises: `CoerceE

def make_timeline(
sources: list[FileInfo],
ensure: Ensure,
args: Args,
sr: int,
bar: Bar,
Expand Down Expand Up @@ -169,7 +166,7 @@ def make_timeline(
concat = np.concatenate

for i, src in enumerate(sources):
filesetup = FileSetup(src, ensure, len(sources) < 2, tb, bar, temp, log)
filesetup = FileSetup(src, len(sources) < 2, tb, bar, temp, log)

edit_result = run_interpreter_for_edit_option(method, filesetup)
mut_margin(edit_result, start_margin, end_margin)
Expand Down
5 changes: 2 additions & 3 deletions auto_editor/preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import TextIO

from auto_editor.analyze import Levels
from auto_editor.output import Ensure
from auto_editor.timeline import v3
from auto_editor.utils.bar import Bar
from auto_editor.utils.func import to_timecode
Expand Down Expand Up @@ -49,7 +48,7 @@ def all_cuts(tl: v3, in_len: int) -> list[int]:
return cut_lens


def preview(ensure: Ensure, tl: v3, temp: str, log: Log) -> None:
def preview(tl: v3, temp: str, log: Log) -> None:
log.conwrite("")
tb = tl.tb

Expand All @@ -66,7 +65,7 @@ def preview(ensure: Ensure, tl: v3, temp: str, log: Log) -> None:

in_len = 0
for src in all_sources:
in_len += Levels(ensure, src, tb, Bar("none"), temp, log).media_length
in_len += Levels(src, tb, Bar("none"), temp, log).media_length

out_len = tl.out_len()

Expand Down
2 changes: 1 addition & 1 deletion auto_editor/render/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def parse(self, text: str, codec: str) -> None:
self.codec = codec
self.contents = []

if codec == "ass":
if codec == "ass" or codec == "ssa":
time_code = re.compile(r"(.*)(\d+:\d+:[\d.]+)(.*)(\d+:\d+:[\d.]+)(.*)")
elif codec == "webvtt":
time_code = re.compile(r"()(\d+:[\d.]+)( --> )(\d+:[\d.]+)(\n.*)")
Expand Down
Loading

0 comments on commit 5b4000a

Please sign in to comment.