rwth-i6 · christophmluscher · Jan 23, 2024 · Jan 18, 2024 · Jan 22, 2024 · Jan 22, 2024
diff --git a/audio/ffmpeg.py b/audio/ffmpeg.py
@@ -112,7 +112,8 @@ def __init__(
         if self.error_threshold > 0:
             self.out_failed_files = self.output_path("failed_files.txt")
 
-        self.rqmt = {"time": 4, "cpu": 4, "mem": 8}
+        # e.g. 1 core for Python plus 4 parallel ffmpeg processes with 2 threads each (one thread for input processing, one for output processing)
+        self.rqmt = {"time": 4, "cpu": 9, "mem": 8}
 
     def tasks(self):
         yield Task("run", rqmt=self.rqmt)
@@ -124,11 +125,11 @@ def tasks(self):
 
     def run(self):
         c = corpus.Corpus()
-        c.load(tk.uncached_path(self.corpus_file))
+        c.load(self.corpus_file.get_path())
 
         from multiprocessing import pool
 
-        p = pool.Pool(self.rqmt["cpu"])
+        p = pool.Pool(self.rqmt["cpu"] // 2)
         p.map(self._perform_ffmpeg, c.all_recordings())
 
         for r in c.all_recordings():
@@ -138,7 +139,7 @@ def run(self):
         if self.recover_duration:
             c.dump("temp_corpus.xml.gz")
         else:
-            c.dump(tk.uncached_path(self.out_corpus))
+            c.dump(self.out_corpus.get_path())
 
         if self.out_failed_files is not None:
             with open(self.out_failed_files.get_path(), "wt") as out:
@@ -195,13 +196,9 @@ def _perform_ffmpeg(self, recording: corpus.Recording):
         target = os.path.join(self.out_audio_folder.get_path(), audio_filename)
         if not os.path.exists(target):
             logging.info(f"try converting {target}")
-            command_head = [
-                self.ffmpeg_binary,
-                "-hide_banner",
-                "-y",
-            ]
+            command_head = [self.ffmpeg_binary, "-hide_banner", "-y", "-threads", "1"]
             command_in = ["-i", recording.audio]
-            command_out = [target]
+            command_out = ["-threads", "1", target]
             in_options = self.ffmpeg_input_options or []
             out_options = self.ffmpeg_options or []
             command = command_head + in_options + command_in + out_options + command_out