diff --git a/objects.inv b/objects.inv
index c7459f5..c6d919b 100644
Binary files a/objects.inv and b/objects.inv differ
diff --git a/reference/video_sampler/gating/index.html b/reference/video_sampler/gating/index.html
index 54da74e..9a54e1a 100644
--- a/reference/video_sampler/gating/index.html
+++ b/reference/video_sampler/gating/index.html
@@ -457,6 +457,45 @@
+ (navigation entry added: PassGate)
@@ -736,6 +775,45 @@
+ (navigation entry added: PassGate)
@@ -792,18 +870,7 @@

Source code in video_sampler/gating.py
class BlurGate:
         def __init__(
             self, method: Literal["fft", "laplacian"] = "laplacian", threshold: float = 100
         ) -> None:
    -        """Gate frames based on bluriness.
    -        :param method: The method to use for blur detection. Can be "fft" or "laplacian".
    -        :param threshold: The threshold for bluriness. The higher the threshold, the less
    -            blurry the image needs to be to be discarded.
    -            Those are different depending on the method:
    -            - 20 is a good start for fft
    -            - 100 is a good start for laplacian.
    +        """
    +        Initializes the Gating object.
    +
    +        Args:
    +            method (str): The method to use for blur detection. Can be "fft" or "laplacian".
+            threshold (float): The threshold for blurriness. The higher the threshold, the less
    +                blurry the image needs to be to be discarded.
    +                The default threshold values are:
    +                - 20 for the "fft" method
    +                - 100 for the "laplacian" method.
    +
    +        Raises:
    +            ValueError: If an unknown blur method is provided.
             """
             self.is_blurry = None
             if method == "fft":
    @@ -907,44 +997,122 @@ 

    -

- Gate frames based on bluriness.
- :param method: The method to use for blur detection. Can be "fft" or "laplacian".
- :param threshold: The threshold for bluriness. The higher the threshold, the less
-     blurry the image needs to be to be discarded.
-     Those are different depending on the method:
-     - 20 is a good start for fft
-     - 100 is a good start for laplacian.

    +

    Initializes the Gating object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| method | str | The method to use for blur detection. Can be "fft" or "laplacian". | 'laplacian' |
| threshold | float | The threshold for blurriness. The higher the threshold, the less blurry the image needs to be to be discarded. Default values: 20 for "fft", 100 for "laplacian". | 100 |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If an unknown blur method is provided. |
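A minimal usage sketch, assuming BlurGate is importable from video_sampler.gating and follows the same __call__(frame, meta, last) protocol documented for PassGate below; the input image path and metadata keys are hypothetical:

from PIL import Image

from video_sampler.gating import BlurGate

gate = BlurGate(method="fft", threshold=20)  # ~20 is a sensible start for "fft"
frame = Image.open("frame.jpg")  # hypothetical input frame
gated = gate(frame, meta={"frame_time": 0.0}, last=False)
for frame_obj in gated.frames:  # only frames that passed the blur check
    print(frame_obj.metadata)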
Source code in video_sampler/gating.py
def __init__(
         self, method: Literal["fft", "laplacian"] = "laplacian", threshold: float = 100
     ) -> None:
    -    """Gate frames based on bluriness.
    -    :param method: The method to use for blur detection. Can be "fft" or "laplacian".
    -    :param threshold: The threshold for bluriness. The higher the threshold, the less
    -        blurry the image needs to be to be discarded.
    -        Those are different depending on the method:
    -        - 20 is a good start for fft
    -        - 100 is a good start for laplacian.
    +    """
    +    Initializes the Gating object.
    +
    +    Args:
    +        method (str): The method to use for blur detection. Can be "fft" or "laplacian".
+        threshold (float): The threshold for blurriness. The higher the threshold, the less
    +            blurry the image needs to be to be discarded.
    +            The default threshold values are:
    +            - 20 for the "fft" method
    +            - 100 for the "laplacian" method.
    +
    +    Raises:
    +        ValueError: If an unknown blur method is provided.
         """
         self.is_blurry = None
         if method == "fft":
    @@ -985,24 +1153,7 @@ 

Source code in video_sampler/gating.py
class ClipGate:
         def __init__(
             self,
             pos_samples: list[str] = None,
    @@ -1078,6 +1257,17 @@ 

    pos_margin: float = 0.2,
    neg_margin: float = 0.3,
) -> None:
+    """
+    Initializes the Clip Gating object.
+
+    Args:
+        pos_samples (list[str], optional): List of positive samples. Defaults to None.
+        neg_samples (list[str], optional): List of negative samples. Defaults to None.
+        model_name (str, optional): Name of the model. Defaults to "ViT-B-32".
+        batch_size (int, optional): Batch size. Defaults to 32.
+        pos_margin (float, optional): Positive margin. Defaults to 0.2.
+        neg_margin (float, optional): Negative margin. Defaults to 0.3.
+    """
    self.model, self.preprocess, self.tokenizer = create_model(
        model_name=model_name
    )
@@ -1168,6 +1358,403 @@

__init__(pos_samples=None, neg_samples=None, model_name='ViT-B-32', batch_size=32, pos_margin=0.2, neg_margin=0.3)

    Initializes the Clip Gating object.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| pos_samples | list[str] | List of positive samples. Defaults to None. | None |
| neg_samples | list[str] | List of negative samples. Defaults to None. | None |
| model_name | str | Name of the model. Defaults to "ViT-B-32". | 'ViT-B-32' |
| batch_size | int | Batch size. Defaults to 32. | 32 |
| pos_margin | float | Positive margin. Defaults to 0.2. | 0.2 |
| neg_margin | float | Negative margin. Defaults to 0.3. | 0.3 |
Source code in video_sampler/gating.py
def __init__(
    +    self,
    +    pos_samples: list[str] = None,
    +    neg_samples: list[str] = None,
    +    model_name: str = "ViT-B-32",
    +    batch_size: int = 32,
    +    pos_margin: float = 0.2,
    +    neg_margin: float = 0.3,
    +) -> None:
    +    """
    +    Initializes the Clip Gating object.
    +
    +    Args:
    +        pos_samples (list[str], optional): List of positive samples. Defaults to None.
    +        neg_samples (list[str], optional): List of negative samples. Defaults to None.
    +        model_name (str, optional): Name of the model. Defaults to "ViT-B-32".
    +        batch_size (int, optional): Batch size. Defaults to 32.
    +        pos_margin (float, optional): Positive margin. Defaults to 0.2.
    +        neg_margin (float, optional): Negative margin. Defaults to 0.3.
    +    """
    +    self.model, self.preprocess, self.tokenizer = create_model(
    +        model_name=model_name
    +    )
    +    self.pos_margin = pos_margin
    +    self.neg_margin = neg_margin
    +    self.batch_size = batch_size
    +    self.frame_accumulator = []
    +    self.metadata_accumulator = []
    +    if pos_samples is None:
    +        self.pos_samples = torch.zeros((1, 512))
    +    else:
    +        self.pos_samples = self._preproc_samples(pos_samples)
    +    if neg_samples is None:
    +        self.neg_samples = torch.zeros((1, 512))
    +    else:
    +        self.neg_samples = self._preproc_samples(neg_samples)
    +
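For orientation, a construction sketch mirroring the gate_def example from the project README (not a verified API; the sample prompts are illustrative):

from video_sampler.gating import ClipGate

gate = ClipGate(
    pos_samples=["a cat"],
    neg_samples=["an empty background", "text on screen"],
    model_name="ViT-B-32",
    batch_size=32,
    pos_margin=0.2,
    neg_margin=0.3,
)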
PassGate

Source code in video_sampler/gating.py
    class PassGate:
    +    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:
    +        """
    +        Passes the frame through the gating mechanism.
    +
    +        Args:
    +            frame (Image.Image): The frame to pass through.
    +            meta (dict): The metadata for the frame.
    +            last (bool): If this is the last frame in the video.
    +
    +        Returns:
    +            GatedObject: The gated object containing the processed frame.
    +        """
    +        return self.flush() if last else GatedObject([FrameObject(frame, meta)], 1)
    +
    +    def flush(self):
    +        return EMPTY_GATED_OBJECT
    +
__call__(frame, meta, last=False)

    Passes the frame through the gating mechanism.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| frame | Image | The frame to pass through. | required |
| meta | dict | The metadata for the frame. | required |
| last | bool | If this is the last frame in the video. | False |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| GatedObject | GatedObject | The gated object containing the processed frame. |

Source code in video_sampler/gating.py
    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:
    +    """
    +    Passes the frame through the gating mechanism.
    +
    +    Args:
    +        frame (Image.Image): The frame to pass through.
    +        meta (dict): The metadata for the frame.
    +        last (bool): If this is the last frame in the video.
    +
    +    Returns:
    +        GatedObject: The gated object containing the processed frame.
    +    """
    +    return self.flush() if last else GatedObject([FrameObject(frame, meta)], 1)
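A short driver sketch, assuming an iterable of PIL images called frames (hypothetical) and the metadata key used elsewhere in these docs:

from video_sampler.gating import PassGate

gate = PassGate()
for t, img in enumerate(frames):  # frames: any iterable of PIL images (hypothetical)
    gated = gate(img, meta={"frame_time": t}, last=False)
    for frame_obj in gated.frames:
        ...  # PassGate forwards every frame unchanged
gate(None, meta={}, last=True)  # last=True routes to flush(), returning EMPTY_GATED_OBJECT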
diff --git a/reference/video_sampler/integrations/yt_dlp_plugin/index.html b/reference/video_sampler/integrations/yt_dlp_plugin/index.html
index 0993335..f4f90df 100644
--- a/reference/video_sampler/integrations/yt_dlp_plugin/index.html
+++ b/reference/video_sampler/integrations/yt_dlp_plugin/index.html
@@ -862,10 +862,29 @@

    -

- A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.
- Methods:
-     generate_urls(url, extra_yt_constr_args=None, extra_info_extract_opts=None) -> Iterable[str]:
-         Generates URLs and corresponding titles from the given URL.

    +

    A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

Methods:

| Name | Description |
| --- | --- |
| generate_urls | Generates URLs and corresponding titles from the given URL. |
    Source code in video_sampler/integrations/yt_dlp_plugin.py @@ -958,9 +977,16 @@

class YTDLPPlugin:
         """
         A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.
    +
         Methods:
             generate_urls(url, extra_yt_constr_args=None, extra_info_extract_opts=None) -> Iterable[str]:
                 Generates URLs and corresponding titles from the given URL.
    @@ -970,7 +996,6 @@ 

def __init__(self, ie_key: str = "Generic"):
    """
    Initialize the YTDLPPlugin instance.
-    :param ie_key (str): The key for the information extractor.
    """
    self.ie_key = ie_key
    self.ydl_opts = {
@@ -982,13 +1007,14 @@

    url: str,
    extra_info_extract_opts: dict = None,
) -> Iterable[str]:
-    """Generate URLs and corresponding titles from the given URL.
+    """Generate URLs and download subtitles for a given video URL.

-    :param url (str): The URL to extract information from.
-    :param extra_info_extract_opts (dict, optional): Extra options for information extraction.
+    Args:
+        url (str): The URL of the video to download subtitles for.
+        extra_info_extract_opts (dict, optional): Additional options for extracting video information.

-    :return Iterable[str]:
-        Tuple[str, str]: A tuple containing the title and URL of each extracted entry.
+    Yields:
+        tuple: A tuple containing the video title, video format URL, and downloaded subtitles.
    """
    if extra_info_extract_opts is None:
        extra_info_extract_opts = {}
@@ -1023,9 +1049,14 @@

    url: str,
    extra_info_extract_opts: dict = None,
):
-    """Download subtitles for a given video URL.
+    """Generate URLs and download subtitles for a given video URL.
+
+    Args:
+        url (str): The URL of the video to download subtitles for.
+        extra_info_extract_opts (dict, optional): Additional options for extracting video information.

-    :param video_url (str): The URL of the video to download subtitles for.
+    Yields:
+        tuple: A tuple containing the video title, video format URL, and downloaded subtitles.
    """
    if extra_info_extract_opts is None:
        extra_info_extract_opts = {}
@@ -1076,13 +1107,11 @@

- Initialize the YTDLPPlugin instance.
- :param ie_key (str): The key for the information extractor.
+ Initialize the YTDLPPlugin instance.

Source code in video_sampler/integrations/yt_dlp_plugin.py
    def __init__(self, ie_key: str = "Generic"):
         """
         Initialize the YTDLPPlugin instance.
    -    :param ie_key (str): The key for the information extractor.
         """
         self.ie_key = ie_key
         self.ydl_opts = {
    @@ -1117,11 +1145,75 @@ 

- Generate URLs and corresponding titles from the given URL.
- :param url (str): The URL to extract information from.
- :param extra_info_extract_opts (dict, optional): Extra options for information extraction.
- :return Iterable[str]:
-     Tuple[str, str]: A tuple containing the title and URL of each extracted entry.
+ Generate URLs and download subtitles for a given video URL.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| url | str | The URL of the video to download subtitles for. | required |
| extra_info_extract_opts | dict | Additional options for extracting video information. | None |

Yields:

| Name | Type | Description |
| --- | --- | --- |
| tuple | Iterable[str] | A tuple containing the video title, video format URL, and downloaded subtitles. |
    Source code in video_sampler/integrations/yt_dlp_plugin.py @@ -1149,18 +1241,20 @@

def generate_urls(
         self,
         url: str,
         extra_info_extract_opts: dict = None,
     ) -> Iterable[str]:
    -    """Generate URLs and corresponding titles from the given URL.
    +    """Generate URLs and download subtitles for a given video URL.
     
    -    :param url (str): The URL to extract information from.
    -    :param extra_info_extract_opts (dict, optional): Extra options for information extraction.
    +    Args:
    +        url (str): The URL of the video to download subtitles for.
    +        extra_info_extract_opts (dict, optional): Additional options for extracting video information.
     
    -    :return Iterable[str]:
    -        Tuple[str, str]: A tuple containing the title and URL of each extracted entry.
    +    Yields:
    +        tuple: A tuple containing the video title, video format URL, and downloaded subtitles.
         """
         if extra_info_extract_opts is None:
             extra_info_extract_opts = {}
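A calling sketch, assuming only what the docstrings above state; note they disagree on the yielded tuple shape (older docs say (title, url), newer ones add subtitles), so the unpacking below is hedged:

from video_sampler.integrations.yt_dlp_plugin import YTDLPPlugin

plugin = YTDLPPlugin()
# NOTE: tuple shape taken from the docstrings above and may vary by version
for entry in plugin.generate_urls("ytsearch:cute cats"):
    title, url = entry[0], entry[1]  # title and stream URL of each extracted entry
    print(title, url)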
    @@ -1193,13 +1287,78 @@ 

- Download subtitles for a given video URL.
- :param video_url (str): The URL of the video to download subtitles for.
+ Generate URLs and download subtitles for a given video URL.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| url | str | The URL of the video to download subtitles for. | required |
| extra_info_extract_opts | dict | Additional options for extracting video information. | None |

Yields:

| Name | Type | Description |
| --- | --- | --- |
| tuple | | A tuple containing the video title, video format URL, and downloaded subtitles. |
Source code in video_sampler/integrations/yt_dlp_plugin.py
def generate_urls_by_subs(
         self,
         url: str,
         extra_info_extract_opts: dict = None,
     ):
    -    """Download subtitles for a given video URL.
    +    """Generate URLs and download subtitles for a given video URL.
    +
    +    Args:
    +        url (str): The URL of the video to download subtitles for.
    +        extra_info_extract_opts (dict, optional): Additional options for extracting video information.
     
    -    :param video_url (str): The URL of the video to download subtitles for.
    +    Yields:
    +        tuple: A tuple containing the video title, video format URL, and downloaded subtitles.
         """
         if extra_info_extract_opts is None:
             extra_info_extract_opts = {}
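And analogously for the subtitle variant (a sketch; the three-way unpacking follows the docstring above):

for title, format_url, subs in plugin.generate_urls_by_subs(
    "https://www.youtube.com/watch?v=W86cTIoMv2U"
):
    print(title, format_url)  # subs holds the downloaded subtitles per the docstring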
    diff --git a/reference/video_sampler/sampler/index.html b/reference/video_sampler/sampler/index.html
    index b45b159..665d99f 100644
    --- a/reference/video_sampler/sampler/index.html
    +++ b/reference/video_sampler/sampler/index.html
    @@ -487,6 +487,30 @@
         
       
       
+ (navigation entry added: SegmentSampler)
@@ -499,6 +523,15 @@
+ (navigation entry added: SegmentSampler)
@@ -730,6 +796,15 @@
- | cfg | SamplerConfig | The configuration for the video sampler. |
+ | video_path | str | The path to the video file. |

    @@ -854,76 +1145,318 @@

- Attributes:
-
- | Name | Type | Description |
- | --- | --- | --- |
- | cfg | SamplerConfig | The configuration for the video sampler. |
- | frame_buffer | FrameBuffer | The frame buffer used for sampling frames. |
- | gate | Gate | The gate used for filtering frames. |
- | stats | Counter | A counter for tracking statistics. |
+ Yields:
+
+ | Type | Description |
+ | --- | --- |
+ | Iterable[list[FrameObject]] | A generator that yields a list of FrameObjects representing sampled frames. |

Source code in video_sampler/sampler.py
def sample(self, video_path: str) -> Iterable[list[FrameObject]]:
    """Generate sample frames from a video.

    Args:
        video_path (str): The path to the video file.

    Yields:
        Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.
    """
    self.stats.clear()
    self.frame_buffer.clear()
    next_segment = next(self.segment_generator)
    segment_boundary_end_sec = next_segment.end_time / 1000
    segment_boundary_start_sec = next_segment.start_time / 1000
    absolute_stop = False
    with av.open(video_path) as container:
        stream = container.streams.video[0]
        if self.cfg.keyframes_only:
            stream.codec_context.skip_frame = "NONKEY"
        prev_time = -10
        for frame_indx, frame in enumerate(container.decode(stream)):
            ftime = frame.time
            reiters = 0
            # find the next segment that starts after the current frame
            while ftime > segment_boundary_end_sec:
                console.print(
                    f"Seeking to next segment: {segment_boundary_end_sec}/{ftime}",
                    style=f"bold {Color.yellow.value}",
                )
                try:
                    next_segment = next(self.segment_generator)
                    reiters += 1
                    segment_boundary_end_sec = next_segment.end_time / 1000
                    segment_boundary_start_sec = next_segment.start_time / 1000
                except StopIteration:
                    absolute_stop = True
                    break
            if reiters > 0:
                console.print(
                    f"Skipped {reiters} segments!",
                    style=f"bold {Color.red.value}",
                )
            if absolute_stop:
                break
            # we haven't found the next segment yet
            # the other condition, is where we are after the segment
            # but this is handled by the while loop above
            if ftime <= segment_boundary_start_sec:
                continue

            self.stats["total"] += 1
            time_diff = ftime - prev_time
            if time_diff < self.cfg.min_frame_interval_sec:
                continue
            prev_time = ftime

            frame_pil: Image = frame.to_image()
            if self.cfg.debug:
                buf = self.frame_buffer.get_buffer_state()
                console.print(
                    f"Frame {frame_indx}\ttime: {ftime}",
                    f"\t Buffer ({len(buf)}): {buf}",
                    style=f"bold {Color.green.value}",
                )
            frame_meta = {"frame_time": ftime, "frame_indx": frame_indx}
            self.stats["decoded"] += 1
            if res := self.frame_buffer.add(
                frame_pil,
                metadata=frame_meta,
            ):
                gated_obj = self.gate(*res)
                self.stats["produced"] += 1
                self.stats["gated"] += gated_obj.N
                if gated_obj.frames:
                    yield gated_obj.frames

    # flush buffer
    yield from self.flush_buffer()
VideoSampler

    The fundamental class for sampling video frames.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| cfg | SamplerConfig | The configuration for the video sampler. | required |
Attributes:

| Name | Type | Description |
| --- | --- | --- |
| cfg | SamplerConfig | The configuration for the video sampler. |
| frame_buffer | FrameBuffer | The frame buffer used for sampling frames. |
| gate | Gate | The gate used for filtering frames. |
| stats | Counter | A counter for tracking statistics. |

Methods:

| Name | Description |
| --- | --- |
| sample | Generates sample frames from a video. |
    @@ -1032,7 +1565,18 @@

class VideoSampler:
         """
         The fundamental class for sampling video frames.
     
    @@ -1059,8 +1603,30 @@ 

    self.gate = create_gate(self.cfg.gate_config)
    self.stats = Counter()

+    def flush_buffer(self):
+        """Flushes the frame buffer and yields gated frames"""
+        for res in self.frame_buffer.final_flush():
+            if res:
+                self.stats["produced"] += 1
+                gated_obj = self.gate(*res)
+                self.stats["gated"] += gated_obj.N
+                if gated_obj.frames:
+                    yield gated_obj.frames
+        gated_obj = self.gate.flush()
+        self.stats["gated"] += gated_obj.N
+        if gated_obj.frames:
+            yield gated_obj.frames
+        yield PROCESSING_DONE_ITERABLE
+
     def sample(self, video_path: str) -> Iterable[list[FrameObject]]:
-        """Generate sample frames from a video"""
+        """Generate sample frames from a video.
+
+        Args:
+            video_path (str): The path to the video file.
+
+        Yields:
+            Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.
+        """
         self.stats.clear()
         self.frame_buffer.clear()
         with av.open(video_path) as container:
@@ -1097,18 +1663,7 @@

                        yield gated_obj.frames

        # flush buffer
-        for res in self.frame_buffer.final_flush():
-            if res:
-                self.stats["produced"] += 1
-                gated_obj = self.gate(*res)
-                self.stats["gated"] += gated_obj.N
-                if gated_obj.frames:
-                    yield gated_obj.frames
-        gated_obj = self.gate.flush()
-        self.stats["gated"] += gated_obj.N
-        if gated_obj.frames:
-            yield gated_obj.frames
-        yield PROCESSING_DONE_ITERABLE
+        yield from self.flush_buffer()

    def write_queue(self, video_path: str, q: Queue):
        try:
@@ -1142,15 +1697,15 @@

- sample(video_path)
+ flush_buffer()

- Generate sample frames from a video
+ Flushes the frame buffer and yields gated frames

    Source code in video_sampler/sampler.py @@ -1167,80 +1722,8 @@


    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:
    -    """Generate sample frames from a video"""
    -    self.stats.clear()
    -    self.frame_buffer.clear()
    -    with av.open(video_path) as container:
    -        stream = container.streams.video[0]
    -        if self.cfg.keyframes_only:
    -            stream.codec_context.skip_frame = "NONKEY"
    -        prev_time = -10
    -        for frame_indx, frame in enumerate(container.decode(stream)):
    -            # skip frames if keyframes_only is True
    -            time_diff = frame.time - prev_time
    -            self.stats["total"] += 1
    -            if time_diff < self.cfg.min_frame_interval_sec:
    -                continue
    -            prev_time = frame.time
    -
    -            frame_pil: Image = frame.to_image()
    -            if self.cfg.debug:
    -                buf = self.frame_buffer.get_buffer_state()
    -                console.print(
    -                    f"Frame {frame_indx}\ttime: {frame.time}",
    -                    f"\t Buffer ({len(buf)}): {buf}",
    -                    style=f"bold {Color.green.value}",
    -                )
    -            frame_meta = {"frame_time": frame.time, "frame_indx": frame_indx}
    -            self.stats["decoded"] += 1
    -            if res := self.frame_buffer.add(
    -                frame_pil,
    -                metadata=frame_meta,
    -            ):
    -                gated_obj = self.gate(*res)
    -                self.stats["produced"] += 1
    -                self.stats["gated"] += gated_obj.N
    -                if gated_obj.frames:
    -                    yield gated_obj.frames
    -
    -    # flush buffer
    def flush_buffer(self):
    +    """Flushes the frame buffer and yields gated frames"""
         for res in self.frame_buffer.final_flush():
             if res:
                 self.stats["produced"] += 1
    @@ -1260,64 +1743,199 @@ 

- Worker
+ sample(video_path)

Generate sample frames from a video.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| video_path | str | The path to the video file. | required |

Yields:

| Type | Description |
| --- | --- |
| Iterable[list[FrameObject]] | A generator that yields a list of FrameObjects representing sampled frames. |
Source code in video_sampler/sampler.py
    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:
    +    """Generate sample frames from a video.
    +
    +    Args:
    +        video_path (str): The path to the video file.
    +
    +    Yields:
    +        Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.
    +    """
    +    self.stats.clear()
    +    self.frame_buffer.clear()
    +    with av.open(video_path) as container:
    +        stream = container.streams.video[0]
    +        if self.cfg.keyframes_only:
    +            stream.codec_context.skip_frame = "NONKEY"
    +        prev_time = -10
    +        for frame_indx, frame in enumerate(container.decode(stream)):
    +            # skip frames if keyframes_only is True
    +            time_diff = frame.time - prev_time
    +            self.stats["total"] += 1
    +            if time_diff < self.cfg.min_frame_interval_sec:
    +                continue
    +            prev_time = frame.time
    +
    +            frame_pil: Image = frame.to_image()
    +            if self.cfg.debug:
    +                buf = self.frame_buffer.get_buffer_state()
    +                console.print(
    +                    f"Frame {frame_indx}\ttime: {frame.time}",
    +                    f"\t Buffer ({len(buf)}): {buf}",
    +                    style=f"bold {Color.green.value}",
    +                )
    +            frame_meta = {"frame_time": frame.time, "frame_indx": frame_indx}
    +            self.stats["decoded"] += 1
    +            if res := self.frame_buffer.add(
    +                frame_pil,
    +                metadata=frame_meta,
    +            ):
    +                gated_obj = self.gate(*res)
    +                self.stats["produced"] += 1
    +                self.stats["gated"] += gated_obj.N
    +                if gated_obj.frames:
    +                    yield gated_obj.frames
    +
    +    # flush buffer
    +    yield from self.flush_buffer()
    +
Worker

Source code in video_sampler/sampler.py
class Worker:
         def __init__(
             self,
             cfg: SamplerConfig,
    @@ -1363,10 +2026,14 @@ 

def launch(
    self, video_path: str, output_path: str = "", pretty_video_name: str = ""
) -> None:
-    """Launch the worker.
-    :param video_path: path to the video file
-    :param output_path: path to the output folder
-    :param pretty_video_name: name of the video file for pretty printing (useful for urls)
+    """
+    Launch the worker.
+
+    Args:
+        video_path (str): Path to the video file.
+        output_path (str, optional): Path to the output folder. Defaults to "".
+        pretty_video_name (str, optional): Name of the video file for pretty printing (useful for urls).
+            Defaults to "".
    """
    if not pretty_video_name:
        pretty_video_name = os.path.basename(video_path)
@@ -1391,6 +2058,14 @@

    )

def queue_reader(self, output_path, read_interval=0.1) -> None:
+    """
+    Reads frames from the queue and saves them as JPEG images.
+
+    Args:
+        output_path (str): The directory path where the frames will be saved.
+        read_interval (float, optional): The time interval between reading frames from the queue.
+            Defaults to 0.1 seconds.
+    """
    while True:
        if not self.q.empty():
            frame_object: FrameObject
@@ -1435,48 +2110,112 @@

- Launch the worker.
- :param video_path: path to the video file
- :param output_path: path to the output folder
- :param pretty_video_name: name of the video file for pretty printing (useful for urls)
+ Launch the worker.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| video_path | str | Path to the video file. | required |
| output_path | str | Path to the output folder. Defaults to "". | '' |
| pretty_video_name | str | Name of the video file for pretty printing (useful for urls). Defaults to "". | '' |
Source code in video_sampler/sampler.py
def launch(
         self, video_path: str, output_path: str = "", pretty_video_name: str = ""
     ) -> None:
    -    """Launch the worker.
    -    :param video_path: path to the video file
    -    :param output_path: path to the output folder
    -    :param pretty_video_name: name of the video file for pretty printing (useful for urls)
    +    """
    +    Launch the worker.
    +
    +    Args:
    +        video_path (str): Path to the video file.
    +        output_path (str, optional): Path to the output folder. Defaults to "".
    +        pretty_video_name (str, optional): Name of the video file for pretty printing (useful for urls).
    +                                            Defaults to "".
         """
         if not pretty_video_name:
             pretty_video_name = os.path.basename(video_path)
    @@ -1506,6 +2245,123 @@ 
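A short end-to-end sketch, assuming SamplerConfig is importable from video_sampler.config (an unverified path) and using the launch signature documented above:

from video_sampler.sampler import Worker
from video_sampler.config import SamplerConfig  # assumed location of SamplerConfig

worker = Worker(cfg=SamplerConfig(min_frame_interval_sec=1.0, keyframes_only=True))
worker.launch(video_path="FatCat.mp4", output_path="./dataset-frames/")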

queue_reader(output_path, read_interval=0.1)

    Reads frames from the queue and saves them as JPEG images.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| output_path | str | The directory path where the frames will be saved. | required |
| read_interval | float | The time interval between reading frames from the queue. Defaults to 0.1 seconds. | 0.1 |
Source code in video_sampler/sampler.py
def queue_reader(self, output_path, read_interval=0.1) -> None:
    +    """
    +    Reads frames from the queue and saves them as JPEG images.
    +
    +    Args:
    +        output_path (str): The directory path where the frames will be saved.
    +        read_interval (float, optional): The time interval between reading frames from the queue.
    +                Defaults to 0.1 seconds.
    +    """
    +    while True:
    +        if not self.q.empty():
    +            frame_object: FrameObject
    +            for frame_object in self.q.get():
    +                if frame_object.metadata.get("end", False):
    +                    return
    +                if frame_object.frame is not None and (
    +                    not self.devnull and isinstance(frame_object.frame, Image.Image)
    +                ):
    +                    frame_object.frame.save(
    +                        os.path.join(
    +                            output_path,
    +                            f"{frame_object.metadata['frame_time']}.jpg",
    +                        )
    +                    )
    +        time.sleep(read_interval)
diff --git a/reference/video_sampler/visualisation/clustering/index.html b/reference/video_sampler/visualisation/clustering/index.html
index d76e1d2..a55d57f 100644
--- a/reference/video_sampler/visualisation/clustering/index.html
+++ b/reference/video_sampler/visualisation/clustering/index.html
@@ -776,9 +776,60 @@

- Build a feature extraction model
- :param model_str: model name
- :return: tuple of (model, extractor)
+ Build a feature extraction model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_str | str | Model name. | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| tuple | | Tuple of (model, extractor). |
    Source code in video_sampler/visualisation/clustering.py @@ -789,10 +840,18 @@


    def build_feature_model(model_str: str):
    -    """Build a feature extraction model
    -    :param model_str: model name
    -    :return: tuple of (model, extractor)
    +    """Build a feature extraction model.
    +
    +    Args:
    +        model_str (str): Model name.
    +
    +    Returns:
    +        tuple: Tuple of (model, extractor).
         """
         extractor = AutoFeatureExtractor.from_pretrained(model_str)
         model = ResNetModel.from_pretrained(model_str)
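Usage sketch; "microsoft/resnet-50" is a hypothetical checkpoint name (chosen only because the function loads a ResNetModel), and the return order follows the docstring above:

model, extractor = build_feature_model("microsoft/resnet-50")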
    @@ -816,22 +875,59 @@ 

- Cluster features using t-SNE and KMeans
- :param features: dict with keys "embeds" and "paths"
- :param max_clusters: maximum number of clusters
- :return: tuple of (X, cluster_labels)
+ Cluster features using t-SNE and KMeans

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| features | ndarray | dict with keys "embeds" and "paths" | required |
| max_clusters | int | maximum number of clusters | 50 |

Returns:

tuple of (X, cluster_labels)
Source code in video_sampler/visualisation/clustering.py
def cluster_features(
         features,
         max_clusters=50,
     ):
         """Cluster features using t-SNE and KMeans
    -    :param features: dict with keys "embeds" and "paths"
    -    :param max_clusters: maximum number of clusters
    -    :return: tuple of (X, cluster_labels)
    +
    +    Args:
    +        features (np.ndarray): dict with keys "embeds" and "paths"
    +        max_clusters (int): maximum number of clusters
    +
+    Returns:
+        tuple: Tuple of (X, cluster_labels).
         """
         proj = TSNE(n_components=2, perplexity=35, metric="cosine")
         Xorg = np.asarray(features["embeds"])
    @@ -875,20 +987,106 @@ 

- Extract features from a folder of images
- :param model_str: model name
- :param image_folder: folder with images
- :param mkey: key for the pixel values
- :param batch_size: batch size
- :return: dict with keys "embeds" and "paths"
+ Extract features from a folder of images.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model_str | str | Model name. | required |
| image_folder | Path | Folder with images. | required |
| mkey | str | Key for the pixel values. Defaults to "pixel_values". | 'pixel_values' |
| batch_size | int | Batch size. Defaults to 8. | 8 |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| dict | | Dictionary with keys "embeds" and "paths". |
Source code in video_sampler/visualisation/clustering.py
def extract_features(
         model_str: str, image_folder: Path, mkey="pixel_values", batch_size: int = 8
     ):
    -    """Extract features from a folder of images
    -    :param model_str: model name
    -    :param image_folder: folder with images
    -    :param mkey: key for the pixel values
    -    :param batch_size: batch size
    -    :return: dict with keys "embeds" and "paths"
    +    """Extract features from a folder of images.
    +
    +    Args:
    +        model_str (str): Model name.
    +        image_folder (Path): Folder with images.
    +        mkey (str, optional): Key for the pixel values. Defaults to "pixel_values".
    +        batch_size (int, optional): Batch size. Defaults to 8.
    +
    +    Returns:
    +        dict: Dictionary with keys "embeds" and "paths".
         """
     
         out_features = defaultdict(list)
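Putting the two functions together; a sketch in which the checkpoint name is hypothetical and the call signatures follow the docstrings above:

from pathlib import Path

features = extract_features("microsoft/resnet-50", Path("./dataset-frames"), batch_size=8)
X, cluster_labels = cluster_features(features, max_clusters=50)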
    diff --git a/search/search_index.json b/search/search_index.json
    index a884966..f766983 100644
    --- a/search/search_index.json
    +++ b/search/search_index.json
    @@ -1 +1 @@
    -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"video-sampler","text":"

    Video sampler allows you to efficiently sample video frames. Currently, it uses keyframe decoding, frame interval gating and perceptual hashing to reduce duplicated samples.

    Use case: for sampling videos for later annotations used in machine learning.

    "},{"location":"#table-of-contents","title":"Table of Contents","text":"
    • video-sampler
    • Table of Contents
    • Documentation
    • Features
    • Installation and Usage
      • Basic usage
      • YT-DLP integration plugin
        • Extra YT-DLP options
      • API examples
      • Advanced usage
      • Gating
      • CLIP-based gating comparison
      • Blur gating
    • Benchmarks
    • Benchmark videos
    • Flit commands
      • Build
      • Install
      • Publish
    • \ud83d\udee1 License
    • \ud83d\udcc3 Citation
    "},{"location":"#documentation","title":"Documentation","text":"

    Documentation is available at https://lemurpwned.github.io/video-sampler/.

    "},{"location":"#features","title":"Features","text":"
    • [x] Direct sampling methods:
    • [x] hash - uses perceptual hashing to reduce duplicated samples
    • [x] entropy - uses entropy to reduce duplicated samples (work in progress)
    • [x] gzip - uses gzip compressed size to reduce duplicated samples (work in progress)
    • [x] buffer - uses sliding buffer to reduce duplicated samples
    • [x] grid - uses grid sampling to reduce duplicated samples
    • [x] Gating methods (modifications on top of direct sampling methods):
    • [x] clip - uses CLIP to filter out frames that do not contain the specified objects
    • [x] blur - uses blur detection to filter out frames that are too blurry
    • [x] Integrations
    • [x] YTDLP integration -- streams directly from yt-dlp queries, playlists or single videos
    "},{"location":"#installation-and-usage","title":"Installation and Usage","text":"
    pip install -U video_sampler\n

    then you can run

    python3 -m video_sampler --help\n

    or simply

    video_sampler --help\n
    "},{"location":"#basic-usage","title":"Basic usage","text":"
    python3 -m video_sampler hash FatCat.mp4 ./dataset-frames/ --hash-size 3 --buffer-size 20\n
    "},{"location":"#yt-dlp-integration-plugin","title":"YT-DLP integration plugin","text":"

Before using, please consult the ToS of the website you are scraping from -- use responsibly and for research purposes. To use the YT-DLP integration, you need to install yt-dlp first (see yt-dlp). Then, you simply add --ytdlp to the command, which changes the meaning of the video_path argument.

    • to search
    video_sampler hash \"ytsearch:cute cats\" ./folder-frames/ \\\n  --hash-size 3 --buffer-size 20 --ytdlp\n
    • to sample a single video
    video_sampler hash \"https://www.youtube.com/watch?v=W86cTIoMv2U\" ./folder-frames/ \\\n    --hash-size 3 --buffer-size 20 --ytdlp\n
    • to sample a playlist
    video_sampler hash \"https://www.youtube.com/watch?v=GbpP3Sxp-1U&list=PLFezMcAw96RGvTTTbdKrqew9seO2ZGRmk\" ./folder-frames/ \\\n  --hash-size 3 --buffer-size 20 --ytdlp\n

    The videos are never directly downloaded, only streamed, so you can use it to sample videos from the internet without downloading them first.

    "},{"location":"#extra-yt-dlp-options","title":"Extra YT-DLP options","text":"

    You can pass extra options to yt-dlp by using the -yt-extra-args flag. For example:

    this will only sample videos uploaded before 2019-01-01:

    ... --ytdlp --yt-extra-args '--datebefore 20190101'\n

    or this will only sample videos uploaded after 2019-01-01:

    ... --ytdlp --yt-extra-args '--dateafter 20190101'\n

    or this will skip all shorts:

... --ytdlp --yt-extra-args '--match-filter \"original_url!*=/shorts/ & url!*=/shorts/\"'\n
    "},{"location":"#api-examples","title":"API examples","text":"

    See examples in https://github.com/LemurPwned/video-sampler/tree/main/scripts.
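For a quick orientation, a minimal sketch of programmatic use; the import paths and config fields are assumptions based on the reference docs above, not verified against the scripts:

from video_sampler.sampler import VideoSampler
from video_sampler.config import SamplerConfig  # assumed location of SamplerConfig

config = SamplerConfig(min_frame_interval_sec=1.0, keyframes_only=True, buffer_size=30)
sampler = VideoSampler(cfg=config)
for frames in sampler.sample("FatCat.mp4"):
    for frame_obj in frames:
        if frame_obj.frame is not None:
            frame_obj.frame.save(f"./dataset-frames/{frame_obj.metadata['frame_time']}.jpg")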

    "},{"location":"#advanced-usage","title":"Advanced usage","text":"

    There are 3 sampling methods available:

    • hash - uses perceptual hashing to reduce duplicated samples
    • entropy - uses entropy to reduce duplicated samples (work in progress)
    • gzip - uses gzip compressed size to reduce duplicated samples (work in progress)

    To launch any of them you can run and substitute method-name with one of the above:

    video_sampler buffer `method-name` ...other options\n

    e.g.

    video_sampler buffer entropy --buffer-size 20 ...\n

where buffer-size for entropy and gzip means the top-k sliding buffer size. The sliding buffer also uses hashing to reduce duplicated samples.

    "},{"location":"#gating","title":"Gating","text":"

    Aside from basic sampling rules, you can also apply gating rules to the sampled frames, further reducing the number of frames. There are 3 gating methods available:

    • pass - pass all frames
    • clip - use CLIP to filter out frames that do not contain the specified objects
    • blur - use blur detection to filter out frames that are too blurry

    Here's a quick example of how to use clip:

    python3 -m video_sampler clip ./videos ./scratch/clip --pos-samples \"a cat\" --neg-samples \"empty background, a lemur\"  --hash-size 4\n
    "},{"location":"#clip-based-gating-comparison","title":"CLIP-based gating comparison","text":"

    Here's a brief comparison of the frames sampled with and without CLIP-based gating with the following config:

      gate_def = dict(\n      type=\"clip\",\n      pos_samples=[\"a cat\"],\n      neg_samples=[\n          \"an empty background\",\n          \"text on screen\",\n          \"a forest with no animals\",\n      ],\n      model_name=\"ViT-B-32\",\n      batch_size=32,\n      pos_margin=0.2,\n      neg_margin=0.3,\n  )\n

    Evidently, CLIP-based gating is able to filter out frames that do not contain a cat and in consequence, reduce the number of frames with plain background. It also thinks that a lemur is a cat, which is not entirely wrong as fluffy creatures go.

[Image grid comparing sampled frames: Pass gate (no gating) | CLIP gate | Grid]

    The effects of gating in numbers, for this particular set of examples (see produced vs gated columns). produced represents the number of frames sampled without gating, here after the perceptual hashing, while gated represents the number of frames sampled after gating.

| video | buffer | gate | decoded | produced | gated |
| --- | --- | --- | --- | --- | --- |
| FatCat.mp4 | grid | pass | 179 | 31 | 31 |
| SmolCat.mp4 | grid | pass | 118 | 24 | 24 |
| HighLemurs.mp4 | grid | pass | 161 | 35 | 35 |
| FatCat.mp4 | hash | pass | 179 | 101 | 101 |
| SmolCat.mp4 | hash | pass | 118 | 61 | 61 |
| HighLemurs.mp4 | hash | pass | 161 | 126 | 126 |
| FatCat.mp4 | hash | clip | 179 | 101 | 73 |
| SmolCat.mp4 | hash | clip | 118 | 61 | 31 |
| HighLemurs.mp4 | hash | clip | 161 | 126 | 66 |
"},{"location":"#blur-gating","title":"Blur gating","text":"

    Helps a little with blurry videos. Adjust threshold and method (laplacian or fft) for best results. Some results from fft at threshold=20:

| video | buffer | gate | decoded | produced | gated |
| --- | --- | --- | --- | --- | --- |
| MadLad.mp4 | grid | pass | 120 | 31 | 31 |
| MadLad.mp4 | hash | pass | 120 | 110 | 110 |
| MadLad.mp4 | hash | blur | 120 | 110 | 85 |
"},{"location":"#benchmarks","title":"Benchmarks","text":"

    Configuration for this benchmark:

    SamplerConfig(min_frame_interval_sec=1.0, keyframes_only=True, buffer_size=30, hash_size=X, queue_wait=0.1, debug=True)\n
| Video | Total frames | Hash size | Decoded | Saved |
| --- | --- | --- | --- | --- |
| SmolCat | 2936 | 8 | 118 | 106 |
| SmolCat | - | 4 | - | 61 |
| Fat Cat | 4462 | 8 | 179 | 163 |
| Fat Cat | - | 4 | - | 101 |
| HighLemurs | 4020 | 8 | 161 | 154 |
| HighLemurs | - | 4 | - | 126 |
    SamplerConfig(\n    min_frame_interval_sec=1.0,\n    keyframes_only=True,\n    queue_wait=0.1,\n    debug=False,\n    print_stats=True,\n    buffer_config={'type': 'entropy'/'gzip', 'size': 30, 'debug': False, 'hash_size': 8, 'expiry': 50}\n)\n
| Video | Total frames | Type | Decoded | Saved |
| --- | --- | --- | --- | --- |
| SmolCat | 2936 | entropy | 118 | 39 |
| SmolCat | - | gzip | - | 39 |
| Fat Cat | 4462 | entropy | 179 | 64 |
| Fat Cat | - | gzip | - | 73 |
| HighLemurs | 4020 | entropy | 161 | 59 |
| HighLemurs | - | gzip | - | 63 |
"},{"location":"#benchmark-videos","title":"Benchmark videos","text":"
    • SmolCat
    • Fat Cat
    • HighLemurs
    • MadLad
    "},{"location":"#flit-commands","title":"Flit commands","text":""},{"location":"#build","title":"Build","text":"
    flit build\n
    "},{"location":"#install","title":"Install","text":"
    flit install\n
    "},{"location":"#publish","title":"Publish","text":"

    Remember to bump the version in pyproject.toml before publishing.

    flit publish\n
    "},{"location":"#license","title":"\ud83d\udee1 License","text":"

    This project is licensed under the terms of the MIT license. See LICENSE for more details.

    "},{"location":"#citation","title":"\ud83d\udcc3 Citation","text":"
    @misc{video-sampler,\n  author = {video-sampler},\n  title = {Video sampler allows you to efficiently sample video frames},\n  year = {2023},\n  publisher = {GitHub},\n  journal = {GitHub repository},\n  howpublished = {\\url{https://github.com/LemurPwned/video-sampler}}\n}\n
    "},{"location":"reference/video_sampler/buffer/","title":"Video sampler","text":""},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.EntropyByffer","title":"EntropyByffer","text":"

    Bases: FrameBuffer

    Measure image entropy as a function of the image usability

    Source code in video_sampler/buffer.py
    class EntropyByffer(FrameBuffer):\n    \"\"\"Measure image entropy as a function of the image usability\"\"\"\n\n    def __init__(\n        self, size: int, expiry: int, debug_flag: bool = False, hash_size: int = 8\n    ) -> None:\n        self.sliding_top_k_buffer = SlidingTopKBuffer(\n            size=size, expiry=expiry, debug_flag=debug_flag, hash_size=hash_size\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return self.sliding_top_k_buffer.get_buffer_state()\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        entropy = item.entropy()\n        return self.sliding_top_k_buffer.add(item, {**metadata, \"index\": -entropy})\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        return self.sliding_top_k_buffer.final_flush()\n\n    def clear(self):\n        self.sliding_top_k_buffer.clear()\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer","title":"FrameBuffer","text":"

    Bases: ABC

    Source code in video_sampler/buffer.py
    class FrameBuffer(ABC):\n    @abstractmethod\n    def add(self, item: Image.Image, metadata: dict[str, Any]) -> None | tuple:\n        pass\n\n    @abstractmethod\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        \"\"\"Flush the buffer and return the remaining items\"\"\"\n        pass\n\n    @abstractmethod\n    def get_buffer_state(self) -> list[str]:\n        \"\"\"Return the current state of the buffer\"\"\"\n        pass\n\n    @abstractmethod\n    def clear(self):\n        \"\"\"Clear the buffer\"\"\"\n        pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.clear","title":"clear() abstractmethod","text":"

    Clear the buffer

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef clear(self):\n    \"\"\"Clear the buffer\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.final_flush","title":"final_flush() abstractmethod","text":"

    Flush the buffer and return the remaining items

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n    \"\"\"Flush the buffer and return the remaining items\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.get_buffer_state","title":"get_buffer_state() abstractmethod","text":"

    Return the current state of the buffer

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef get_buffer_state(self) -> list[str]:\n    \"\"\"Return the current state of the buffer\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GridBuffer","title":"GridBuffer","text":"

    Bases: HashBuffer

    A class representing a grid-based buffer for images. Splits the image into a grid and stores the hashes of the grid cells in a mosaic buffer.

    Parameters:

    Name Type Description Default size int

    The maximum size of the buffer.

    required debug_flag bool

    A flag indicating whether debug information should be printed.

    False hash_size int

    The size of the hash.

    4 grid_x int

    The number of grid cells in the x-axis.

    4 grid_y int

    The number of grid cells in the y-axis.

    4 max_hits int

    The maximum number of hits allowed for a hash.

    1

    Attributes:

    Name Type Description grid_x int

    The number of grid cells in the x-axis.

    grid_y int

    The number of grid cells in the y-axis.

    max_hits int

    The maximum number of hits allowed for a hash.

    mosaic_buffer dict

    A dictionary storing the mosaic buffer.

    Methods:

    Name Description add

    Adds an image to the buffer along with its metadata.

    clear

    Clears the buffer and the mosaic buffer.

    update_ttl_buffer

    Updates the buffer by expiring images that are not in the grid.

    Source code in video_sampler/buffer.py
    class GridBuffer(HashBuffer):\n    \"\"\"\n    A class representing a grid-based buffer for images.\n    Splits the image into a grid and stores the hashes of the grid cells in a mosaic buffer.\n\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): A flag indicating whether debug information should be printed.\n        hash_size (int, optional): The size of the hash.\n        grid_x (int, optional): The number of grid cells in the x-axis.\n        grid_y (int, optional): The number of grid cells in the y-axis.\n        max_hits (int, optional): The maximum number of hits allowed for a hash.\n\n    Attributes:\n        grid_x (int): The number of grid cells in the x-axis.\n        grid_y (int): The number of grid cells in the y-axis.\n        max_hits (int): The maximum number of hits allowed for a hash.\n        mosaic_buffer (dict): A dictionary storing the mosaic buffer.\n\n    Methods:\n        add(item, metadata):\n            Adds an image to the buffer along with its metadata.\n        clear():\n            Clears the buffer and the mosaic buffer.\n        update_ttl_buffer():\n            Updates the buffer by expiring images that are not in the grid.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        debug_flag: bool = False,\n        hash_size: int = 4,\n        grid_x: int = 4,\n        grid_y: int = 4,\n        max_hits: int = 1,\n    ) -> None:\n        super().__init__(size, debug_flag, hash_size)\n        self.grid_x = grid_x\n        self.grid_y = grid_y\n        self.max_hits = max_hits\n        self.mosaic_buffer = {}\n\n    def __get_grid_hash(self, item: Image.Image) -> str:\n        \"\"\"Compute grid hashes for a given image\"\"\"\n        for x in range(self.grid_x):\n            for y in range(self.grid_y):\n                yield str(\n                    phash(\n                        item.crop(\n                            (\n                                x * item.width / self.grid_x,\n                                y * item.height / self.grid_y,\n                                (x + 1) * item.width / self.grid_x,\n                                (y + 1) * item.height / self.grid_y,\n                            )\n                        ),\n                        hash_size=self.hash_size,\n                    )\n                )\n\n    def _check_mosaic(self, mosaic_hash: str):\n        return mosaic_hash in self.mosaic_buffer\n\n    def update_ttl_buffer(self):\n        # expire the images that are not in the grid\n        if len(self.ordered_buffer) >= self.max_size:\n            to_return_hash, return_data = self.ordered_buffer.popitem(last=False)\n            if to_return_hash is not None:\n                removal_keys = [\n                    img_hash\n                    for img_hash, mosaic_hash in self.mosaic_buffer.items()\n                    if mosaic_hash == to_return_hash\n                ]\n                for key in removal_keys:\n                    del self.mosaic_buffer[key]\n            return return_data\n        return None\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        hash_ = str(phash(item, hash_size=self.hash_size))\n        if not self._check_duplicate(hash_):\n            # not automatically rejected, check the mosaic buffer\n            hash_hits = 0\n            hash_sets = []\n            for el_hash_ in self.__get_grid_hash(item):\n                if el_hash_ in self.mosaic_buffer:\n                    hash_hits += 1\n  
              hash_sets.append(el_hash_)\n\n            if hash_hits < self.max_hits:\n                # add image hash to the ttl counter\n                self.ordered_buffer[hash_] = (item, metadata)\n                # add the image to the mosaic buffer\n                # this also automatically overwrites the deleted hashes\n                for el_hash in hash_sets:\n                    self.mosaic_buffer[el_hash] = hash_\n\n            if self.debug_flag:\n                console.print(\n                    f\"\\tHash hits: {hash_hits}\"\n                    f\"\\tHash sets: {len(hash_sets)}\"\n                    f\"\\tHash buffer: {len(self.get_buffer_state())}\"\n                    f\"\\tMosaic buffer: {len(self.mosaic_buffer)}\"\n                )\n        return self.update_ttl_buffer()\n\n    def clear(self):\n        super().clear()\n        self.mosaic_buffer = {}\n
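Example: a hedged sketch of the mosaic-level deduplication (assuming Pillow, numpy, imagehash and video_sampler are available; the frames are synthetic):

```python
import numpy as np
from PIL import Image

from video_sampler.buffer import GridBuffer

buf = GridBuffer(size=8, hash_size=4, grid_x=4, grid_y=4, max_hits=2)
rng = np.random.default_rng(1)
base = rng.integers(0, 256, (64, 64, 3), dtype=np.uint8)

frame_a = Image.fromarray(base)
frame_b = Image.fromarray(np.rot90(base).copy())  # same pixels, new layout

buf.add(frame_a, {"frame_indx": 0})
buf.add(frame_b, {"frame_indx": 1})
# a frame is rejected when >= max_hits of its 16 cell hashes already sit
# in the mosaic buffer of previously accepted frames
print(len(buf.get_buffer_state()))  # frames retained after grid-level dedup
```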
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GridBuffer.__get_grid_hash","title":"__get_grid_hash(item)","text":"

    Compute grid hashes for a given image

    Source code in video_sampler/buffer.py
    def __get_grid_hash(self, item: Image.Image) -> str:\n    \"\"\"Compute grid hashes for a given image\"\"\"\n    for x in range(self.grid_x):\n        for y in range(self.grid_y):\n            yield str(\n                phash(\n                    item.crop(\n                        (\n                            x * item.width / self.grid_x,\n                            y * item.height / self.grid_y,\n                            (x + 1) * item.width / self.grid_x,\n                            (y + 1) * item.height / self.grid_y,\n                        )\n                    ),\n                    hash_size=self.hash_size,\n                )\n            )\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GzipBuffer","title":"GzipBuffer","text":"

    Bases: FrameBuffer

Uses gzip-compressed size as a proxy for image usability.

    Source code in video_sampler/buffer.py
    class GzipBuffer(FrameBuffer):\n    \"\"\"Measure compression size as a function of the image usability\"\"\"\n\n    def __init__(\n        self, size: int, expiry: int, debug_flag: bool = False, hash_size: int = 8\n    ) -> None:\n        self.sliding_top_k_buffer = SlidingTopKBuffer(\n            size=size, expiry=expiry, debug_flag=debug_flag, hash_size=hash_size\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return self.sliding_top_k_buffer.get_buffer_state()\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        compressed_l = len(gzip.compress(item.tobytes()))\n        return self.sliding_top_k_buffer.add(item, {**metadata, \"index\": -compressed_l})\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        return self.sliding_top_k_buffer.final_flush()\n\n    def clear(self):\n        self.sliding_top_k_buffer.clear()\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.HashBuffer","title":"HashBuffer","text":"

    Bases: FrameBuffer

A buffer that stores frames with their corresponding metadata and checks for duplicates based on image hashes.

Parameters:

Name Type Description Default size int

The maximum size of the buffer.

required debug_flag bool

Flag indicating whether to enable debug mode.

False hash_size int

The size of the image hash.

4

    Methods:

    Name Description get_buffer_state

    Returns the current state of the buffer as a list of image hashes.

    add

Adds an item to the buffer along with its metadata.

    final_flush

    Yields the stored items and their metadata in the buffer.

Private Methods

__add(hash_: str, item: Image.Image, metadata: dict): Adds an item to the buffer under the given hash with its metadata.

_check_duplicate(hash_: str) -> bool: Checks whether the given hash already exists in the buffer and renews its validity if found.

    Source code in video_sampler/buffer.py
    class HashBuffer(FrameBuffer):\n    \"\"\"\n    A buffer that stores frames with their corresponding metadata and\n    checks for duplicates based on image hashes.\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): Flag indicating whether to enable debug mode. Defaults to False.\n        hash_size (int, optional): The size of the image hash. Defaults to 4.\n\n    Methods:\n        get_buffer_state() -> list[str]:\n            Returns the current state of the buffer as a list of image hashes.\n\n        add(item: Image.Image, metadata: dict[str, Any])\n            Adds an item to the buffer along with its metadata.\n\n        final_flush() -> Iterable[tuple[Image.Image | None, dict]]:\n            Yields the stored items and their metadata in the buffer.\n\n        clear()\n            Clears the buffer.\n\n    Private Methods:\n        __add(item: Image.Image, hash_: str, metadata: dict)\n            Adds an item to the buffer with the given hash and metadata.\n\n        __check_duplicate(hash_: str) -> bool:\n            Checks if the given hash already exists in the buffer and renews its validity if found.\n\n    \"\"\"\n\n    def __init__(self, size: int, debug_flag: bool = False, hash_size: int = 4) -> None:\n        self.ordered_buffer = OrderedDict()\n        self.max_size = size\n        self.debug_flag = debug_flag\n        self.hash_size = hash_size\n\n    def get_buffer_state(self) -> list[str]:\n        return list(self.ordered_buffer.keys())\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        hash_ = str(phash(item, hash_size=self.hash_size))\n        if not self._check_duplicate(hash_):\n            return self.__add(hash_, item, metadata)\n        return None\n\n    def __add(self, hash_: str, item: Image.Image, metadata: dict):\n        self.ordered_buffer[hash_] = (item, metadata)\n        if len(self.ordered_buffer) >= self.max_size:\n            return self.ordered_buffer.popitem(last=False)[1]\n        return None\n\n    def _check_duplicate(self, hash_: str) -> bool:\n        if hash_ in self.ordered_buffer:\n            # renew the hash validity\n            if self.debug_flag:\n                console.print(\n                    f\"Renewing {hash_}\",\n                    style=f\"bold {Color.red.value}\",\n                )\n            self.ordered_buffer.move_to_end(hash_)\n            return True\n        return False\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        yield from self.ordered_buffer.values()\n\n    def clear(self):\n        self.ordered_buffer.clear()\n
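Example: a minimal sketch of the duplicate check (assuming Pillow and video_sampler):

```python
from PIL import Image

from video_sampler.buffer import HashBuffer

buf = HashBuffer(size=4, hash_size=4)
frame = Image.new("RGB", (32, 32), "white")
buf.add(frame, {"frame_indx": 0})
buf.add(frame.copy(), {"frame_indx": 1})  # identical phash: renewed, not re-added
print(buf.get_buffer_state())  # a single hash remains
```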
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.SamplerConfig","title":"SamplerConfig dataclass","text":"

    Configuration options for the video sampler.

    Parameters:

    Name Type Description Default min_frame_interval_sec float

    The minimum time interval between sampled frames in seconds. Defaults to 1.

    1 keyframes_only bool

    Flag indicating whether to sample only keyframes. Defaults to True.

    True queue_wait float

    The time to wait between checking the frame queue in seconds. Defaults to 0.1.

    0.1 debug bool

    Flag indicating whether to enable debug mode. Defaults to False.

    False print_stats bool

    Flag indicating whether to print sampling statistics. Defaults to False.

    False buffer_config dict[str, Any]

Configuration options for the frame buffer. Defaults to {\"type\": \"hash\", \"hash_size\": 8, \"size\": 15, \"debug\": True}, matching the field default below.

    field(default_factory=lambda : {'type': 'hash', 'hash_size': 8, 'size': 15, 'debug': True}) gate_config dict[str, Any]

    Configuration options for the frame gate. Defaults to {\"type\": \"pass\"}.

    field(default_factory=lambda : {'type': 'pass'})

    Methods:

    Name Description __str__

    Returns a string representation of the configuration.

    Source code in video_sampler/buffer.py
    @dataclass\nclass SamplerConfig:\n    \"\"\"\n    Configuration options for the video sampler.\n\n    Args:\n        min_frame_interval_sec (float, optional): The minimum time interval\n            between sampled frames in seconds. Defaults to 1.\n        keyframes_only (bool, optional): Flag indicating whether to\n            sample only keyframes. Defaults to True.\n        queue_wait (float, optional): The time to wait between checking\n            the frame queue in seconds. Defaults to 0.1.\n        debug (bool, optional): Flag indicating whether to enable debug mode.\n            Defaults to False.\n        print_stats (bool, optional): Flag indicating whether to print\n            sampling statistics. Defaults to False.\n        buffer_config (dict[str, Any], optional): Configuration options for\n                the frame buffer. Defaults to {\"type\": \"entropy\", \"size\": 15,\n                \"debug\": True}.\n        gate_config (dict[str, Any], optional): Configuration options for\n                the frame gate. Defaults to {\"type\": \"pass\"}.\n\n    Methods:\n        __str__() -> str:\n            Returns a string representation of the configuration.\n\n    \"\"\"\n\n    min_frame_interval_sec: float = 1\n    keyframes_only: bool = True\n    queue_wait: float = 0.1\n    debug: bool = False\n    print_stats: bool = False\n    buffer_config: dict[str, Any] = field(\n        default_factory=lambda: {\n            \"type\": \"hash\",\n            \"hash_size\": 8,\n            \"size\": 15,\n            \"debug\": True,\n        }\n    )\n    gate_config: dict[str, Any] = field(\n        default_factory=lambda: {\n            \"type\": \"pass\",\n        }\n    )\n\n    def __str__(self) -> str:\n        return str(asdict(self))\n
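Example: a sketch of a config that samples at most one frame every two seconds and deduplicates with a grid buffer (field and key names are taken from the code above):

```python
from video_sampler.buffer import SamplerConfig

cfg = SamplerConfig(
    min_frame_interval_sec=2.0,
    keyframes_only=True,
    buffer_config={
        "type": "grid",
        "size": 15,
        "debug": False,
        "hash_size": 4,
        "grid_x": 4,
        "grid_y": 4,
        "max_hits": 2,
    },
    gate_config={"type": "pass"},
)
print(cfg)  # __str__ renders the dataclass as a dict
```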
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.SlidingTopKBuffer","title":"SlidingTopKBuffer","text":"

    Bases: FrameBuffer

    A class representing a sliding top-k buffer for frames.

    Parameters:

    Name Type Description Default size int

    The maximum size of the buffer.

    required debug_flag bool

    A flag indicating whether debug information should be printed.

    False expiry int

    The expiry count for frames.

    30 hash_size int

    The size of the hash.

    8

    Attributes:

    Name Type Description sliding_buffer list

    The sliding buffer implemented as a min heap.

    max_size int

    The maximum size of the buffer.

    debug_flag bool

    A flag indicating whether debug information should be printed.

    expiry_count int

    The expiry count for frames.

    hash_size int

    The size of the hash.

    Methods:

    Name Description get_buffer_state

    Returns the current state of the buffer.

    add

    Adds a frame to the buffer along with its metadata.

    final_flush

    Performs a final flush of the buffer and yields the remaining frames.

    clear

    Clears the buffer.

    Source code in video_sampler/buffer.py
    class SlidingTopKBuffer(FrameBuffer):\n    \"\"\"\n    A class representing a sliding top-k buffer for frames.\n\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): A flag indicating whether debug information should be printed.\n        expiry (int, optional): The expiry count for frames.\n        hash_size (int, optional): The size of the hash.\n\n    Attributes:\n        sliding_buffer (list): The sliding buffer implemented as a min heap.\n        max_size (int): The maximum size of the buffer.\n        debug_flag (bool): A flag indicating whether debug information should be printed.\n        expiry_count (int): The expiry count for frames.\n        hash_size (int): The size of the hash.\n\n    Methods:\n        get_buffer_state() -> list[str]:\n            Returns the current state of the buffer.\n        add(item, metadata):\n            Adds a frame to the buffer along with its metadata.\n        final_flush() -> Iterable[tuple[Image.Image | None, dict]]:\n            Performs a final flush of the buffer and yields the remaining frames.\n        clear():\n            Clears the buffer.\n\n    \"\"\"\n\n    def __init__(\n        self, size: int, debug_flag: bool = False, expiry: int = 30, hash_size: int = 8\n    ) -> None:\n        # it's a min heap with a fixed size\n        self.sliding_buffer = []\n        self.max_size = size\n        self.debug_flag = debug_flag\n        self.expiry_count = expiry\n        self.hash_size = hash_size\n        assert (\n            self.expiry_count > self.max_size\n        ), \"expiry count must be greater than max size\"\n        console.print(\n            f\"Creating sliding buffer of size {self.max_size} and expiry {expiry}\",\n            style=f\"bold {Color.red.value}\",\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return [item[:3] for item in self.sliding_buffer]\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        assert \"index\" in metadata, \"metadata must have index key for sliding buffer\"\n        average_hash_ = str(average_hash(item, hash_size=self.hash_size))\n        to_return = None\n        if not self.__check_duplicate(average_hash_):\n            heapq.heappush(\n                self.sliding_buffer,\n                [metadata[\"index\"], 0, average_hash_, item, metadata],\n            )\n            if len(self.sliding_buffer) >= self.max_size:\n                to_return = heapq.heappop(self.sliding_buffer)[-2:]\n        # update the expiry count\n        expired_indx = -1\n        for i in range(len(self.sliding_buffer)):\n            self.sliding_buffer[i][1] += 1\n            if self.sliding_buffer[i][1] >= self.expiry_count:\n                expired_indx = i\n        # at any point only one item can be expired\n        if expired_indx != -1:\n            self.sliding_buffer.pop(expired_indx)  # just drop\n        return to_return\n\n    def __check_duplicate(self, hash_: str) -> bool:\n        for item in self.sliding_buffer:\n            if item[2] == hash_:\n                # renew the hash validity\n                if self.debug_flag:\n                    console.print(\n                        f\"Renewing {hash_}\",\n                        style=f\"bold {Color.red.value}\",\n                    )\n                item[1] = 0\n                return True\n        return False\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        if len(self.sliding_buffer):\n            yield 
heapq.heappop(self.sliding_buffer)[-2:]\n        yield None, {}\n\n    def clear(self):\n        self.sliding_buffer.clear()\n
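Example: a sketch of the min-heap behaviour (assuming Pillow, numpy and video_sampler). Callers store negated scores under metadata["index"], so the highest-scoring frame is emitted first once the buffer fills:

```python
import numpy as np
from PIL import Image

from video_sampler.buffer import SlidingTopKBuffer

buf = SlidingTopKBuffer(size=3, expiry=10)  # expiry must exceed size
rng = np.random.default_rng(2)
for i, score in enumerate([0.9, 0.1, 0.5, 0.7]):
    frame = Image.fromarray(rng.integers(0, 256, (16, 16, 3), dtype=np.uint8))
    out = buf.add(frame, {"index": -score, "frame_indx": i})  # "index" is required
    if out:
        img, meta = out
        # emits frame 0 (score 0.9), then frame 3 (score 0.7)
        print("emitted frame", meta["frame_indx"], "score", -meta["index"])
```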
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.create_buffer","title":"create_buffer(buffer_config)","text":"

    Create a buffer based on the config

    Source code in video_sampler/buffer.py
    def create_buffer(buffer_config: dict[str, Any]):\n    \"\"\"Create a buffer based on the config\"\"\"\n    console.print(\n        f\"Creating buffer of type {buffer_config['type']}\",\n        style=f\"bold {Color.red.value}\",\n    )\n    if buffer_config[\"type\"] == \"hash\":\n        return HashBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n        )\n    elif buffer_config[\"type\"] == \"grid\":\n        return GridBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            grid_x=buffer_config[\"grid_x\"],\n            grid_y=buffer_config[\"grid_y\"],\n            max_hits=buffer_config[\"max_hits\"],\n        )\n    elif buffer_config[\"type\"] == \"sliding_top_k\":\n        return SlidingTopKBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    elif buffer_config[\"type\"] == \"passthrough\":\n        return PassThroughBuffer()\n    elif buffer_config[\"type\"] == \"gzip\":\n        return GzipBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    elif buffer_config[\"type\"] == \"entropy\":\n        return EntropyByffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    else:\n        raise ValueError(f\"Unknown buffer type {buffer_config['type']}\")\n
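Example: the config dicts each branch above expects; note that "expiry" is only read by the sliding-window buffers:

```python
from video_sampler.buffer import create_buffer

hash_buffer = create_buffer(
    {"type": "hash", "size": 15, "debug": False, "hash_size": 8}
)
entropy_buffer = create_buffer(
    {"type": "entropy", "size": 8, "debug": False, "hash_size": 8, "expiry": 30}
)
```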
    "},{"location":"reference/video_sampler/evaluation/","title":"Evaluation","text":""},{"location":"reference/video_sampler/evaluation/#video_sampler.evaluation.compute_total_video_entropy","title":"compute_total_video_entropy()","text":"

Compute the total entropy of a video. Note that this is currently an unimplemented stub (the body is pass).

    Source code in video_sampler/evaluation.py
    def compute_total_video_entropy():\n    \"\"\"Compute the total entropy of a video\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/gating/","title":"Gating","text":""},{"location":"reference/video_sampler/gating/#video_sampler.gating.BlurGate","title":"BlurGate","text":"Source code in video_sampler/gating.py
    class BlurGate:\n    def __init__(\n        self, method: Literal[\"fft\", \"laplacian\"] = \"laplacian\", threshold: float = 100\n    ) -> None:\n        \"\"\"Gate frames based on bluriness.\n        :param method: The method to use for blur detection. Can be \"fft\" or \"laplacian\".\n        :param threshold: The threshold for bluriness. The higher the threshold, the less\n            blurry the image needs to be to be discarded.\n            Those are different depending on the method:\n            - 20 is a good start for fft\n            - 100 is a good start for laplacian.\n        \"\"\"\n        self.is_blurry = None\n        if method == \"fft\":\n            self.is_blurry = self._is_blurry_fft\n        elif method == \"laplacian\":\n            self.is_blurry = self._is_blurry_laplacian\n        else:\n            raise ValueError(f\"Unknown blur method {method}\")\n        self.threshold = threshold\n\n    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:\n        if self.is_blurry(frame) or last:\n            return EMPTY_GATED_OBJECT\n        return GatedObject([FrameObject(frame, meta)], 1)\n\n    def _is_blurry_laplacian(self, frame: Image.Image) -> bool:\n        \"\"\"Check if the image is blurry with laplacian method.\"\"\"\n        return (\n            cv2.Laplacian(\n                cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2GRAY), cv2.CV_64F\n            ).var()\n            < self.threshold\n        )\n\n    def _is_blurry_fft(self, frame: Image.Image) -> bool:\n        \"\"\"Check if the image is blurry with fft method.\"\"\"\n        f = np.fft.fft2(frame)\n        fshift = np.fft.fftshift(f)\n        magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-12)\n        return magnitude_spectrum.mean() < self.threshold\n\n    def flush(self):\n        return EMPTY_GATED_OBJECT\n
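Example: a minimal sketch (assuming Pillow, numpy, OpenCV and video_sampler; the empty gated object is assumed to report N == 0):

```python
import numpy as np
from PIL import Image

from video_sampler.gating import BlurGate

gate = BlurGate(method="laplacian", threshold=100)
sharp = Image.fromarray(
    np.random.default_rng(3).integers(0, 256, (64, 64, 3), dtype=np.uint8)
)
flat = Image.new("RGB", (64, 64), (128, 128, 128))

print(gate(sharp, {"frame_indx": 0}).N)  # 1: noise has high Laplacian variance
print(gate(flat, {"frame_indx": 1}).N)   # 0 (assumed): flat frames gate out
```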
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.BlurGate.__init__","title":"__init__(method='laplacian', threshold=100)","text":"

Gate frames based on blurriness.

:param method: The method to use for blur detection. Can be \"fft\" or \"laplacian\".
:param threshold: The threshold for blurriness. The higher the threshold, the less blurry the image needs to be to be discarded. Good starting values differ by method: 20 for \"fft\", 100 for \"laplacian\".

    Source code in video_sampler/gating.py
    def __init__(\n    self, method: Literal[\"fft\", \"laplacian\"] = \"laplacian\", threshold: float = 100\n) -> None:\n    \"\"\"Gate frames based on bluriness.\n    :param method: The method to use for blur detection. Can be \"fft\" or \"laplacian\".\n    :param threshold: The threshold for bluriness. The higher the threshold, the less\n        blurry the image needs to be to be discarded.\n        Those are different depending on the method:\n        - 20 is a good start for fft\n        - 100 is a good start for laplacian.\n    \"\"\"\n    self.is_blurry = None\n    if method == \"fft\":\n        self.is_blurry = self._is_blurry_fft\n    elif method == \"laplacian\":\n        self.is_blurry = self._is_blurry_laplacian\n    else:\n        raise ValueError(f\"Unknown blur method {method}\")\n    self.threshold = threshold\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.ClipGate","title":"ClipGate","text":"Source code in video_sampler/gating.py
    class ClipGate:\n    def __init__(\n        self,\n        pos_samples: list[str] = None,\n        neg_samples: list[str] = None,\n        model_name: str = \"ViT-B-32\",\n        batch_size: int = 32,\n        pos_margin: float = 0.2,\n        neg_margin: float = 0.3,\n    ) -> None:\n        self.model, self.preprocess, self.tokenizer = create_model(\n            model_name=model_name\n        )\n        self.pos_margin = pos_margin\n        self.neg_margin = neg_margin\n        self.batch_size = batch_size\n        self.frame_accumulator = []\n        self.metadata_accumulator = []\n        if pos_samples is None:\n            self.pos_samples = torch.zeros((1, 512))\n        else:\n            self.pos_samples = self._preproc_samples(pos_samples)\n        if neg_samples is None:\n            self.neg_samples = torch.zeros((1, 512))\n        else:\n            self.neg_samples = self._preproc_samples(neg_samples)\n\n    def __call__(self, frame: Image.Image, meta: dict, last=False) -> Any:\n        return self.flush() if last else self.add_frame(frame, meta)\n\n    def _preproc_samples(self, sample_texts: list[str]):\n        inputs = self.tokenizer(sample_texts)\n        embeds = torch.zeros((len(sample_texts), 512))\n        with torch.no_grad():\n            for i, batch in enumerate(batched(inputs, n=self.batch_size)):\n                batch = torch.stack(batch)\n                text_embeds = self.model.encode_text(batch.to(DEVICE))\n                embeds[i * self.batch_size : (i + 1) * self.batch_size] = (\n                    text_embeds.cpu()\n                )\n        embeds /= embeds.norm(dim=-1, keepdim=True)\n        return embeds\n\n    def _embed_frames(self, frames: list[Image.Image]):\n        \"\"\"Compute the embeddings for each frame.\"\"\"\n        inputs = torch.stack([self.preprocess(frame) for frame in frames]).to(DEVICE)\n        with torch.no_grad():\n            image_embeds = self.model.encode_image(inputs).cpu()\n            image_embeds /= image_embeds.norm(dim=-1, keepdim=True)\n        return image_embeds\n\n    def _get_margins(self, frame_embeds: torch.Tensor):\n        \"\"\"Compute the margins for each frame.\"\"\"\n        org_indx = np.arange(frame_embeds.shape[0])\n        neg_distance = frame_embeds @ self.neg_samples.T\n        pos_distance = frame_embeds @ self.pos_samples.T\n        neg_margin, _ = neg_distance.max(axis=-1)\n        pos_margin, _ = pos_distance.max(axis=-1)\n        incl_samples = torch.argwhere(\n            (neg_margin < self.neg_margin) & (pos_margin >= self.pos_margin)\n        )\n        return org_indx[incl_samples].ravel()\n\n    def add_frame(self, frame: Image.Image, metadata: dict) -> GatedObject:\n        self.frame_accumulator.append(frame)\n        self.metadata_accumulator.append(metadata)\n        if len(self.frame_accumulator) == self.batch_size:\n            return self.__process_metadata()\n        return EMPTY_GATED_OBJECT\n\n    def flush(self):\n        return self.__process_metadata()\n\n    def __process_metadata(self) -> GatedObject:\n        frame_embeds = self._embed_frames(self.frame_accumulator)\n        selected_frames = self._get_margins(frame_embeds)\n        to_return = [\n            FrameObject(self.frame_accumulator[i], self.metadata_accumulator[i])\n            for i in range(len(self.frame_accumulator))\n            if i in selected_frames\n        ]\n        self.frame_accumulator.clear()\n        self.metadata_accumulator.clear()\n        return GatedObject(to_return, 
len(selected_frames))\n
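Example: a hedged sketch only; constructing the gate downloads an OpenCLIP checkpoint, so it is shown here but not exercised:

```python
from video_sampler.gating import ClipGate

# keep frames near the positive prompts, drop ones near the negatives
gate = ClipGate(
    pos_samples=["a photo of a dog"],
    neg_samples=["a screenshot of text"],
    batch_size=8,
    pos_margin=0.2,
    neg_margin=0.3,
)
# frames accumulate until batch_size is reached, then a GatedObject is emitted:
# gated = gate(frame, {"frame_indx": 0})
# leftovers are released with gate.flush() or gate(frame, meta, last=True)
```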
    "},{"location":"reference/video_sampler/iterators/","title":"Iterators","text":""},{"location":"reference/video_sampler/logging/","title":"Logging","text":""},{"location":"reference/video_sampler/sampler/","title":"Sampler","text":""},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.VideoSampler","title":"VideoSampler","text":"

    The fundamental class for sampling video frames.

    Parameters:

    Name Type Description Default cfg SamplerConfig

    The configuration for the video sampler.

    required

    Attributes:

    Name Type Description cfg SamplerConfig

    The configuration for the video sampler.

    frame_buffer FrameBuffer

    The frame buffer used for sampling frames.

    gate Gate

    The gate used for filtering frames.

    stats Counter

    A counter for tracking statistics.

    Methods:

    Name Description sample

    Generates sample frames from a video.

    write_queue

    Writes sampled frames to a queue.

    Source code in video_sampler/sampler.py
    class VideoSampler:\n    \"\"\"\n    The fundamental class for sampling video frames.\n\n    Args:\n        cfg (SamplerConfig): The configuration for the video sampler.\n\n    Attributes:\n        cfg (SamplerConfig): The configuration for the video sampler.\n        frame_buffer (FrameBuffer): The frame buffer used for sampling frames.\n        gate (Gate): The gate used for filtering frames.\n        stats (Counter): A counter for tracking statistics.\n\n    Methods:\n        sample(video_path) -> Iterable[list[FrameObject]]:\n            Generates sample frames from a video.\n        write_queue(video_path, q):\n            Writes sampled frames to a queue.\n\n    \"\"\"\n\n    def __init__(self, cfg: SamplerConfig) -> None:\n        self.cfg = deepcopy(cfg)\n        self.frame_buffer = create_buffer(self.cfg.buffer_config)\n        self.gate = create_gate(self.cfg.gate_config)\n        self.stats = Counter()\n\n    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:\n        \"\"\"Generate sample frames from a video\"\"\"\n        self.stats.clear()\n        self.frame_buffer.clear()\n        with av.open(video_path) as container:\n            stream = container.streams.video[0]\n            if self.cfg.keyframes_only:\n                stream.codec_context.skip_frame = \"NONKEY\"\n            prev_time = -10\n            for frame_indx, frame in enumerate(container.decode(stream)):\n                # skip frames if keyframes_only is True\n                time_diff = frame.time - prev_time\n                self.stats[\"total\"] += 1\n                if time_diff < self.cfg.min_frame_interval_sec:\n                    continue\n                prev_time = frame.time\n\n                frame_pil: Image = frame.to_image()\n                if self.cfg.debug:\n                    buf = self.frame_buffer.get_buffer_state()\n                    console.print(\n                        f\"Frame {frame_indx}\\ttime: {frame.time}\",\n                        f\"\\t Buffer ({len(buf)}): {buf}\",\n                        style=f\"bold {Color.green.value}\",\n                    )\n                frame_meta = {\"frame_time\": frame.time, \"frame_indx\": frame_indx}\n                self.stats[\"decoded\"] += 1\n                if res := self.frame_buffer.add(\n                    frame_pil,\n                    metadata=frame_meta,\n                ):\n                    gated_obj = self.gate(*res)\n                    self.stats[\"produced\"] += 1\n                    self.stats[\"gated\"] += gated_obj.N\n                    if gated_obj.frames:\n                        yield gated_obj.frames\n\n        # flush buffer\n        for res in self.frame_buffer.final_flush():\n            if res:\n                self.stats[\"produced\"] += 1\n                gated_obj = self.gate(*res)\n                self.stats[\"gated\"] += gated_obj.N\n                if gated_obj.frames:\n                    yield gated_obj.frames\n        gated_obj = self.gate.flush()\n        self.stats[\"gated\"] += gated_obj.N\n        if gated_obj.frames:\n            yield gated_obj.frames\n        yield PROCESSING_DONE_ITERABLE\n\n    def write_queue(self, video_path: str, q: Queue):\n        try:\n            item: tuple[FrameObject, int]\n            for item in self.sample(video_path=video_path):\n                q.put(item)\n        except (av.IsADirectoryError, av.InvalidDataError) as e:\n            console.print(\n                f\"Error while processing {video_path}\",\n                
f\"\\n\\t{e}\",\n                style=f\"bold {Color.red.value}\",\n            )\n            q.put(PROCESSING_DONE_ITERABLE)\n
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.VideoSampler.sample","title":"sample(video_path)","text":"

    Generate sample frames from a video

    Source code in video_sampler/sampler.py
    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:\n    \"\"\"Generate sample frames from a video\"\"\"\n    self.stats.clear()\n    self.frame_buffer.clear()\n    with av.open(video_path) as container:\n        stream = container.streams.video[0]\n        if self.cfg.keyframes_only:\n            stream.codec_context.skip_frame = \"NONKEY\"\n        prev_time = -10\n        for frame_indx, frame in enumerate(container.decode(stream)):\n            # skip frames if keyframes_only is True\n            time_diff = frame.time - prev_time\n            self.stats[\"total\"] += 1\n            if time_diff < self.cfg.min_frame_interval_sec:\n                continue\n            prev_time = frame.time\n\n            frame_pil: Image = frame.to_image()\n            if self.cfg.debug:\n                buf = self.frame_buffer.get_buffer_state()\n                console.print(\n                    f\"Frame {frame_indx}\\ttime: {frame.time}\",\n                    f\"\\t Buffer ({len(buf)}): {buf}\",\n                    style=f\"bold {Color.green.value}\",\n                )\n            frame_meta = {\"frame_time\": frame.time, \"frame_indx\": frame_indx}\n            self.stats[\"decoded\"] += 1\n            if res := self.frame_buffer.add(\n                frame_pil,\n                metadata=frame_meta,\n            ):\n                gated_obj = self.gate(*res)\n                self.stats[\"produced\"] += 1\n                self.stats[\"gated\"] += gated_obj.N\n                if gated_obj.frames:\n                    yield gated_obj.frames\n\n    # flush buffer\n    for res in self.frame_buffer.final_flush():\n        if res:\n            self.stats[\"produced\"] += 1\n            gated_obj = self.gate(*res)\n            self.stats[\"gated\"] += gated_obj.N\n            if gated_obj.frames:\n                yield gated_obj.frames\n    gated_obj = self.gate.flush()\n    self.stats[\"gated\"] += gated_obj.N\n    if gated_obj.frames:\n        yield gated_obj.frames\n    yield PROCESSING_DONE_ITERABLE\n
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.Worker","title":"Worker","text":"Source code in video_sampler/sampler.py
    class Worker:\n    def __init__(\n        self,\n        cfg: SamplerConfig,\n        devnull: bool = False,\n        processor_cls: VideoSampler = VideoSampler,\n        extra_processor_args: dict = None,\n    ) -> None:\n        if extra_processor_args is None:\n            extra_processor_args = {}\n        self.cfg = cfg\n        self.processor = processor_cls(cfg=cfg, **extra_processor_args)\n        self.q = Queue()\n        self.devnull = devnull\n\n    def launch(\n        self, video_path: str, output_path: str = \"\", pretty_video_name: str = \"\"\n    ) -> None:\n        \"\"\"Launch the worker.\n        :param video_path: path to the video file\n        :param output_path: path to the output folder\n        :param pretty_video_name: name of the video file for pretty printing (useful for urls)\n        \"\"\"\n        if not pretty_video_name:\n            pretty_video_name = os.path.basename(video_path)\n        if output_path and self.devnull:\n            raise ValueError(\"Cannot write to disk when devnull is True\")\n        if output_path:\n            os.makedirs(output_path, exist_ok=True)\n        proc_thread = Thread(\n            target=self.processor.write_queue, args=(video_path, self.q)\n        )\n        proc_thread.start()\n        self.queue_reader(output_path, read_interval=self.cfg.queue_wait)\n        proc_thread.join()\n        if self.cfg.print_stats:\n            console.print(\n                f\"Stats for: {pretty_video_name}\",\n                f\"\\n\\tTotal frames: {self.processor.stats['total']}\",\n                f\"\\n\\tDecoded frames: {self.processor.stats['decoded']}\",\n                f\"\\n\\tProduced frames: {self.processor.stats['produced']}\",\n                f\"\\n\\tGated frames: {self.processor.stats['gated']}\",\n                style=f\"bold {Color.magenta.value}\",\n            )\n\n    def queue_reader(self, output_path, read_interval=0.1) -> None:\n        while True:\n            if not self.q.empty():\n                frame_object: FrameObject\n                for frame_object in self.q.get():\n                    if frame_object.metadata.get(\"end\", False):\n                        return\n                    if frame_object.frame is not None and (\n                        not self.devnull and isinstance(frame_object.frame, Image.Image)\n                    ):\n                        frame_object.frame.save(\n                            os.path.join(\n                                output_path,\n                                f\"{frame_object.metadata['frame_time']}.jpg\",\n                            )\n                        )\n            time.sleep(read_interval)\n
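Example: the threaded pipeline in one call (input.mp4 is a placeholder path):

```python
from video_sampler.buffer import SamplerConfig
from video_sampler.sampler import Worker

worker = Worker(SamplerConfig(print_stats=True))
# decodes in a background thread and saves surviving frames to ./frames
worker.launch("input.mp4", output_path="./frames")
```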
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.Worker.launch","title":"launch(video_path, output_path='', pretty_video_name='')","text":"

Launch the worker.

:param video_path: path to the video file
:param output_path: path to the output folder
:param pretty_video_name: name of the video file for pretty printing (useful for URLs)

    Source code in video_sampler/sampler.py
    def launch(\n    self, video_path: str, output_path: str = \"\", pretty_video_name: str = \"\"\n) -> None:\n    \"\"\"Launch the worker.\n    :param video_path: path to the video file\n    :param output_path: path to the output folder\n    :param pretty_video_name: name of the video file for pretty printing (useful for urls)\n    \"\"\"\n    if not pretty_video_name:\n        pretty_video_name = os.path.basename(video_path)\n    if output_path and self.devnull:\n        raise ValueError(\"Cannot write to disk when devnull is True\")\n    if output_path:\n        os.makedirs(output_path, exist_ok=True)\n    proc_thread = Thread(\n        target=self.processor.write_queue, args=(video_path, self.q)\n    )\n    proc_thread.start()\n    self.queue_reader(output_path, read_interval=self.cfg.queue_wait)\n    proc_thread.join()\n    if self.cfg.print_stats:\n        console.print(\n            f\"Stats for: {pretty_video_name}\",\n            f\"\\n\\tTotal frames: {self.processor.stats['total']}\",\n            f\"\\n\\tDecoded frames: {self.processor.stats['decoded']}\",\n            f\"\\n\\tProduced frames: {self.processor.stats['produced']}\",\n            f\"\\n\\tGated frames: {self.processor.stats['gated']}\",\n            style=f\"bold {Color.magenta.value}\",\n        )\n
    "},{"location":"reference/video_sampler/schemas/","title":"Schemas","text":""},{"location":"reference/video_sampler/ttl_counter/","title":"Ttl counter","text":""},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter","title":"TTLCounter","text":"

TTLCounter is a counter/list that expires items once their TTL period has elapsed.

    Source code in video_sampler/ttl_counter.py
    class TTLCounter:\n    \"\"\"TTLCounter is a counter/list that expires items after a TTL period expires.\"\"\"\n\n    def __init__(self, max_ttl: int) -> None:\n        self.inner_counter = []\n        self.max_ttl = max_ttl\n\n    def __len__(self):\n        \"\"\"Return the number of items in the counter.\"\"\"\n        return len(self.inner_counter)\n\n    def add_item(self, hash: str):\n        \"\"\"Add an item with the max TTL.\"\"\"\n        heapq.heappush(self.inner_counter, (self.max_ttl, hash))\n\n    def tick(self):\n        \"\"\"Decrease the TTL of all items by 1.\"\"\"\n        for i, (ttl, hash) in enumerate(self.inner_counter):\n            self.inner_counter[i] = (ttl - 1, hash)\n\n    def expire_one(self):\n        \"\"\"Expire the first item if its TTL is 0. Expires AT MOST one item.\"\"\"\n        # peek the first item\n        ttl, hash = self.inner_counter[0]\n        if ttl <= 0:\n            heapq.heappop(self.inner_counter)\n            return hash\n        return None\n\n    def expire_all(self):\n        \"\"\"Expire all items.\"\"\"\n        for _, hash in self.inner_counter:\n            yield hash\n        self.inner_counter.clear()\n
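Example: a minimal sketch of the tick/expire cycle:

```python
from video_sampler.ttl_counter import TTLCounter

counter = TTLCounter(max_ttl=2)
counter.add_item("hash-a")
counter.tick()
counter.tick()
print(counter.expire_one())  # "hash-a": its TTL reached 0
print(len(counter))          # 0
```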
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.__len__","title":"__len__()","text":"

    Return the number of items in the counter.

    Source code in video_sampler/ttl_counter.py
    def __len__(self):\n    \"\"\"Return the number of items in the counter.\"\"\"\n    return len(self.inner_counter)\n
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.add_item","title":"add_item(hash)","text":"

    Add an item with the max TTL.

    Source code in video_sampler/ttl_counter.py
    def add_item(self, hash: str):\n    \"\"\"Add an item with the max TTL.\"\"\"\n    heapq.heappush(self.inner_counter, (self.max_ttl, hash))\n
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.expire_all","title":"expire_all()","text":"

    Expire all items.

    Source code in video_sampler/ttl_counter.py
    def expire_all(self):\n    \"\"\"Expire all items.\"\"\"\n    for _, hash in self.inner_counter:\n        yield hash\n    self.inner_counter.clear()\n
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.expire_one","title":"expire_one()","text":"

    Expire the first item if its TTL is 0. Expires AT MOST one item.

    Source code in video_sampler/ttl_counter.py
    def expire_one(self):\n    \"\"\"Expire the first item if its TTL is 0. Expires AT MOST one item.\"\"\"\n    # peek the first item\n    ttl, hash = self.inner_counter[0]\n    if ttl <= 0:\n        heapq.heappop(self.inner_counter)\n        return hash\n    return None\n
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.tick","title":"tick()","text":"

    Decrease the TTL of all items by 1.

    Source code in video_sampler/ttl_counter.py
    def tick(self):\n    \"\"\"Decrease the TTL of all items by 1.\"\"\"\n    for i, (ttl, hash) in enumerate(self.inner_counter):\n        self.inner_counter[i] = (ttl - 1, hash)\n
    "},{"location":"reference/video_sampler/utils/","title":"Utils","text":""},{"location":"reference/video_sampler/utils/#video_sampler.utils.batched","title":"batched(iterable, n)","text":"

Batch data into tuples of length n. The last batch may be shorter. Taken from the itertools recipes: https://docs.python.org/3/library/itertools.html#itertools-recipes

    Source code in video_sampler/utils.py
    def batched(iterable, n):\n    \"\"\"\n    Batch data into tuples of length n. The last batch may be shorter.\n    from https://docs.python.org/3/library/itertools.html#itertools-recipes\n    \"\"\"\n    if n < 1:\n        raise ValueError(\"n must be at least one\")\n    it = iter(iterable)\n    while batch := tuple(islice(it, n)):\n        yield batch\n
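Example: the recipe in action:

```python
from video_sampler.utils import batched

print(list(batched("ABCDE", 2)))  # [('A', 'B'), ('C', 'D'), ('E',)]
```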
    "},{"location":"reference/video_sampler/utils/#video_sampler.utils.slugify","title":"slugify(value, allow_unicode=False)","text":"

    Taken from https://github.com/django/django/blob/master/django/utils/text.py Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated dashes to single dashes. Remove characters that aren't alphanumerics, underscores, or hyphens. Convert to lowercase. Also strip leading and trailing whitespace, dashes, and underscores.

    Source code in video_sampler/utils.py
    def slugify(value, allow_unicode=False):\n    \"\"\"\n    Taken from https://github.com/django/django/blob/master/django/utils/text.py\n    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated\n    dashes to single dashes. Remove characters that aren't alphanumerics,\n    underscores, or hyphens. Convert to lowercase. Also strip leading and\n    trailing whitespace, dashes, and underscores.\n    \"\"\"\n    value = str(value)\n    if allow_unicode:\n        value = unicodedata.normalize(\"NFKC\", value)\n    else:\n        value = (\n            unicodedata.normalize(\"NFKD\", value)\n            .encode(\"ascii\", \"ignore\")\n            .decode(\"ascii\")\n        )\n    value = re.sub(r\"[^\\w\\s-]\", \"\", value.lower())\n    return re.sub(r\"[-\\s]+\", \"-\", value).strip(\"-_\")\n
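Example:

```python
from video_sampler.utils import slugify

print(slugify("Best Video Ever!! (2024)"))  # best-video-ever-2024
```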
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/","title":"Integrations","text":""},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin","title":"YTDLPPlugin","text":"

A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

Methods:

Name Description generate_urls

Generates URLs and corresponding titles from the given URL.

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    class YTDLPPlugin:\n    \"\"\"\n    A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.\n    Methods:\n        generate_urls(url, extra_yt_constr_args=None, extra_info_extract_opts=None) -> Iterable[str]:\n            Generates URLs and corresponding titles from the given URL.\n\n    \"\"\"\n\n    def __init__(self, ie_key: str = \"Generic\"):\n        \"\"\"\n        Initialize the YTDLPPlugin instance.\n        :param ie_key (str): The key for the information extractor.\n        \"\"\"\n        self.ie_key = ie_key\n        self.ydl_opts = {\n            \"format\": best_video_only,\n        }\n\n    def generate_urls(\n        self,\n        url: str,\n        extra_info_extract_opts: dict = None,\n    ) -> Iterable[str]:\n        \"\"\"Generate URLs and corresponding titles from the given URL.\n\n        :param url (str): The URL to extract information from.\n        :param extra_info_extract_opts (dict, optional): Extra options for information extraction.\n\n        :return Iterable[str]:\n            Tuple[str, str]: A tuple containing the title and URL of each extracted entry.\n        \"\"\"\n        if extra_info_extract_opts is None:\n            extra_info_extract_opts = {}\n        extr_args = {\"ie_key\": self.ie_key} if \"ytsearch\" not in url else {}\n        with YoutubeDL(params=(self.ydl_opts | extra_info_extract_opts)) as ydl:\n            info = ydl.extract_info(url, download=False, **extr_args)\n            if \"entries\" not in info:\n                req_format = info[\"requested_formats\"][0]\n                yield info[\"title\"], req_format[\"url\"]\n            else:\n                for entry in info.get(\"entries\", []):\n                    req_format = entry[\"requested_formats\"][0]\n                    yield entry[\"title\"], req_format[\"url\"]\n\n    def get_subtitles_opts(self, no_download: bool = False) -> dict:\n        return {\n            \"postprocessors\": [\n                {\n                    \"format\": \"srt\",\n                    \"key\": \"FFmpegSubtitlesConvertor\",\n                    \"when\": \"before_dl\",\n                }\n            ],\n            \"format\": best_video_only,\n            \"subtitleslangs\": [\"en.*\"],\n            \"writeautomaticsub\": True,\n            \"writesubtitles\": True,\n        }\n\n    def generate_urls_by_subs(\n        self,\n        url: str,\n        extra_info_extract_opts: dict = None,\n    ):\n        \"\"\"Download subtitles for a given video URL.\n\n        :param video_url (str): The URL of the video to download subtitles for.\n        \"\"\"\n        if extra_info_extract_opts is None:\n            extra_info_extract_opts = {}\n        extr_args = {\"ie_key\": self.ie_key} if \"ytsearch\" not in url else {}\n        with YoutubeDL(\n            params=(self.ydl_opts | extra_info_extract_opts | self.get_subtitles_opts())\n        ) as ydl:\n            info = ydl.extract_info(url, download=False, **extr_args)\n            import json\n\n            json.dump(info, open(\"info.json\", \"w\"))\n            if \"entries\" not in info:\n                req_subs = list(info[\"requested_subtitles\"].values())[0]\n                req_format = info[\"requested_formats\"][0]\n                yield info[\"title\"], req_format[\"url\"], download_sub(req_subs[\"url\"])\n            else:\n                for entry in info.get(\"entries\", []):\n                    req_format = entry[\"requested_formats\"][0]\n                    req_subs = 
list(entry[\"requested_subtitles\"].values())[0]\n                    yield entry[\"title\"], req_format[\"url\"], download_sub(\n                        req_subs[\"url\"]\n                    )\n
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.__init__","title":"__init__(ie_key='Generic')","text":"

Initialize the YTDLPPlugin instance.

:param ie_key (str): The key for the information extractor.

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def __init__(self, ie_key: str = \"Generic\"):\n    \"\"\"\n    Initialize the YTDLPPlugin instance.\n    :param ie_key (str): The key for the information extractor.\n    \"\"\"\n    self.ie_key = ie_key\n    self.ydl_opts = {\n        \"format\": best_video_only,\n    }\n
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.generate_urls","title":"generate_urls(url, extra_info_extract_opts=None)","text":"

    Generate URLs and corresponding titles from the given URL.

:param url (str): The URL to extract information from.
:param extra_info_extract_opts (dict, optional): Extra options for information extraction.

:return: An iterable of (title, url) tuples, one per extracted entry.

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def generate_urls(\n    self,\n    url: str,\n    extra_info_extract_opts: dict = None,\n) -> Iterable[str]:\n    \"\"\"Generate URLs and corresponding titles from the given URL.\n\n    :param url (str): The URL to extract information from.\n    :param extra_info_extract_opts (dict, optional): Extra options for information extraction.\n\n    :return Iterable[str]:\n        Tuple[str, str]: A tuple containing the title and URL of each extracted entry.\n    \"\"\"\n    if extra_info_extract_opts is None:\n        extra_info_extract_opts = {}\n    extr_args = {\"ie_key\": self.ie_key} if \"ytsearch\" not in url else {}\n    with YoutubeDL(params=(self.ydl_opts | extra_info_extract_opts)) as ydl:\n        info = ydl.extract_info(url, download=False, **extr_args)\n        if \"entries\" not in info:\n            req_format = info[\"requested_formats\"][0]\n            yield info[\"title\"], req_format[\"url\"]\n        else:\n            for entry in info.get(\"entries\", []):\n                req_format = entry[\"requested_formats\"][0]\n                yield entry[\"title\"], req_format[\"url\"]\n
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.generate_urls_by_subs","title":"generate_urls_by_subs(url, extra_info_extract_opts=None)","text":"

Generate (title, stream URL, subtitles) triples for a given video URL, downloading the subtitles along the way.

:param url (str): The URL of the video to download subtitles for.

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def generate_urls_by_subs(\n    self,\n    url: str,\n    extra_info_extract_opts: dict = None,\n):\n    \"\"\"Download subtitles for a given video URL.\n\n    :param video_url (str): The URL of the video to download subtitles for.\n    \"\"\"\n    if extra_info_extract_opts is None:\n        extra_info_extract_opts = {}\n    extr_args = {\"ie_key\": self.ie_key} if \"ytsearch\" not in url else {}\n    with YoutubeDL(\n        params=(self.ydl_opts | extra_info_extract_opts | self.get_subtitles_opts())\n    ) as ydl:\n        info = ydl.extract_info(url, download=False, **extr_args)\n        import json\n\n        json.dump(info, open(\"info.json\", \"w\"))\n        if \"entries\" not in info:\n            req_subs = list(info[\"requested_subtitles\"].values())[0]\n            req_format = info[\"requested_formats\"][0]\n            yield info[\"title\"], req_format[\"url\"], download_sub(req_subs[\"url\"])\n        else:\n            for entry in info.get(\"entries\", []):\n                req_format = entry[\"requested_formats\"][0]\n                req_subs = list(entry[\"requested_subtitles\"].values())[0]\n                yield entry[\"title\"], req_format[\"url\"], download_sub(\n                    req_subs[\"url\"]\n                )\n
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.best_video_best_audio","title":"best_video_best_audio(ctx)","text":"

    Taken from the yt-dlp documentation as-is

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def best_video_best_audio(ctx):\n    \"\"\"Taken from the yt-dlp documentation as-is\"\"\"\n    \"\"\"Select the best video and the best audio that won't result in an mkv.\n    NOTE: This is just an example and does not handle all cases\"\"\"\n\n    # formats are already sorted worst to best\n    formats = ctx.get(\"formats\")[::-1]\n\n    # acodec='none' means there is no audio\n    best_video = next(\n        f for f in formats if f[\"vcodec\"] != \"none\" and f[\"acodec\"] == \"none\"\n    )\n\n    # find compatible audio extension\n    audio_ext = {\"mp4\": \"m4a\", \"webm\": \"webm\"}[best_video[\"ext\"]]\n    # vcodec='none' means there is no video\n    best_audio = next(\n        f\n        for f in formats\n        if (f[\"acodec\"] != \"none\" and f[\"vcodec\"] == \"none\" and f[\"ext\"] == audio_ext)\n    )\n\n    # These are the minimum required fields for a merged format\n    yield {\n        \"format_id\": f'{best_video[\"format_id\"]}+{best_audio[\"format_id\"]}',\n        \"ext\": best_video[\"ext\"],\n        \"requested_formats\": [best_video, best_audio],\n        # Must be + separated list of protocols\n        \"protocol\": f'{best_video[\"protocol\"]}+{best_audio[\"protocol\"]}',\n    }\n
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.best_video_only","title":"best_video_only(ctx)","text":"

    Just best video -- save bandwidth

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def best_video_only(ctx):\n    \"\"\"Just best video -- save bandwidth\"\"\"\n    # formats are already sorted worst to best\n    formats = ctx.get(\"formats\")[::-1]\n\n    # acodec='none' means there is no audio\n    best_video = next(f for f in formats if f[\"vcodec\"] != \"none\")\n    # These are the minimum required fields for a merged format\n    yield {\n        \"format_id\": f'{best_video[\"format_id\"]}',\n        \"ext\": best_video[\"ext\"],\n        \"requested_formats\": [best_video],\n        # Must be + separated list of protocols\n        \"protocol\": f'{best_video[\"protocol\"]}',\n    }\n
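yt-dlp accepts a callable as its "format" option, which is how these selectors are wired in. A hedged, network-dependent sketch (placeholder URL):

```python
from yt_dlp import YoutubeDL

from video_sampler.integrations.yt_dlp_plugin import best_video_only

with YoutubeDL({"format": best_video_only}) as ydl:
    info = ydl.extract_info("https://example.com/some-video", download=False)
    print(info["requested_formats"][0]["url"])  # direct URL of the best video
```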
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.no_shorts","title":"no_shorts(info, *, incomplete)","text":"

    Filter out short videos

    Source code in video_sampler/integrations/yt_dlp_plugin.py
    def no_shorts(info, *, incomplete):\n    \"\"\"Filter out short videos\"\"\"\n    if url := info.get(\"url\", \"\"):\n        if \"/shorts\" in url:\n            return \"This is a short video\"\n
    "},{"location":"reference/video_sampler/language/keyword_capture/","title":"Language","text":""},{"location":"reference/video_sampler/language/keyword_capture/#video_sampler.language.keyword_capture.download_sub","title":"download_sub(sub_url)","text":"

    Download a VTT subtitle file to a string.

    Source code in video_sampler/language/keyword_capture.py
    def download_sub(sub_url: str):\n    \"\"\"Download a VTT subtitle file to a string.\"\"\"\n    response = requests.get(url=sub_url)\n    return parse_srt_subtitle(response.text)\n
    "},{"location":"reference/video_sampler/visualisation/clustering/","title":"Visualisation","text":""},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.build_feature_model","title":"build_feature_model(model_str)","text":"

Build a feature extraction model. Takes model_str (the model name) and returns a tuple of (model, extractor).

    Source code in video_sampler/visualisation/clustering.py
    def build_feature_model(model_str: str):\n    \"\"\"Build a feature extraction model\n    :param model_str: model name\n    :return: tuple of (model, extractor)\n    \"\"\"\n    extractor = AutoFeatureExtractor.from_pretrained(model_str)\n    model = ResNetModel.from_pretrained(model_str)\n    return model, extractor\n
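For example (the checkpoint name below is an assumption, not part of this reference; any ResNet checkpoint usable with AutoFeatureExtractor and ResNetModel should work):

from video_sampler.visualisation.clustering import build_feature_model\n\nmodel, extractor = build_feature_model(\"microsoft/resnet-50\")  # hypothetical checkpoint\n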
    "},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.cluster_features","title":"cluster_features(features, max_clusters=50)","text":"

Cluster features using t-SNE and KMeans. Takes features (a dict with keys \"embeds\" and \"paths\") and max_clusters (the maximum number of clusters); returns a tuple of (X, cluster_labels).

    Source code in video_sampler/visualisation/clustering.py
def cluster_features(\n    features,\n    max_clusters=50,\n):\n    \"\"\"Cluster features using t-SNE and KMeans\n    :param features: dict with keys \"embeds\" and \"paths\"\n    :param max_clusters: maximum number of clusters\n    :return: tuple of (X, cluster_labels)\n    \"\"\"\n    proj = TSNE(n_components=2, perplexity=35, metric=\"cosine\")\n    Xorg = np.asarray(features[\"embeds\"])\n    X = proj.fit_transform(Xorg)\n\n    # take about 10% of the frames as the number of clusters\n    n_clusters = min(int(0.1 * len(features[\"embeds\"])), max_clusters)\n    cluster_model = KMeans(n_clusters=n_clusters, random_state=0).fit(Xorg)\n    return X, cluster_model.labels_\n
    "},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.extract_features","title":"extract_features(model_str, image_folder, mkey='pixel_values', batch_size=8)","text":"

Extract features from a folder of images. Takes model_str (the model name), image_folder (the folder with images), mkey (the key for the pixel values) and batch_size; returns a dict with keys \"embeds\" and \"paths\".

    Source code in video_sampler/visualisation/clustering.py
    def extract_features(\n    model_str: str, image_folder: Path, mkey=\"pixel_values\", batch_size: int = 8\n):\n    \"\"\"Extract features from a folder of images\n    :param model_str: model name\n    :param image_folder: folder with images\n    :param mkey: key for the pixel values\n    :param batch_size: batch size\n    :return: dict with keys \"embeds\" and \"paths\"\n    \"\"\"\n\n    out_features = defaultdict(list)\n    model, extractor = build_feature_model(model_str)\n    with torch.no_grad():\n        all_files = list(image_folder.iterdir())\n        for batch in tqdm(\n            batched(all_files, batch_size), total=len(all_files) // batch_size\n        ):\n            # load images\n            batch_imgs = [Image.open(img_path).convert(\"RGB\") for img_path in batch]\n            # extract features\n            batch_imgs = extractor(batch_imgs, return_tensors=\"pt\")[mkey]\n            batch_features = model(batch_imgs).pooler_output.squeeze()\n            if len(batch) == 1:\n                batch_features = batch_features.expand(1, -1)\n            batch_features = torch.functional.F.normalize(batch_features, p=2, dim=1)\n            out_features[\"embeds\"].extend(batch_features)\n            out_features[\"paths\"].extend([img_path.name for img_path in batch])\n    return out_features\n
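Putting the two functions together, a sketch of a full clustering pass; the folder path and checkpoint name are illustrative assumptions:

from pathlib import Path\n\nfrom video_sampler.visualisation.clustering import cluster_features, extract_features\n\nfeatures = extract_features(\"microsoft/resnet-50\", Path(\"./dataset-frames/\"), batch_size=8)\nX, labels = cluster_features(features, max_clusters=50)\n# X: 2-D t-SNE projection of the embeddings; labels: KMeans cluster id per image\n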
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"video-sampler","text":"

    Video sampler allows you to efficiently sample video frames. Currently, it uses keyframe decoding, frame interval gating and perceptual hashing to reduce duplicated samples.

    Use case: for sampling videos for later annotations used in machine learning.

    "},{"location":"#table-of-contents","title":"Table of Contents","text":"
    • video-sampler
    • Table of Contents
    • Documentation
    • Features
    • Installation and Usage
      • Basic usage
      • YT-DLP integration plugin
        • Extra YT-DLP options
      • API examples
      • Advanced usage
      • Gating
      • CLIP-based gating comparison
      • Blur gating
    • Benchmarks
    • Benchmark videos
    • Flit commands
      • Build
      • Install
      • Publish
    • \ud83d\udee1 License
    • \ud83d\udcc3 Citation
    "},{"location":"#documentation","title":"Documentation","text":"

    Documentation is available at https://lemurpwned.github.io/video-sampler/.

    "},{"location":"#features","title":"Features","text":"
    • [x] Direct sampling methods:
    • [x] hash - uses perceptual hashing to reduce duplicated samples
    • [x] entropy - uses entropy to reduce duplicated samples (work in progress)
    • [x] gzip - uses gzip compressed size to reduce duplicated samples (work in progress)
    • [x] buffer - uses sliding buffer to reduce duplicated samples
    • [x] grid - uses grid sampling to reduce duplicated samples
    • [x] Gating methods (modifications on top of direct sampling methods):
    • [x] clip - uses CLIP to filter out frames that do not contain the specified objects
    • [x] blur - uses blur detection to filter out frames that are too blurry
    • [x] Integrations
    • [x] YTDLP integration -- streams directly from yt-dlp queries, playlists or single videos
    "},{"location":"#installation-and-usage","title":"Installation and Usage","text":"
    pip install -U video_sampler\n

    then you can run

    python3 -m video_sampler --help\n

    or simply

    video_sampler --help\n
    "},{"location":"#basic-usage","title":"Basic usage","text":"
    python3 -m video_sampler hash FatCat.mp4 ./dataset-frames/ --hash-size 3 --buffer-size 20\n
    "},{"location":"#yt-dlp-integration-plugin","title":"YT-DLP integration plugin","text":"

Before using, please consult the ToS of the website you are scraping from -- use responsibly and for research purposes. To use the YT-DLP integration, you need to install yt-dlp first (see yt-dlp). Then, you simply add --ytdlp to the command, and it changes the meaning of the video_path argument.

    • to search
    video_sampler hash \"ytsearch:cute cats\" ./folder-frames/ \\\n  --hash-size 3 --buffer-size 20 --ytdlp\n
    • to sample a single video
    video_sampler hash \"https://www.youtube.com/watch?v=W86cTIoMv2U\" ./folder-frames/ \\\n    --hash-size 3 --buffer-size 20 --ytdlp\n
    • to sample a playlist
    video_sampler hash \"https://www.youtube.com/watch?v=GbpP3Sxp-1U&list=PLFezMcAw96RGvTTTbdKrqew9seO2ZGRmk\" ./folder-frames/ \\\n  --hash-size 3 --buffer-size 20 --ytdlp\n

The videos are never fully downloaded, only streamed, so you can sample videos from the internet without saving them to disk first.

    "},{"location":"#extra-yt-dlp-options","title":"Extra YT-DLP options","text":"

You can pass extra options to yt-dlp by using the --yt-extra-args flag. For example:

    this will only sample videos uploaded before 2019-01-01:

    ... --ytdlp --yt-extra-args '--datebefore 20190101'\n

    or this will only sample videos uploaded after 2019-01-01:

    ... --ytdlp --yt-extra-args '--dateafter 20190101'\n

    or this will skip all shorts:

... --ytdlp --yt-extra-args '--match-filter \"original_url!*=/shorts/ & url!*=/shorts/\"'\n
    "},{"location":"#api-examples","title":"API examples","text":"

    See examples in https://github.com/LemurPwned/video-sampler/tree/main/scripts.
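For orientation, here is a minimal API sketch assembled from the SamplerConfig and VideoSampler references below; the file name and buffer values are illustrative assumptions, not prescribed defaults:

from video_sampler.buffer import SamplerConfig\nfrom video_sampler.sampler import VideoSampler\n\n# a perceptual-hash buffer; the keys follow the create_buffer reference\nconfig = SamplerConfig(\n    min_frame_interval_sec=1.0,\n    keyframes_only=True,\n    buffer_config={\"type\": \"hash\", \"hash_size\": 4, \"size\": 20, \"debug\": False},\n)\nsampler = VideoSampler(config)\n# sample() yields lists of FrameObject, ending with a processing-done marker\nfor frame_objects in sampler.sample(\"FatCat.mp4\"):\n    for frame_object in frame_objects:\n        ...  # save or inspect each sampled frame here\n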

    "},{"location":"#advanced-usage","title":"Advanced usage","text":"

    There are 3 sampling methods available:

    • hash - uses perceptual hashing to reduce duplicated samples
    • entropy - uses entropy to reduce duplicated samples (work in progress)
    • gzip - uses gzip compressed size to reduce duplicated samples (work in progress)

To launch any of them, run the command below and substitute method-name with one of the above:

    video_sampler buffer `method-name` ...other options\n

    e.g.

    video_sampler buffer entropy --buffer-size 20 ...\n

where buffer-size for entropy and gzip means the top-k sliding buffer size. The sliding buffer also uses hashing to reduce duplicated samples.
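As a sketch, the equivalent programmatic buffer configuration might look like the following; the key names mirror the create_buffer and benchmark references below, and the values are illustrative. Note that expiry must be greater than size (the sliding buffer asserts this):

from video_sampler.buffer import SamplerConfig\n\nconfig = SamplerConfig(\n    min_frame_interval_sec=1.0,\n    keyframes_only=True,\n    # \"entropy\" or \"gzip\" both route through the sliding top-k buffer\n    buffer_config={\"type\": \"entropy\", \"size\": 20, \"debug\": False, \"hash_size\": 8, \"expiry\": 50},\n)\n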

    "},{"location":"#gating","title":"Gating","text":"

    Aside from basic sampling rules, you can also apply gating rules to the sampled frames, further reducing the number of frames. There are 3 gating methods available:

    • pass - pass all frames
    • clip - use CLIP to filter out frames that do not contain the specified objects
    • blur - use blur detection to filter out frames that are too blurry

    Here's a quick example of how to use clip:

    python3 -m video_sampler clip ./videos ./scratch/clip --pos-samples \"a cat\" --neg-samples \"empty background, a lemur\"  --hash-size 4\n
    "},{"location":"#clip-based-gating-comparison","title":"CLIP-based gating comparison","text":"

    Here's a brief comparison of the frames sampled with and without CLIP-based gating with the following config:

      gate_def = dict(\n      type=\"clip\",\n      pos_samples=[\"a cat\"],\n      neg_samples=[\n          \"an empty background\",\n          \"text on screen\",\n          \"a forest with no animals\",\n      ],\n      model_name=\"ViT-B-32\",\n      batch_size=32,\n      pos_margin=0.2,\n      neg_margin=0.3,\n  )\n

Evidently, CLIP-based gating is able to filter out frames that do not contain a cat and, in consequence, reduces the number of frames with a plain background. It also thinks that a lemur is a cat, which is not entirely wrong as fluffy creatures go.
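Assuming create_gate forwards these keys to ClipGate (whose signature is documented below), a gate definition like the one above plugs into the sampler config as a sketch:

from video_sampler.buffer import SamplerConfig\n\nconfig = SamplerConfig(\n    gate_config=dict(\n        type=\"clip\",\n        pos_samples=[\"a cat\"],\n        neg_samples=[\"an empty background\", \"text on screen\", \"a forest with no animals\"],\n        model_name=\"ViT-B-32\",\n        batch_size=32,\n        pos_margin=0.2,\n        neg_margin=0.3,\n    )\n)\n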

Side-by-side comparison images: pass gate (no gating), CLIP gate, grid.

The effects of gating in numbers, for this particular set of examples (see the produced vs gated columns): produced is the number of frames sampled without gating (here, after perceptual hashing), while gated is the number of frames remaining after gating.

| video | buffer | gate | decoded | produced | gated |
| --- | --- | --- | --- | --- | --- |
| FatCat.mp4 | grid | pass | 179 | 31 | 31 |
| SmolCat.mp4 | grid | pass | 118 | 24 | 24 |
| HighLemurs.mp4 | grid | pass | 161 | 35 | 35 |
| FatCat.mp4 | hash | pass | 179 | 101 | 101 |
| SmolCat.mp4 | hash | pass | 118 | 61 | 61 |
| HighLemurs.mp4 | hash | pass | 161 | 126 | 126 |
| FatCat.mp4 | hash | clip | 179 | 101 | 73 |
| SmolCat.mp4 | hash | clip | 118 | 61 | 31 |
| HighLemurs.mp4 | hash | clip | 161 | 126 | 66 |
"},{"location":"#blur-gating","title":"Blur gating","text":"

Blur gating helps a little with blurry videos. Adjust threshold and method (laplacian or fft) for best results. A direct-use sketch is shown below, followed by some results from fft at threshold=20:
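The same gate can also be driven directly through the BlurGate class documented below; a sketch, where the input frame is any PIL image and the file name is hypothetical:

from PIL import Image\n\nfrom video_sampler.gating import BlurGate\n\ngate = BlurGate(method=\"fft\", threshold=20)\nframe = Image.open(\"some_frame.jpg\")  # hypothetical input frame\ngated = gate(frame, meta={\"frame_time\": 0.0})\n# gated.frames is empty when the frame was judged too blurry\n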

| video | buffer | gate | decoded | produced | gated |
| --- | --- | --- | --- | --- | --- |
| MadLad.mp4 | grid | pass | 120 | 31 | 31 |
| MadLad.mp4 | hash | pass | 120 | 110 | 110 |
| MadLad.mp4 | hash | blur | 120 | 110 | 85 |
"},{"location":"#benchmarks","title":"Benchmarks","text":"

    Configuration for this benchmark:

    SamplerConfig(min_frame_interval_sec=1.0, keyframes_only=True, buffer_size=30, hash_size=X, queue_wait=0.1, debug=True)\n
| Video | Total frames | Hash size | Decoded | Saved |
| --- | --- | --- | --- | --- |
| SmolCat | 2936 | 8 | 118 | 106 |
| SmolCat | - | 4 | - | 61 |
| Fat Cat | 4462 | 8 | 179 | 163 |
| Fat Cat | - | 4 | - | 101 |
| HighLemurs | 4020 | 8 | 161 | 154 |
| HighLemurs | - | 4 | - | 126 |
    SamplerConfig(\n    min_frame_interval_sec=1.0,\n    keyframes_only=True,\n    queue_wait=0.1,\n    debug=False,\n    print_stats=True,\n    buffer_config={'type': 'entropy'/'gzip', 'size': 30, 'debug': False, 'hash_size': 8, 'expiry': 50}\n)\n
| Video | Total frames | Type | Decoded | Saved |
| --- | --- | --- | --- | --- |
| SmolCat | 2936 | entropy | 118 | 39 |
| SmolCat | - | gzip | - | 39 |
| Fat Cat | 4462 | entropy | 179 | 64 |
| Fat Cat | - | gzip | - | 73 |
| HighLemurs | 4020 | entropy | 161 | 59 |
| HighLemurs | - | gzip | - | 63 |
"},{"location":"#benchmark-videos","title":"Benchmark videos","text":"
    • SmolCat
    • Fat Cat
    • HighLemurs
    • MadLad
    "},{"location":"#flit-commands","title":"Flit commands","text":""},{"location":"#build","title":"Build","text":"
    flit build\n
    "},{"location":"#install","title":"Install","text":"
    flit install\n
    "},{"location":"#publish","title":"Publish","text":"

    Remember to bump the version in pyproject.toml before publishing.

    flit publish\n
    "},{"location":"#license","title":"\ud83d\udee1 License","text":"

    This project is licensed under the terms of the MIT license. See LICENSE for more details.

    "},{"location":"#citation","title":"\ud83d\udcc3 Citation","text":"
    @misc{video-sampler,\n  author = {video-sampler},\n  title = {Video sampler allows you to efficiently sample video frames},\n  year = {2023},\n  publisher = {GitHub},\n  journal = {GitHub repository},\n  howpublished = {\\url{https://github.com/LemurPwned/video-sampler}}\n}\n
    "},{"location":"reference/video_sampler/buffer/","title":"Video sampler","text":""},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.EntropyByffer","title":"EntropyByffer","text":"

    Bases: FrameBuffer

    Measure image entropy as a function of the image usability

    Source code in video_sampler/buffer.py
    class EntropyByffer(FrameBuffer):\n    \"\"\"Measure image entropy as a function of the image usability\"\"\"\n\n    def __init__(\n        self, size: int, expiry: int, debug_flag: bool = False, hash_size: int = 8\n    ) -> None:\n        self.sliding_top_k_buffer = SlidingTopKBuffer(\n            size=size, expiry=expiry, debug_flag=debug_flag, hash_size=hash_size\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return self.sliding_top_k_buffer.get_buffer_state()\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        entropy = item.entropy()\n        return self.sliding_top_k_buffer.add(item, {**metadata, \"index\": -entropy})\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        return self.sliding_top_k_buffer.final_flush()\n\n    def clear(self):\n        self.sliding_top_k_buffer.clear()\n
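A usage sketch, assuming images is an iterable of PIL images; per the implementation above, add() routes each image into a SlidingTopKBuffer keyed on negative entropy, so a top-entropy (image, metadata) pair is emitted once the buffer is full:

from video_sampler.buffer import EntropyByffer\n\nbuffer = EntropyByffer(size=5, expiry=10, debug_flag=False, hash_size=8)\nfor i, img in enumerate(images):  # images: an iterable of PIL images (assumed)\n    if sampled := buffer.add(img, {\"frame_indx\": i}):\n        ...  # a top-entropy frame was emitted\nfor item, meta in buffer.final_flush():\n    if item is not None:\n        ...  # remaining frames\n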
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer","title":"FrameBuffer","text":"

    Bases: ABC

    Source code in video_sampler/buffer.py
    class FrameBuffer(ABC):\n    @abstractmethod\n    def add(self, item: Image.Image, metadata: dict[str, Any]) -> None | tuple:\n        pass\n\n    @abstractmethod\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        \"\"\"Flush the buffer and return the remaining items\"\"\"\n        pass\n\n    @abstractmethod\n    def get_buffer_state(self) -> list[str]:\n        \"\"\"Return the current state of the buffer\"\"\"\n        pass\n\n    @abstractmethod\n    def clear(self):\n        \"\"\"Clear the buffer\"\"\"\n        pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.clear","title":"clear() abstractmethod","text":"

    Clear the buffer

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef clear(self):\n    \"\"\"Clear the buffer\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.final_flush","title":"final_flush() abstractmethod","text":"

    Flush the buffer and return the remaining items

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n    \"\"\"Flush the buffer and return the remaining items\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.FrameBuffer.get_buffer_state","title":"get_buffer_state() abstractmethod","text":"

    Return the current state of the buffer

    Source code in video_sampler/buffer.py
    @abstractmethod\ndef get_buffer_state(self) -> list[str]:\n    \"\"\"Return the current state of the buffer\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GridBuffer","title":"GridBuffer","text":"

    Bases: HashBuffer

    A class representing a grid-based buffer for images. Splits the image into a grid and stores the hashes of the grid cells in a mosaic buffer.

Parameters:

• size (int): The maximum size of the buffer. (required)
• debug_flag (bool, optional): A flag indicating whether debug information should be printed. Defaults to False.
• hash_size (int, optional): The size of the hash. Defaults to 4.
• grid_x (int, optional): The number of grid cells in the x-axis. Defaults to 4.
• grid_y (int, optional): The number of grid cells in the y-axis. Defaults to 4.
• max_hits (int, optional): The maximum number of hits allowed for a hash. Defaults to 1.

Attributes:

• grid_x (int): The number of grid cells in the x-axis.
• grid_y (int): The number of grid cells in the y-axis.
• max_hits (int): The maximum number of hits allowed for a hash.
• mosaic_buffer (dict): A dictionary storing the mosaic buffer.

Methods:

• add(item, metadata): Adds an image to the buffer along with its metadata.
• clear(): Clears the buffer and the mosaic buffer.
• update_ttl_buffer(): Updates the buffer by expiring images that are not in the grid.

    Source code in video_sampler/buffer.py
    class GridBuffer(HashBuffer):\n    \"\"\"\n    A class representing a grid-based buffer for images.\n    Splits the image into a grid and stores the hashes of the grid cells in a mosaic buffer.\n\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): A flag indicating whether debug information should be printed.\n        hash_size (int, optional): The size of the hash.\n        grid_x (int, optional): The number of grid cells in the x-axis.\n        grid_y (int, optional): The number of grid cells in the y-axis.\n        max_hits (int, optional): The maximum number of hits allowed for a hash.\n\n    Attributes:\n        grid_x (int): The number of grid cells in the x-axis.\n        grid_y (int): The number of grid cells in the y-axis.\n        max_hits (int): The maximum number of hits allowed for a hash.\n        mosaic_buffer (dict): A dictionary storing the mosaic buffer.\n\n    Methods:\n        add(item, metadata):\n            Adds an image to the buffer along with its metadata.\n        clear():\n            Clears the buffer and the mosaic buffer.\n        update_ttl_buffer():\n            Updates the buffer by expiring images that are not in the grid.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        debug_flag: bool = False,\n        hash_size: int = 4,\n        grid_x: int = 4,\n        grid_y: int = 4,\n        max_hits: int = 1,\n    ) -> None:\n        super().__init__(size, debug_flag, hash_size)\n        self.grid_x = grid_x\n        self.grid_y = grid_y\n        self.max_hits = max_hits\n        self.mosaic_buffer = {}\n\n    def __get_grid_hash(self, item: Image.Image) -> str:\n        \"\"\"Compute grid hashes for a given image\"\"\"\n        for x in range(self.grid_x):\n            for y in range(self.grid_y):\n                yield str(\n                    phash(\n                        item.crop(\n                            (\n                                x * item.width / self.grid_x,\n                                y * item.height / self.grid_y,\n                                (x + 1) * item.width / self.grid_x,\n                                (y + 1) * item.height / self.grid_y,\n                            )\n                        ),\n                        hash_size=self.hash_size,\n                    )\n                )\n\n    def _check_mosaic(self, mosaic_hash: str):\n        return mosaic_hash in self.mosaic_buffer\n\n    def update_ttl_buffer(self):\n        # expire the images that are not in the grid\n        if len(self.ordered_buffer) >= self.max_size:\n            to_return_hash, return_data = self.ordered_buffer.popitem(last=False)\n            if to_return_hash is not None:\n                removal_keys = [\n                    img_hash\n                    for img_hash, mosaic_hash in self.mosaic_buffer.items()\n                    if mosaic_hash == to_return_hash\n                ]\n                for key in removal_keys:\n                    del self.mosaic_buffer[key]\n            return return_data\n        return None\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        hash_ = str(phash(item, hash_size=self.hash_size))\n        if not self._check_duplicate(hash_):\n            # not automatically rejected, check the mosaic buffer\n            hash_hits = 0\n            hash_sets = []\n            for el_hash_ in self.__get_grid_hash(item):\n                if el_hash_ in self.mosaic_buffer:\n                    hash_hits += 1\n  
              hash_sets.append(el_hash_)\n\n            if hash_hits < self.max_hits:\n                # add image hash to the ttl counter\n                self.ordered_buffer[hash_] = (item, metadata)\n                # add the image to the mosaic buffer\n                # this also automatically overwrites the deleted hashes\n                for el_hash in hash_sets:\n                    self.mosaic_buffer[el_hash] = hash_\n\n            if self.debug_flag:\n                console.print(\n                    f\"\\tHash hits: {hash_hits}\"\n                    f\"\\tHash sets: {len(hash_sets)}\"\n                    f\"\\tHash buffer: {len(self.get_buffer_state())}\"\n                    f\"\\tMosaic buffer: {len(self.mosaic_buffer)}\"\n                )\n        return self.update_ttl_buffer()\n\n    def clear(self):\n        super().clear()\n        self.mosaic_buffer = {}\n
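A construction sketch matching the signature above; the frame variable stands for any PIL image and the values are illustrative:

from video_sampler.buffer import GridBuffer\n\nbuffer = GridBuffer(size=30, debug_flag=False, hash_size=4, grid_x=4, grid_y=4, max_hits=1)\n# add() returns an evicted (image, metadata) pair once the TTL buffer is full, else None\nevicted = buffer.add(frame, {\"frame_time\": 1.0, \"frame_indx\": 42})  # frame: a PIL image (assumed)\n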
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GridBuffer.__get_grid_hash","title":"__get_grid_hash(item)","text":"

    Compute grid hashes for a given image

    Source code in video_sampler/buffer.py
    def __get_grid_hash(self, item: Image.Image) -> str:\n    \"\"\"Compute grid hashes for a given image\"\"\"\n    for x in range(self.grid_x):\n        for y in range(self.grid_y):\n            yield str(\n                phash(\n                    item.crop(\n                        (\n                            x * item.width / self.grid_x,\n                            y * item.height / self.grid_y,\n                            (x + 1) * item.width / self.grid_x,\n                            (y + 1) * item.height / self.grid_y,\n                        )\n                    ),\n                    hash_size=self.hash_size,\n                )\n            )\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.GzipBuffer","title":"GzipBuffer","text":"

    Bases: FrameBuffer

    Measure compression size as a function of the image usability

    Source code in video_sampler/buffer.py
    class GzipBuffer(FrameBuffer):\n    \"\"\"Measure compression size as a function of the image usability\"\"\"\n\n    def __init__(\n        self, size: int, expiry: int, debug_flag: bool = False, hash_size: int = 8\n    ) -> None:\n        self.sliding_top_k_buffer = SlidingTopKBuffer(\n            size=size, expiry=expiry, debug_flag=debug_flag, hash_size=hash_size\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return self.sliding_top_k_buffer.get_buffer_state()\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        compressed_l = len(gzip.compress(item.tobytes()))\n        return self.sliding_top_k_buffer.add(item, {**metadata, \"index\": -compressed_l})\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        return self.sliding_top_k_buffer.final_flush()\n\n    def clear(self):\n        self.sliding_top_k_buffer.clear()\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.HashBuffer","title":"HashBuffer","text":"

    Bases: FrameBuffer

A buffer that stores frames with their corresponding metadata and checks for duplicates based on image hashes.

Parameters:

• size (int): The maximum size of the buffer. (required)
• debug_flag (bool, optional): Flag indicating whether to enable debug mode. Defaults to False.
• hash_size (int, optional): The size of the image hash. Defaults to 4.

Methods:

• get_buffer_state() -> list[str]: Returns the current state of the buffer as a list of image hashes.
• add(item: Image.Image, metadata: dict[str, Any]): Adds an item to the buffer along with its metadata.
• final_flush(): Yields the stored items and their metadata in the buffer.

Private methods:

• __add(item: Image.Image, hash_: str, metadata: dict): Adds an item to the buffer with the given hash and metadata.
• __check_duplicate(hash_: str) -> bool: Checks if the given hash already exists in the buffer and renews its validity if found.

    Source code in video_sampler/buffer.py
    class HashBuffer(FrameBuffer):\n    \"\"\"\n    A buffer that stores frames with their corresponding metadata and\n    checks for duplicates based on image hashes.\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): Flag indicating whether to enable debug mode. Defaults to False.\n        hash_size (int, optional): The size of the image hash. Defaults to 4.\n\n    Methods:\n        get_buffer_state() -> list[str]:\n            Returns the current state of the buffer as a list of image hashes.\n\n        add(item: Image.Image, metadata: dict[str, Any])\n            Adds an item to the buffer along with its metadata.\n\n        final_flush() -> Iterable[tuple[Image.Image | None, dict]]:\n            Yields the stored items and their metadata in the buffer.\n\n        clear()\n            Clears the buffer.\n\n    Private Methods:\n        __add(item: Image.Image, hash_: str, metadata: dict)\n            Adds an item to the buffer with the given hash and metadata.\n\n        __check_duplicate(hash_: str) -> bool:\n            Checks if the given hash already exists in the buffer and renews its validity if found.\n\n    \"\"\"\n\n    def __init__(self, size: int, debug_flag: bool = False, hash_size: int = 4) -> None:\n        self.ordered_buffer = OrderedDict()\n        self.max_size = size\n        self.debug_flag = debug_flag\n        self.hash_size = hash_size\n\n    def get_buffer_state(self) -> list[str]:\n        return list(self.ordered_buffer.keys())\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        hash_ = str(phash(item, hash_size=self.hash_size))\n        if not self._check_duplicate(hash_):\n            return self.__add(hash_, item, metadata)\n        return None\n\n    def __add(self, hash_: str, item: Image.Image, metadata: dict):\n        self.ordered_buffer[hash_] = (item, metadata)\n        if len(self.ordered_buffer) >= self.max_size:\n            return self.ordered_buffer.popitem(last=False)[1]\n        return None\n\n    def _check_duplicate(self, hash_: str) -> bool:\n        if hash_ in self.ordered_buffer:\n            # renew the hash validity\n            if self.debug_flag:\n                console.print(\n                    f\"Renewing {hash_}\",\n                    style=f\"bold {Color.red.value}\",\n                )\n            self.ordered_buffer.move_to_end(hash_)\n            return True\n        return False\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        yield from self.ordered_buffer.values()\n\n    def clear(self):\n        self.ordered_buffer.clear()\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.SamplerConfig","title":"SamplerConfig dataclass","text":"

    Configuration options for the video sampler.

Parameters:

• min_frame_interval_sec (float): The minimum time interval between sampled frames in seconds. Defaults to 1.
• keyframes_only (bool): Flag indicating whether to sample only keyframes. Defaults to True.
• queue_wait (float): The time to wait between checking the frame queue in seconds. Defaults to 0.1.
• debug (bool): Flag indicating whether to enable debug mode. Defaults to False.
• print_stats (bool): Flag indicating whether to print sampling statistics. Defaults to False.
• buffer_config (dict[str, Any]): Configuration options for the frame buffer. Defaults to {\"type\": \"hash\", \"hash_size\": 8, \"size\": 15, \"debug\": True}.
• gate_config (dict[str, Any]): Configuration options for the frame gate. Defaults to {\"type\": \"pass\"}.

Methods:

• __str__() -> str: Returns a string representation of the configuration.

    Source code in video_sampler/buffer.py
    @dataclass\nclass SamplerConfig:\n    \"\"\"\n    Configuration options for the video sampler.\n\n    Args:\n        min_frame_interval_sec (float, optional): The minimum time interval\n            between sampled frames in seconds. Defaults to 1.\n        keyframes_only (bool, optional): Flag indicating whether to\n            sample only keyframes. Defaults to True.\n        queue_wait (float, optional): The time to wait between checking\n            the frame queue in seconds. Defaults to 0.1.\n        debug (bool, optional): Flag indicating whether to enable debug mode.\n            Defaults to False.\n        print_stats (bool, optional): Flag indicating whether to print\n            sampling statistics. Defaults to False.\n        buffer_config (dict[str, Any], optional): Configuration options for\n                the frame buffer. Defaults to {\"type\": \"entropy\", \"size\": 15,\n                \"debug\": True}.\n        gate_config (dict[str, Any], optional): Configuration options for\n                the frame gate. Defaults to {\"type\": \"pass\"}.\n\n    Methods:\n        __str__() -> str:\n            Returns a string representation of the configuration.\n\n    \"\"\"\n\n    min_frame_interval_sec: float = 1\n    keyframes_only: bool = True\n    queue_wait: float = 0.1\n    debug: bool = False\n    print_stats: bool = False\n    buffer_config: dict[str, Any] = field(\n        default_factory=lambda: {\n            \"type\": \"hash\",\n            \"hash_size\": 8,\n            \"size\": 15,\n            \"debug\": True,\n        }\n    )\n    gate_config: dict[str, Any] = field(\n        default_factory=lambda: {\n            \"type\": \"pass\",\n        }\n    )\n\n    def __str__(self) -> str:\n        return str(asdict(self))\n
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.SlidingTopKBuffer","title":"SlidingTopKBuffer","text":"

    Bases: FrameBuffer

    A class representing a sliding top-k buffer for frames.

Parameters:

• size (int): The maximum size of the buffer. (required)
• debug_flag (bool, optional): A flag indicating whether debug information should be printed. Defaults to False.
• expiry (int, optional): The expiry count for frames. Defaults to 30.
• hash_size (int, optional): The size of the hash. Defaults to 8.

Attributes:

• sliding_buffer (list): The sliding buffer implemented as a min heap.
• max_size (int): The maximum size of the buffer.
• debug_flag (bool): A flag indicating whether debug information should be printed.
• expiry_count (int): The expiry count for frames.
• hash_size (int): The size of the hash.

Methods:

• get_buffer_state() -> list[str]: Returns the current state of the buffer.
• add(item, metadata): Adds a frame to the buffer along with its metadata.
• final_flush(): Performs a final flush of the buffer and yields the remaining frames.
• clear(): Clears the buffer.

    Source code in video_sampler/buffer.py
    class SlidingTopKBuffer(FrameBuffer):\n    \"\"\"\n    A class representing a sliding top-k buffer for frames.\n\n    Args:\n        size (int): The maximum size of the buffer.\n        debug_flag (bool, optional): A flag indicating whether debug information should be printed.\n        expiry (int, optional): The expiry count for frames.\n        hash_size (int, optional): The size of the hash.\n\n    Attributes:\n        sliding_buffer (list): The sliding buffer implemented as a min heap.\n        max_size (int): The maximum size of the buffer.\n        debug_flag (bool): A flag indicating whether debug information should be printed.\n        expiry_count (int): The expiry count for frames.\n        hash_size (int): The size of the hash.\n\n    Methods:\n        get_buffer_state() -> list[str]:\n            Returns the current state of the buffer.\n        add(item, metadata):\n            Adds a frame to the buffer along with its metadata.\n        final_flush() -> Iterable[tuple[Image.Image | None, dict]]:\n            Performs a final flush of the buffer and yields the remaining frames.\n        clear():\n            Clears the buffer.\n\n    \"\"\"\n\n    def __init__(\n        self, size: int, debug_flag: bool = False, expiry: int = 30, hash_size: int = 8\n    ) -> None:\n        # it's a min heap with a fixed size\n        self.sliding_buffer = []\n        self.max_size = size\n        self.debug_flag = debug_flag\n        self.expiry_count = expiry\n        self.hash_size = hash_size\n        assert (\n            self.expiry_count > self.max_size\n        ), \"expiry count must be greater than max size\"\n        console.print(\n            f\"Creating sliding buffer of size {self.max_size} and expiry {expiry}\",\n            style=f\"bold {Color.red.value}\",\n        )\n\n    def get_buffer_state(self) -> list[str]:\n        return [item[:3] for item in self.sliding_buffer]\n\n    def add(self, item: Image.Image, metadata: dict[str, Any]):\n        assert \"index\" in metadata, \"metadata must have index key for sliding buffer\"\n        average_hash_ = str(average_hash(item, hash_size=self.hash_size))\n        to_return = None\n        if not self.__check_duplicate(average_hash_):\n            heapq.heappush(\n                self.sliding_buffer,\n                [metadata[\"index\"], 0, average_hash_, item, metadata],\n            )\n            if len(self.sliding_buffer) >= self.max_size:\n                to_return = heapq.heappop(self.sliding_buffer)[-2:]\n        # update the expiry count\n        expired_indx = -1\n        for i in range(len(self.sliding_buffer)):\n            self.sliding_buffer[i][1] += 1\n            if self.sliding_buffer[i][1] >= self.expiry_count:\n                expired_indx = i\n        # at any point only one item can be expired\n        if expired_indx != -1:\n            self.sliding_buffer.pop(expired_indx)  # just drop\n        return to_return\n\n    def __check_duplicate(self, hash_: str) -> bool:\n        for item in self.sliding_buffer:\n            if item[2] == hash_:\n                # renew the hash validity\n                if self.debug_flag:\n                    console.print(\n                        f\"Renewing {hash_}\",\n                        style=f\"bold {Color.red.value}\",\n                    )\n                item[1] = 0\n                return True\n        return False\n\n    def final_flush(self) -> Iterable[tuple[Image.Image | None, dict]]:\n        if len(self.sliding_buffer):\n            yield 
heapq.heappop(self.sliding_buffer)[-2:]\n        yield None, {}\n\n    def clear(self):\n        self.sliding_buffer.clear()\n
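A direct-use sketch; note the two constraints visible in the implementation above: metadata must carry an \"index\" key (the heap ordering), and expiry must exceed size (the constructor asserts this). The frame and score variables are assumed inputs:

from video_sampler.buffer import SlidingTopKBuffer\n\nbuffer = SlidingTopKBuffer(size=10, expiry=30, hash_size=8)\n# the heap is ordered by metadata[\"index\"]; smaller values are emitted first\nresult = buffer.add(frame, {\"index\": -score, \"frame_indx\": 0})\n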
    "},{"location":"reference/video_sampler/buffer/#video_sampler.buffer.create_buffer","title":"create_buffer(buffer_config)","text":"

    Create a buffer based on the config

    Source code in video_sampler/buffer.py
    def create_buffer(buffer_config: dict[str, Any]):\n    \"\"\"Create a buffer based on the config\"\"\"\n    console.print(\n        f\"Creating buffer of type {buffer_config['type']}\",\n        style=f\"bold {Color.red.value}\",\n    )\n    if buffer_config[\"type\"] == \"hash\":\n        return HashBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n        )\n    elif buffer_config[\"type\"] == \"grid\":\n        return GridBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            grid_x=buffer_config[\"grid_x\"],\n            grid_y=buffer_config[\"grid_y\"],\n            max_hits=buffer_config[\"max_hits\"],\n        )\n    elif buffer_config[\"type\"] == \"sliding_top_k\":\n        return SlidingTopKBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    elif buffer_config[\"type\"] == \"passthrough\":\n        return PassThroughBuffer()\n    elif buffer_config[\"type\"] == \"gzip\":\n        return GzipBuffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    elif buffer_config[\"type\"] == \"entropy\":\n        return EntropyByffer(\n            size=buffer_config[\"size\"],\n            debug_flag=buffer_config[\"debug\"],\n            hash_size=buffer_config[\"hash_size\"],\n            expiry=buffer_config[\"expiry\"],\n        )\n    else:\n        raise ValueError(f\"Unknown buffer type {buffer_config['type']}\")\n
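For example, the hash branch above can be exercised with a config dict like this sketch:

from video_sampler.buffer import create_buffer\n\nbuffer = create_buffer({\"type\": \"hash\", \"size\": 15, \"debug\": False, \"hash_size\": 8})\n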
    "},{"location":"reference/video_sampler/evaluation/","title":"Evaluation","text":""},{"location":"reference/video_sampler/evaluation/#video_sampler.evaluation.compute_total_video_entropy","title":"compute_total_video_entropy()","text":"

    Compute the total entropy of a video

    Source code in video_sampler/evaluation.py
    def compute_total_video_entropy():\n    \"\"\"Compute the total entropy of a video\"\"\"\n    pass\n
    "},{"location":"reference/video_sampler/gating/","title":"Gating","text":""},{"location":"reference/video_sampler/gating/#video_sampler.gating.BlurGate","title":"BlurGate","text":"Source code in video_sampler/gating.py
    class BlurGate:\n    def __init__(\n        self, method: Literal[\"fft\", \"laplacian\"] = \"laplacian\", threshold: float = 100\n    ) -> None:\n        \"\"\"\n        Initializes the Gating object.\n\n        Args:\n            method (str): The method to use for blur detection. Can be \"fft\" or \"laplacian\".\n            threshold (float): The threshold for bluriness. The higher the threshold, the less\n                blurry the image needs to be to be discarded.\n                The default threshold values are:\n                - 20 for the \"fft\" method\n                - 100 for the \"laplacian\" method.\n\n        Raises:\n            ValueError: If an unknown blur method is provided.\n        \"\"\"\n        self.is_blurry = None\n        if method == \"fft\":\n            self.is_blurry = self._is_blurry_fft\n        elif method == \"laplacian\":\n            self.is_blurry = self._is_blurry_laplacian\n        else:\n            raise ValueError(f\"Unknown blur method {method}\")\n        self.threshold = threshold\n\n    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:\n        if self.is_blurry(frame) or last:\n            return EMPTY_GATED_OBJECT\n        return GatedObject([FrameObject(frame, meta)], 1)\n\n    def _is_blurry_laplacian(self, frame: Image.Image) -> bool:\n        \"\"\"Check if the image is blurry with laplacian method.\"\"\"\n        return (\n            cv2.Laplacian(\n                cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2GRAY), cv2.CV_64F\n            ).var()\n            < self.threshold\n        )\n\n    def _is_blurry_fft(self, frame: Image.Image) -> bool:\n        \"\"\"Check if the image is blurry with fft method.\"\"\"\n        f = np.fft.fft2(frame)\n        fshift = np.fft.fftshift(f)\n        magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-12)\n        return magnitude_spectrum.mean() < self.threshold\n\n    def flush(self):\n        return EMPTY_GATED_OBJECT\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.BlurGate.__init__","title":"__init__(method='laplacian', threshold=100)","text":"

    Initializes the Gating object.

Parameters:

• method (str): The method to use for blur detection. Can be \"fft\" or \"laplacian\". Defaults to \"laplacian\".
• threshold (float): The threshold for bluriness. The higher the threshold, the less blurry the image needs to be to be discarded. The default threshold values are 20 for the \"fft\" method and 100 for the \"laplacian\" method. Defaults to 100.

Raises:

• ValueError: If an unknown blur method is provided.

    Source code in video_sampler/gating.py
    def __init__(\n    self, method: Literal[\"fft\", \"laplacian\"] = \"laplacian\", threshold: float = 100\n) -> None:\n    \"\"\"\n    Initializes the Gating object.\n\n    Args:\n        method (str): The method to use for blur detection. Can be \"fft\" or \"laplacian\".\n        threshold (float): The threshold for bluriness. The higher the threshold, the less\n            blurry the image needs to be to be discarded.\n            The default threshold values are:\n            - 20 for the \"fft\" method\n            - 100 for the \"laplacian\" method.\n\n    Raises:\n        ValueError: If an unknown blur method is provided.\n    \"\"\"\n    self.is_blurry = None\n    if method == \"fft\":\n        self.is_blurry = self._is_blurry_fft\n    elif method == \"laplacian\":\n        self.is_blurry = self._is_blurry_laplacian\n    else:\n        raise ValueError(f\"Unknown blur method {method}\")\n    self.threshold = threshold\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.ClipGate","title":"ClipGate","text":"Source code in video_sampler/gating.py
    class ClipGate:\n    def __init__(\n        self,\n        pos_samples: list[str] = None,\n        neg_samples: list[str] = None,\n        model_name: str = \"ViT-B-32\",\n        batch_size: int = 32,\n        pos_margin: float = 0.2,\n        neg_margin: float = 0.3,\n    ) -> None:\n        \"\"\"\n        Initializes the Clip Gating object.\n\n        Args:\n            pos_samples (list[str], optional): List of positive samples. Defaults to None.\n            neg_samples (list[str], optional): List of negative samples. Defaults to None.\n            model_name (str, optional): Name of the model. Defaults to \"ViT-B-32\".\n            batch_size (int, optional): Batch size. Defaults to 32.\n            pos_margin (float, optional): Positive margin. Defaults to 0.2.\n            neg_margin (float, optional): Negative margin. Defaults to 0.3.\n        \"\"\"\n        self.model, self.preprocess, self.tokenizer = create_model(\n            model_name=model_name\n        )\n        self.pos_margin = pos_margin\n        self.neg_margin = neg_margin\n        self.batch_size = batch_size\n        self.frame_accumulator = []\n        self.metadata_accumulator = []\n        if pos_samples is None:\n            self.pos_samples = torch.zeros((1, 512))\n        else:\n            self.pos_samples = self._preproc_samples(pos_samples)\n        if neg_samples is None:\n            self.neg_samples = torch.zeros((1, 512))\n        else:\n            self.neg_samples = self._preproc_samples(neg_samples)\n\n    def __call__(self, frame: Image.Image, meta: dict, last=False) -> Any:\n        return self.flush() if last else self.add_frame(frame, meta)\n\n    def _preproc_samples(self, sample_texts: list[str]):\n        inputs = self.tokenizer(sample_texts)\n        embeds = torch.zeros((len(sample_texts), 512))\n        with torch.no_grad():\n            for i, batch in enumerate(batched(inputs, n=self.batch_size)):\n                batch = torch.stack(batch)\n                text_embeds = self.model.encode_text(batch.to(DEVICE))\n                embeds[i * self.batch_size : (i + 1) * self.batch_size] = (\n                    text_embeds.cpu()\n                )\n        embeds /= embeds.norm(dim=-1, keepdim=True)\n        return embeds\n\n    def _embed_frames(self, frames: list[Image.Image]):\n        \"\"\"Compute the embeddings for each frame.\"\"\"\n        inputs = torch.stack([self.preprocess(frame) for frame in frames]).to(DEVICE)\n        with torch.no_grad():\n            image_embeds = self.model.encode_image(inputs).cpu()\n            image_embeds /= image_embeds.norm(dim=-1, keepdim=True)\n        return image_embeds\n\n    def _get_margins(self, frame_embeds: torch.Tensor):\n        \"\"\"Compute the margins for each frame.\"\"\"\n        org_indx = np.arange(frame_embeds.shape[0])\n        neg_distance = frame_embeds @ self.neg_samples.T\n        pos_distance = frame_embeds @ self.pos_samples.T\n        neg_margin, _ = neg_distance.max(axis=-1)\n        pos_margin, _ = pos_distance.max(axis=-1)\n        incl_samples = torch.argwhere(\n            (neg_margin < self.neg_margin) & (pos_margin >= self.pos_margin)\n        )\n        return org_indx[incl_samples].ravel()\n\n    def add_frame(self, frame: Image.Image, metadata: dict) -> GatedObject:\n        self.frame_accumulator.append(frame)\n        self.metadata_accumulator.append(metadata)\n        if len(self.frame_accumulator) == self.batch_size:\n            return self.__process_metadata()\n        return EMPTY_GATED_OBJECT\n\n    
def flush(self):\n        return self.__process_metadata()\n\n    def __process_metadata(self) -> GatedObject:\n        frame_embeds = self._embed_frames(self.frame_accumulator)\n        selected_frames = self._get_margins(frame_embeds)\n        to_return = [\n            FrameObject(self.frame_accumulator[i], self.metadata_accumulator[i])\n            for i in range(len(self.frame_accumulator))\n            if i in selected_frames\n        ]\n        self.frame_accumulator.clear()\n        self.metadata_accumulator.clear()\n        return GatedObject(to_return, len(selected_frames))\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.ClipGate.__init__","title":"__init__(pos_samples=None, neg_samples=None, model_name='ViT-B-32', batch_size=32, pos_margin=0.2, neg_margin=0.3)","text":"

    Initializes the Clip Gating object.

Parameters:

• pos_samples (list[str], optional): List of positive samples. Defaults to None.
• neg_samples (list[str], optional): List of negative samples. Defaults to None.
• model_name (str, optional): Name of the model. Defaults to \"ViT-B-32\".
• batch_size (int, optional): Batch size. Defaults to 32.
• pos_margin (float, optional): Positive margin. Defaults to 0.2.
• neg_margin (float, optional): Negative margin. Defaults to 0.3.

Source code in video_sampler/gating.py
    def __init__(\n    self,\n    pos_samples: list[str] = None,\n    neg_samples: list[str] = None,\n    model_name: str = \"ViT-B-32\",\n    batch_size: int = 32,\n    pos_margin: float = 0.2,\n    neg_margin: float = 0.3,\n) -> None:\n    \"\"\"\n    Initializes the Clip Gating object.\n\n    Args:\n        pos_samples (list[str], optional): List of positive samples. Defaults to None.\n        neg_samples (list[str], optional): List of negative samples. Defaults to None.\n        model_name (str, optional): Name of the model. Defaults to \"ViT-B-32\".\n        batch_size (int, optional): Batch size. Defaults to 32.\n        pos_margin (float, optional): Positive margin. Defaults to 0.2.\n        neg_margin (float, optional): Negative margin. Defaults to 0.3.\n    \"\"\"\n    self.model, self.preprocess, self.tokenizer = create_model(\n        model_name=model_name\n    )\n    self.pos_margin = pos_margin\n    self.neg_margin = neg_margin\n    self.batch_size = batch_size\n    self.frame_accumulator = []\n    self.metadata_accumulator = []\n    if pos_samples is None:\n        self.pos_samples = torch.zeros((1, 512))\n    else:\n        self.pos_samples = self._preproc_samples(pos_samples)\n    if neg_samples is None:\n        self.neg_samples = torch.zeros((1, 512))\n    else:\n        self.neg_samples = self._preproc_samples(neg_samples)\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.PassGate","title":"PassGate","text":"Source code in video_sampler/gating.py
    class PassGate:\n    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:\n        \"\"\"\n        Passes the frame through the gating mechanism.\n\n        Args:\n            frame (Image.Image): The frame to pass through.\n            meta (dict): The metadata for the frame.\n            last (bool): If this is the last frame in the video.\n\n        Returns:\n            GatedObject: The gated object containing the processed frame.\n        \"\"\"\n        return self.flush() if last else GatedObject([FrameObject(frame, meta)], 1)\n\n    def flush(self):\n        return EMPTY_GATED_OBJECT\n
    "},{"location":"reference/video_sampler/gating/#video_sampler.gating.PassGate.__call__","title":"__call__(frame, meta, last=False)","text":"

    Passes the frame through the gating mechanism.

Parameters:

• frame (Image.Image): The frame to pass through. (required)
• meta (dict): The metadata for the frame. (required)
• last (bool): If this is the last frame in the video. Defaults to False.

Returns:

• GatedObject: The gated object containing the processed frame.

    Source code in video_sampler/gating.py
    def __call__(self, frame: Image.Image, meta: dict, last=False) -> GatedObject:\n    \"\"\"\n    Passes the frame through the gating mechanism.\n\n    Args:\n        frame (Image.Image): The frame to pass through.\n        meta (dict): The metadata for the frame.\n        last (bool): If this is the last frame in the video.\n\n    Returns:\n        GatedObject: The gated object containing the processed frame.\n    \"\"\"\n    return self.flush() if last else GatedObject([FrameObject(frame, meta)], 1)\n
    "},{"location":"reference/video_sampler/iterators/","title":"Iterators","text":""},{"location":"reference/video_sampler/logging/","title":"Logging","text":""},{"location":"reference/video_sampler/sampler/","title":"Sampler","text":""},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.SegmentSampler","title":"SegmentSampler","text":"

    Bases: VideoSampler

    Source code in video_sampler/sampler.py
    class SegmentSampler(VideoSampler):\n    def __init__(\n        self, cfg: SamplerConfig, segment_generator: Iterable[subtitle_line]\n    ) -> None:\n        super().__init__(cfg)\n        self.segment_generator: Iterable[subtitle_line] = segment_generator\n\n    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:\n        \"\"\"Generate sample frames from a video.\n\n        Args:\n            video_path (str): The path to the video file.\n\n        Yields:\n            Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.\n        \"\"\"\n        self.stats.clear()\n        self.frame_buffer.clear()\n        next_segment = next(self.segment_generator)\n        segment_boundary_end_sec = next_segment.end_time / 1000\n        segment_boundary_start_sec = next_segment.start_time / 1000\n        absolute_stop = False\n        with av.open(video_path) as container:\n            stream = container.streams.video[0]\n            if self.cfg.keyframes_only:\n                stream.codec_context.skip_frame = \"NONKEY\"\n            prev_time = -10\n            for frame_indx, frame in enumerate(container.decode(stream)):\n                ftime = frame.time\n                reiters = 0\n                # find the next segment that starts after the current frame\n                while ftime > segment_boundary_end_sec:\n                    console.print(\n                        f\"Seeking to next segment: {segment_boundary_end_sec}/{ftime}\",\n                        style=f\"bold {Color.yellow.value}\",\n                    )\n                    try:\n                        next_segment = next(self.segment_generator)\n                        reiters += 1\n                        segment_boundary_end_sec = next_segment.end_time / 1000\n                        segment_boundary_start_sec = next_segment.start_time / 1000\n                    except StopIteration:\n                        absolute_stop = True\n                        break\n                if reiters > 0:\n                    console.print(\n                        f\"Skipped {reiters} segments!\",\n                        style=f\"bold {Color.red.value}\",\n                    )\n                if absolute_stop:\n                    break\n                # we haven't found the next segment yet\n                # the other condition, is where we are after the segment\n                # but this is handled by the while loop above\n                if ftime <= segment_boundary_start_sec:\n                    continue\n\n                self.stats[\"total\"] += 1\n                time_diff = ftime - prev_time\n                if time_diff < self.cfg.min_frame_interval_sec:\n                    continue\n                prev_time = ftime\n\n                frame_pil: Image = frame.to_image()\n                if self.cfg.debug:\n                    buf = self.frame_buffer.get_buffer_state()\n                    console.print(\n                        f\"Frame {frame_indx}\\ttime: {ftime}\",\n                        f\"\\t Buffer ({len(buf)}): {buf}\",\n                        style=f\"bold {Color.green.value}\",\n                    )\n                frame_meta = {\"frame_time\": ftime, \"frame_indx\": frame_indx}\n                self.stats[\"decoded\"] += 1\n                if res := self.frame_buffer.add(\n                    frame_pil,\n                    metadata=frame_meta,\n                ):\n                    gated_obj = self.gate(*res)\n                    
self.stats[\"produced\"] += 1\n                    self.stats[\"gated\"] += gated_obj.N\n                    if gated_obj.frames:\n                        yield gated_obj.frames\n\n        # flush buffer\n        yield from self.flush_buffer()\n\n    def write_queue(self, video_path: str, q: Queue):\n        super().write_queue(video_path, q)\n
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.SegmentSampler.sample","title":"sample(video_path)","text":"

    Generate sample frames from a video.

Parameters:

• video_path (str): The path to the video file. (required)

Yields:

• Iterable[list[FrameObject]]: A generator that yields lists of FrameObjects representing sampled frames.

    Source code in video_sampler/sampler.py
    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:\n    \"\"\"Generate sample frames from a video.\n\n    Args:\n        video_path (str): The path to the video file.\n\n    Yields:\n        Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.\n    \"\"\"\n    self.stats.clear()\n    self.frame_buffer.clear()\n    next_segment = next(self.segment_generator)\n    segment_boundary_end_sec = next_segment.end_time / 1000\n    segment_boundary_start_sec = next_segment.start_time / 1000\n    absolute_stop = False\n    with av.open(video_path) as container:\n        stream = container.streams.video[0]\n        if self.cfg.keyframes_only:\n            stream.codec_context.skip_frame = \"NONKEY\"\n        prev_time = -10\n        for frame_indx, frame in enumerate(container.decode(stream)):\n            ftime = frame.time\n            reiters = 0\n            # find the next segment that starts after the current frame\n            while ftime > segment_boundary_end_sec:\n                console.print(\n                    f\"Seeking to next segment: {segment_boundary_end_sec}/{ftime}\",\n                    style=f\"bold {Color.yellow.value}\",\n                )\n                try:\n                    next_segment = next(self.segment_generator)\n                    reiters += 1\n                    segment_boundary_end_sec = next_segment.end_time / 1000\n                    segment_boundary_start_sec = next_segment.start_time / 1000\n                except StopIteration:\n                    absolute_stop = True\n                    break\n            if reiters > 0:\n                console.print(\n                    f\"Skipped {reiters} segments!\",\n                    style=f\"bold {Color.red.value}\",\n                )\n            if absolute_stop:\n                break\n            # we haven't found the next segment yet\n            # the other condition, is where we are after the segment\n            # but this is handled by the while loop above\n            if ftime <= segment_boundary_start_sec:\n                continue\n\n            self.stats[\"total\"] += 1\n            time_diff = ftime - prev_time\n            if time_diff < self.cfg.min_frame_interval_sec:\n                continue\n            prev_time = ftime\n\n            frame_pil: Image = frame.to_image()\n            if self.cfg.debug:\n                buf = self.frame_buffer.get_buffer_state()\n                console.print(\n                    f\"Frame {frame_indx}\\ttime: {ftime}\",\n                    f\"\\t Buffer ({len(buf)}): {buf}\",\n                    style=f\"bold {Color.green.value}\",\n                )\n            frame_meta = {\"frame_time\": ftime, \"frame_indx\": frame_indx}\n            self.stats[\"decoded\"] += 1\n            if res := self.frame_buffer.add(\n                frame_pil,\n                metadata=frame_meta,\n            ):\n                gated_obj = self.gate(*res)\n                self.stats[\"produced\"] += 1\n                self.stats[\"gated\"] += gated_obj.N\n                if gated_obj.frames:\n                    yield gated_obj.frames\n\n    # flush buffer\n    yield from self.flush_buffer()\n
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.VideoSampler","title":"VideoSampler","text":"

The fundamental class for sampling video frames.

Parameters:

    cfg (SamplerConfig): The configuration for the video sampler. Required.

Attributes:

    cfg (SamplerConfig): The configuration for the video sampler.
    frame_buffer (FrameBuffer): The frame buffer used for sampling frames.
    gate (Gate): The gate used for filtering frames.
    stats (Counter): A counter for tracking statistics.

Methods:

    sample(video_path): Generates sample frames from a video.
    write_queue(video_path, q): Writes sampled frames to a queue.

Source code in video_sampler/sampler.py

class VideoSampler:
    """
    The fundamental class for sampling video frames.

    Args:
        cfg (SamplerConfig): The configuration for the video sampler.

    Attributes:
        cfg (SamplerConfig): The configuration for the video sampler.
        frame_buffer (FrameBuffer): The frame buffer used for sampling frames.
        gate (Gate): The gate used for filtering frames.
        stats (Counter): A counter for tracking statistics.

    Methods:
        sample(video_path) -> Iterable[list[FrameObject]]:
            Generates sample frames from a video.
        write_queue(video_path, q):
            Writes sampled frames to a queue.
    """

    def __init__(self, cfg: SamplerConfig) -> None:
        self.cfg = deepcopy(cfg)
        self.frame_buffer = create_buffer(self.cfg.buffer_config)
        self.gate = create_gate(self.cfg.gate_config)
        self.stats = Counter()

    def flush_buffer(self):
        """Flushes the frame buffer and yields gated frames"""
        for res in self.frame_buffer.final_flush():
            if res:
                self.stats["produced"] += 1
                gated_obj = self.gate(*res)
                self.stats["gated"] += gated_obj.N
                if gated_obj.frames:
                    yield gated_obj.frames
        gated_obj = self.gate.flush()
        self.stats["gated"] += gated_obj.N
        if gated_obj.frames:
            yield gated_obj.frames
        yield PROCESSING_DONE_ITERABLE

    def sample(self, video_path: str) -> Iterable[list[FrameObject]]:
        """Generate sample frames from a video.

        Args:
            video_path (str): The path to the video file.

        Yields:
            Iterable[list[FrameObject]]: A generator that yields a list of FrameObjects representing sampled frames.
        """
        self.stats.clear()
        self.frame_buffer.clear()
        with av.open(video_path) as container:
            stream = container.streams.video[0]
            if self.cfg.keyframes_only:
                stream.codec_context.skip_frame = "NONKEY"
            prev_time = -10
            for frame_indx, frame in enumerate(container.decode(stream)):
                # enforce the minimum interval between sampled frames
                time_diff = frame.time - prev_time
                self.stats["total"] += 1
                if time_diff < self.cfg.min_frame_interval_sec:
                    continue
                prev_time = frame.time

                frame_pil: Image = frame.to_image()
                if self.cfg.debug:
                    buf = self.frame_buffer.get_buffer_state()
                    console.print(
                        f"Frame {frame_indx}\ttime: {frame.time}",
                        f"\t Buffer ({len(buf)}): {buf}",
                        style=f"bold {Color.green.value}",
                    )
                frame_meta = {"frame_time": frame.time, "frame_indx": frame_indx}
                self.stats["decoded"] += 1
                if res := self.frame_buffer.add(
                    frame_pil,
                    metadata=frame_meta,
                ):
                    gated_obj = self.gate(*res)
                    self.stats["produced"] += 1
                    self.stats["gated"] += gated_obj.N
                    if gated_obj.frames:
                        yield gated_obj.frames

        # flush buffer
        yield from self.flush_buffer()

    def write_queue(self, video_path: str, q: Queue):
        try:
            item: tuple[FrameObject, int]
            for item in self.sample(video_path=video_path):
                q.put(item)
        except (av.IsADirectoryError, av.InvalidDataError) as e:
            console.print(
                f"Error while processing {video_path}",
                f"\n\t{e}",
                style=f"bold {Color.red.value}",
            )
            q.put(PROCESSING_DONE_ITERABLE)
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.VideoSampler.flush_buffer","title":"flush_buffer()","text":"

    Flushes the frame buffer and yields gated frames

Source code in video_sampler/sampler.py (see the VideoSampler listing above).
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.VideoSampler.sample","title":"sample(video_path)","text":"

    Generate sample frames from a video.

Parameters:

    video_path (str): The path to the video file. Required.

Yields:

    Iterable[list[FrameObject]]: A generator that yields lists of FrameObjects representing sampled frames.

Source code in video_sampler/sampler.py (see the VideoSampler listing above).
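
For orientation, a minimal usage sketch. The import path, the SamplerConfig constructor arguments, and the file name are illustrative assumptions, not values stated in this reference; min_frame_interval_sec and keyframes_only are, however, config fields used by the source above.

# Illustrative only: import path, config arguments, and file name are assumptions.
from video_sampler import SamplerConfig, VideoSampler

config = SamplerConfig(min_frame_interval_sec=1.0, keyframes_only=True)
sampler = VideoSampler(cfg=config)
for frame_batch in sampler.sample("video.mp4"):
    for frame_object in frame_batch:
        if frame_object.metadata.get("end", False):
            continue  # end-of-processing sentinel emitted by flush_buffer
        print(frame_object.metadata["frame_time"], frame_object.metadata["frame_indx"])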
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.Worker","title":"Worker","text":"Source code in video_sampler/sampler.py
class Worker:
    def __init__(
        self,
        cfg: SamplerConfig,
        devnull: bool = False,
        processor_cls: VideoSampler = VideoSampler,
        extra_processor_args: dict = None,
    ) -> None:
        if extra_processor_args is None:
            extra_processor_args = {}
        self.cfg = cfg
        self.processor = processor_cls(cfg=cfg, **extra_processor_args)
        self.q = Queue()
        self.devnull = devnull

    def launch(
        self, video_path: str, output_path: str = "", pretty_video_name: str = ""
    ) -> None:
        """
        Launch the worker.

        Args:
            video_path (str): Path to the video file.
            output_path (str, optional): Path to the output folder. Defaults to "".
            pretty_video_name (str, optional): Name of the video file for pretty printing (useful for urls).
                Defaults to "".
        """
        if not pretty_video_name:
            pretty_video_name = os.path.basename(video_path)
        if output_path and self.devnull:
            raise ValueError("Cannot write to disk when devnull is True")
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        proc_thread = Thread(
            target=self.processor.write_queue, args=(video_path, self.q)
        )
        proc_thread.start()
        self.queue_reader(output_path, read_interval=self.cfg.queue_wait)
        proc_thread.join()
        if self.cfg.print_stats:
            console.print(
                f"Stats for: {pretty_video_name}",
                f"\n\tTotal frames: {self.processor.stats['total']}",
                f"\n\tDecoded frames: {self.processor.stats['decoded']}",
                f"\n\tProduced frames: {self.processor.stats['produced']}",
                f"\n\tGated frames: {self.processor.stats['gated']}",
                style=f"bold {Color.magenta.value}",
            )

    def queue_reader(self, output_path, read_interval=0.1) -> None:
        """
        Reads frames from the queue and saves them as JPEG images.

        Args:
            output_path (str): The directory path where the frames will be saved.
            read_interval (float, optional): The time interval between reading frames from the queue.
                Defaults to 0.1 seconds.
        """
        while True:
            if not self.q.empty():
                frame_object: FrameObject
                for frame_object in self.q.get():
                    if frame_object.metadata.get("end", False):
                        return
                    if frame_object.frame is not None and (
                        not self.devnull and isinstance(frame_object.frame, Image.Image)
                    ):
                        frame_object.frame.save(
                            os.path.join(
                                output_path,
                                f"{frame_object.metadata['frame_time']}.jpg",
                            )
                        )
            time.sleep(read_interval)
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.Worker.launch","title":"launch(video_path, output_path='', pretty_video_name='')","text":"

    Launch the worker.

Parameters:

    video_path (str): Path to the video file. Required.
    output_path (str, optional): Path to the output folder. Defaults to "".
    pretty_video_name (str, optional): Name of the video file for pretty printing (useful for urls). Defaults to "".

Source code in video_sampler/sampler.py (see the Worker listing above).
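
A hypothetical end-to-end run; the import path and the paths on disk are placeholders, while print_stats and the <frame_time>.jpg naming come from the source shown above.

# Illustrative only: import path and file paths are assumptions.
from video_sampler import SamplerConfig, Worker

worker = Worker(cfg=SamplerConfig(print_stats=True))
worker.launch(
    video_path="input/video.mp4",
    output_path="output/frames",  # sampled frames land here as <frame_time>.jpg
)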
    "},{"location":"reference/video_sampler/sampler/#video_sampler.sampler.Worker.queue_reader","title":"queue_reader(output_path, read_interval=0.1)","text":"

    Reads frames from the queue and saves them as JPEG images.

Parameters:

    output_path (str): The directory path where the frames will be saved. Required.
    read_interval (float, optional): The time interval between reading frames from the queue. Defaults to 0.1 seconds.

Source code in video_sampler/sampler.py (see the Worker listing above).
    "},{"location":"reference/video_sampler/schemas/","title":"Schemas","text":""},{"location":"reference/video_sampler/ttl_counter/","title":"Ttl counter","text":""},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter","title":"TTLCounter","text":"

    TTLCounter is a counter/list that expires items after a TTL period expires.

    Source code in video_sampler/ttl_counter.py
class TTLCounter:
    """TTLCounter is a counter/list that expires items after a TTL period expires."""

    def __init__(self, max_ttl: int) -> None:
        self.inner_counter = []
        self.max_ttl = max_ttl

    def __len__(self):
        """Return the number of items in the counter."""
        return len(self.inner_counter)

    def add_item(self, hash: str):
        """Add an item with the max TTL."""
        heapq.heappush(self.inner_counter, (self.max_ttl, hash))

    def tick(self):
        """Decrease the TTL of all items by 1."""
        for i, (ttl, hash) in enumerate(self.inner_counter):
            self.inner_counter[i] = (ttl - 1, hash)

    def expire_one(self):
        """Expire the first item if its TTL is 0. Expires AT MOST one item."""
        # peek the first item
        ttl, hash = self.inner_counter[0]
        if ttl <= 0:
            heapq.heappop(self.inner_counter)
            return hash
        return None

    def expire_all(self):
        """Expire all items."""
        for _, hash in self.inner_counter:
            yield hash
        self.inner_counter.clear()
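
A behavior sketch derived solely from the listing above: items live for max_ttl ticks, and expire_one only pops once the front item's TTL reaches zero.

counter = TTLCounter(max_ttl=2)
counter.add_item("abc123")
counter.tick()                            # TTL drops from 2 to 1
assert counter.expire_one() is None       # still alive
counter.tick()                            # TTL drops from 1 to 0
assert counter.expire_one() == "abc123"   # expired and removed
assert len(counter) == 0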
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.__len__","title":"__len__()","text":"

    Return the number of items in the counter.

Source code in video_sampler/ttl_counter.py (see the TTLCounter listing above).
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.add_item","title":"add_item(hash)","text":"

    Add an item with the max TTL.

Source code in video_sampler/ttl_counter.py (see the TTLCounter listing above).
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.expire_all","title":"expire_all()","text":"

    Expire all items.

Source code in video_sampler/ttl_counter.py (see the TTLCounter listing above).
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.expire_one","title":"expire_one()","text":"

    Expire the first item if its TTL is 0. Expires AT MOST one item.

Source code in video_sampler/ttl_counter.py (see the TTLCounter listing above).
    "},{"location":"reference/video_sampler/ttl_counter/#video_sampler.ttl_counter.TTLCounter.tick","title":"tick()","text":"

    Decrease the TTL of all items by 1.

Source code in video_sampler/ttl_counter.py (see the TTLCounter listing above).
    "},{"location":"reference/video_sampler/utils/","title":"Utils","text":""},{"location":"reference/video_sampler/utils/#video_sampler.utils.batched","title":"batched(iterable, n)","text":"

Batch data into tuples of length n; the last batch may be shorter. Adapted from https://docs.python.org/3/library/itertools.html#itertools-recipes

    Source code in video_sampler/utils.py
def batched(iterable, n):
    """
    Batch data into tuples of length n. The last batch may be shorter.
    from https://docs.python.org/3/library/itertools.html#itertools-recipes
    """
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch
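
For example:

>>> list(batched("ABCDEFG", 3))
[('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]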
    "},{"location":"reference/video_sampler/utils/#video_sampler.utils.slugify","title":"slugify(value, allow_unicode=False)","text":"

Taken from https://github.com/django/django/blob/master/django/utils/text.py. Converts to ASCII if 'allow_unicode' is False; converts spaces or repeated dashes to single dashes; removes characters that aren't alphanumerics, underscores, or hyphens; converts to lowercase; and strips leading and trailing whitespace, dashes, and underscores.

    Source code in video_sampler/utils.py
def slugify(value, allow_unicode=False):
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize("NFKC", value)
    else:
        value = (
            unicodedata.normalize("NFKD", value)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
    value = re.sub(r"[^\w\s-]", "", value.lower())
    return re.sub(r"[-\s]+", "-", value).strip("-_")
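
For example:

>>> slugify("Végétables & Fruits -- Ep. 12")
'vegetables-fruits-ep-12'
>>> slugify("Café au lait", allow_unicode=True)
'café-au-lait'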
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/","title":"Integrations","text":""},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin","title":"YTDLPPlugin","text":"

    A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

Methods:

    generate_urls(url, extra_info_extract_opts=None): Generates URLs and corresponding titles from the given URL.

Source code in video_sampler/integrations/yt_dlp_plugin.py

class YTDLPPlugin:
    """
    A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

    Methods:
        generate_urls(url, extra_info_extract_opts=None) -> Iterable[str]:
            Generates URLs and corresponding titles from the given URL.
    """

    def __init__(self, ie_key: str = "Generic"):
        """
        Initialize the YTDLPPlugin instance.
        """
        self.ie_key = ie_key
        self.ydl_opts = {
            "format": best_video_only,
        }

    def generate_urls(
        self,
        url: str,
        extra_info_extract_opts: dict = None,
    ) -> Iterable[str]:
        """Generate video format URLs and titles for a given URL.

        Args:
            url (str): The URL of the video (or playlist/search) to resolve.
            extra_info_extract_opts (dict, optional): Additional options for extracting video information.

        Yields:
            tuple: A tuple containing the video title and the video format URL.
        """
        if extra_info_extract_opts is None:
            extra_info_extract_opts = {}
        extr_args = {"ie_key": self.ie_key} if "ytsearch" not in url else {}
        with YoutubeDL(params=(self.ydl_opts | extra_info_extract_opts)) as ydl:
            info = ydl.extract_info(url, download=False, **extr_args)
            if "entries" not in info:
                req_format = info["requested_formats"][0]
                yield info["title"], req_format["url"]
            else:
                for entry in info.get("entries", []):
                    req_format = entry["requested_formats"][0]
                    yield entry["title"], req_format["url"]

    def get_subtitles_opts(self, no_download: bool = False) -> dict:
        return {
            "postprocessors": [
                {
                    "format": "srt",
                    "key": "FFmpegSubtitlesConvertor",
                    "when": "before_dl",
                }
            ],
            "format": best_video_only,
            "subtitleslangs": ["en.*"],
            "writeautomaticsub": True,
            "writesubtitles": True,
        }

    def generate_urls_by_subs(
        self,
        url: str,
        extra_info_extract_opts: dict = None,
    ):
        """Generate URLs and download subtitles for a given video URL.

        Args:
            url (str): The URL of the video to download subtitles for.
            extra_info_extract_opts (dict, optional): Additional options for extracting video information.

        Yields:
            tuple: A tuple containing the video title, video format URL, and downloaded subtitles.
        """
        if extra_info_extract_opts is None:
            extra_info_extract_opts = {}
        extr_args = {"ie_key": self.ie_key} if "ytsearch" not in url else {}
        with YoutubeDL(
            params=(self.ydl_opts | extra_info_extract_opts | self.get_subtitles_opts())
        ) as ydl:
            info = ydl.extract_info(url, download=False, **extr_args)
            import json

            # NOTE: debug dump of the extracted info to the working directory
            json.dump(info, open("info.json", "w"))
            if "entries" not in info:
                req_subs = list(info["requested_subtitles"].values())[0]
                req_format = info["requested_formats"][0]
                yield info["title"], req_format["url"], download_sub(req_subs["url"])
            else:
                for entry in info.get("entries", []):
                    req_format = entry["requested_formats"][0]
                    req_subs = list(entry["requested_subtitles"].values())[0]
                    yield entry["title"], req_format["url"], download_sub(
                        req_subs["url"]
                    )
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.__init__","title":"__init__(ie_key='Generic')","text":"

    Initialize the YTDLPPlugin instance.

Source code in video_sampler/integrations/yt_dlp_plugin.py (see the YTDLPPlugin listing above).
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.generate_urls","title":"generate_urls(url, extra_info_extract_opts=None)","text":"

Generate video format URLs and titles for a given video URL.

Parameters:

    url (str): The URL of the video to resolve. Required.
    extra_info_extract_opts (dict, optional): Additional options for extracting video information. Defaults to None.

Yields:

    tuple: A tuple containing the video title and the video format URL.

Source code in video_sampler/integrations/yt_dlp_plugin.py (see the YTDLPPlugin listing above).
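
A sketch of streaming frames straight from a resolved format URL, without downloading the file first. The URL is a placeholder, and worker is assumed to be a configured Worker instance as in the earlier example.

# Illustrative only: the URL is a placeholder.
plugin = YTDLPPlugin()
for title, video_url in plugin.generate_urls("https://example.com/some_video"):
    worker.launch(video_path=video_url, pretty_video_name=f"{title}.mp4")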
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.YTDLPPlugin.generate_urls_by_subs","title":"generate_urls_by_subs(url, extra_info_extract_opts=None)","text":"

    Generate URLs and download subtitles for a given video URL.

Parameters:

    url (str): The URL of the video to download subtitles for. Required.
    extra_info_extract_opts (dict, optional): Additional options for extracting video information. Defaults to None.

Yields:

    tuple: A tuple containing the video title, the video format URL, and the downloaded subtitles.

Source code in video_sampler/integrations/yt_dlp_plugin.py (see the YTDLPPlugin listing above).
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.best_video_best_audio","title":"best_video_best_audio(ctx)","text":"

    Taken from the yt-dlp documentation as-is

    Source code in video_sampler/integrations/yt_dlp_plugin.py
def best_video_best_audio(ctx):
    """Taken from the yt-dlp documentation as-is.
    Select the best video and the best audio that won't result in an mkv.
    NOTE: This is just an example and does not handle all cases"""

    # formats are already sorted worst to best
    formats = ctx.get("formats")[::-1]

    # acodec='none' means there is no audio
    best_video = next(
        f for f in formats if f["vcodec"] != "none" and f["acodec"] == "none"
    )

    # find compatible audio extension
    audio_ext = {"mp4": "m4a", "webm": "webm"}[best_video["ext"]]
    # vcodec='none' means there is no video
    best_audio = next(
        f
        for f in formats
        if (f["acodec"] != "none" and f["vcodec"] == "none" and f["ext"] == audio_ext)
    )

    # These are the minimum required fields for a merged format
    yield {
        "format_id": f'{best_video["format_id"]}+{best_audio["format_id"]}',
        "ext": best_video["ext"],
        "requested_formats": [best_video, best_audio],
        # Must be + separated list of protocols
        "protocol": f'{best_video["protocol"]}+{best_audio["protocol"]}',
    }
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.best_video_only","title":"best_video_only(ctx)","text":"

    Just best video -- save bandwidth

    Source code in video_sampler/integrations/yt_dlp_plugin.py
def best_video_only(ctx):
    """Just best video -- save bandwidth"""
    # formats are already sorted worst to best
    formats = ctx.get("formats")[::-1]

    # acodec='none' means there is no audio
    best_video = next(f for f in formats if f["vcodec"] != "none")
    # These are the minimum required fields for a merged format
    yield {
        "format_id": f'{best_video["format_id"]}',
        "ext": best_video["ext"],
        "requested_formats": [best_video],
        # Must be + separated list of protocols
        "protocol": f'{best_video["protocol"]}',
    }
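
Both selectors follow yt-dlp's callable format-selector protocol (this is how the plugin itself passes best_video_only as its "format" option), so they can be used directly with YoutubeDL; the URL below is a placeholder.

from yt_dlp import YoutubeDL

with YoutubeDL({"format": best_video_best_audio}) as ydl:
    info = ydl.extract_info("https://example.com/some_video", download=False)
    print(info["requested_formats"][0]["url"])  # resolved best-video stream URL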
    "},{"location":"reference/video_sampler/integrations/yt_dlp_plugin/#video_sampler.integrations.yt_dlp_plugin.no_shorts","title":"no_shorts(info, *, incomplete)","text":"

    Filter out short videos

    Source code in video_sampler/integrations/yt_dlp_plugin.py
def no_shorts(info, *, incomplete):
    """Filter out short videos"""
    if url := info.get("url", ""):
        if "/shorts" in url:
            return "This is a short video"
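
The signature matches yt-dlp's match_filter hook, where returning a string rejects the entry with that message, so it can be wired into the extraction options, for example:

ydl_opts = {
    "format": best_video_only,
    "match_filter": no_shorts,  # playlist entries under /shorts are skipped
}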
    "},{"location":"reference/video_sampler/language/keyword_capture/","title":"Language","text":""},{"location":"reference/video_sampler/language/keyword_capture/#video_sampler.language.keyword_capture.download_sub","title":"download_sub(sub_url)","text":"

    Download a VTT subtitle file to a string.

    Source code in video_sampler/language/keyword_capture.py
def download_sub(sub_url: str):
    """Download a VTT subtitle file to a string."""
    response = requests.get(url=sub_url)
    return parse_srt_subtitle(response.text)
    "},{"location":"reference/video_sampler/visualisation/clustering/","title":"Visualisation","text":""},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.build_feature_model","title":"build_feature_model(model_str)","text":"

    Build a feature extraction model.

Parameters:

    model_str (str): Model name. Required.

Returns:

    tuple: Tuple of (model, extractor).

    Source code in video_sampler/visualisation/clustering.py
def build_feature_model(model_str: str):
    """Build a feature extraction model.

    Args:
        model_str (str): Model name.

    Returns:
        tuple: Tuple of (model, extractor).
    """
    extractor = AutoFeatureExtractor.from_pretrained(model_str)
    model = ResNetModel.from_pretrained(model_str)
    return model, extractor
    "},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.cluster_features","title":"cluster_features(features, max_clusters=50)","text":"

Cluster features using t-SNE and KMeans.

Parameters:

    features (dict): dict with keys "embeds" and "paths". Required.
    max_clusters (int): maximum number of clusters. Defaults to 50.

Returns:

    tuple: (X, cluster_labels)

    Source code in video_sampler/visualisation/clustering.py
def cluster_features(
    features,
    max_clusters=50,
):
    """Cluster features using t-SNE and KMeans.

    Args:
        features (dict): dict with keys "embeds" and "paths"
        max_clusters (int): maximum number of clusters

    Returns:
        tuple: of (X, cluster_labels)
    """
    proj = TSNE(n_components=2, perplexity=35, metric="cosine")
    Xorg = np.asarray(features["embeds"])
    X = proj.fit_transform(Xorg)

    # take about 10% of the frames as the number of clusters
    n_clusters = min(int(0.1 * len(features["embeds"])), max_clusters)
    cluster_model = KMeans(n_clusters=n_clusters, random_state=0).fit(Xorg)
    return X, cluster_model.labels_
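
One possible way to inspect the result; matplotlib is an assumption here, not a dependency stated in this reference.

import matplotlib.pyplot as plt

X, labels = cluster_features(features)         # features as returned by extract_features
plt.scatter(X[:, 0], X[:, 1], c=labels, s=10)  # 2-D t-SNE points, coloured by cluster
plt.title("t-SNE projection with KMeans labels")
plt.show()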
    "},{"location":"reference/video_sampler/visualisation/clustering/#video_sampler.visualisation.clustering.extract_features","title":"extract_features(model_str, image_folder, mkey='pixel_values', batch_size=8)","text":"

    Extract features from a folder of images.

Parameters:

    model_str (str): Model name. Required.
    image_folder (Path): Folder with images. Required.
    mkey (str, optional): Key for the pixel values. Defaults to "pixel_values".
    batch_size (int, optional): Batch size. Defaults to 8.

Returns:

    dict: Dictionary with keys "embeds" and "paths".

    Source code in video_sampler/visualisation/clustering.py
def extract_features(
    model_str: str, image_folder: Path, mkey="pixel_values", batch_size: int = 8
):
    """Extract features from a folder of images.

    Args:
        model_str (str): Model name.
        image_folder (Path): Folder with images.
        mkey (str, optional): Key for the pixel values. Defaults to "pixel_values".
        batch_size (int, optional): Batch size. Defaults to 8.

    Returns:
        dict: Dictionary with keys "embeds" and "paths".
    """

    out_features = defaultdict(list)
    model, extractor = build_feature_model(model_str)
    with torch.no_grad():
        all_files = list(image_folder.iterdir())
        for batch in tqdm(
            batched(all_files, batch_size), total=len(all_files) // batch_size
        ):
            # load images
            batch_imgs = [Image.open(img_path).convert("RGB") for img_path in batch]
            # extract features
            batch_imgs = extractor(batch_imgs, return_tensors="pt")[mkey]
            batch_features = model(batch_imgs).pooler_output.squeeze()
            if len(batch) == 1:
                batch_features = batch_features.expand(1, -1)
            batch_features = torch.functional.F.normalize(batch_features, p=2, dim=1)
            out_features["embeds"].extend(batch_features)
            out_features["paths"].extend([img_path.name for img_path in batch])
    return out_features
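
Putting the clustering helpers together on a folder of sampled frames. The folder path is a placeholder, and microsoft/resnet-50 is an assumed, commonly used checkpoint compatible with ResNetModel rather than a model named in this reference.

from pathlib import Path

# Illustrative only: paths and model checkpoint are assumptions.
features = extract_features("microsoft/resnet-50", Path("output/frames"))
X, labels = cluster_features(features, max_clusters=20)
for path, label in zip(features["paths"], labels):
    print(label, path)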
    "}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 921c045..c64e4af 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ