Define minimum length per filter (#124)

* A minimum length can now be defined per filter
* Move the default minimum length to a variable
* Added per filter min length to docs
* Global min_length can override filter specific min_length
* Adhere to formatting spec
* Removed default min_length from tmux plugin
* Added default value for min_length in FilterDef
* Determine min_length value during creation of the filter
laktak · Jun 30, 2024 · b297d45 · b297d45
1 parent a50d382
commit b297d45
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 17 deletions.
diff --git a/HELP.md b/HELP.md
@@ -5,7 +5,7 @@ You can give feedback or star extrakto at https://github.com/laktak/extrakto
 Extrakto uses fzf. You only need to type a few keys to find your selection with a fuzzy match.
 
 - Press *ctrl-f* to change to the next filter mode (*filter_key*)
-  - *word*, the default filter allows you to select words (min length=5)
+  - *word*, the default filter allows you to select words (default min length=5)
   - *all*, runs all filters and allows you select quotes, url, paths, etc. \
     You can define your own filters as well.
   - *line*, select full lines

diff --git a/extrakto.conf b/extrakto.conf
@@ -8,13 +8,14 @@
 
 # define a section per filter
 # each filter must have at least a regex containing one or more capture groups
-# regex:   a python regex expression
-# enabled: is filter active (default True)
-# in_all:  is included in --all (default True)
-# lstrip:  characters to strip from left result
-# rstrip:  characters to strip from right result
-# exclude: exclude result if matching
-# alt2-9:  alternate result (see url)
+# regex:      a python regex expression
+# enabled:    is filter active (default True)
+# in_all:     is included in --all (default True)
+# lstrip:     characters to strip from left result
+# rstrip:     characters to strip from right result
+# exclude:    exclude result if matching
+# alt2-9:     alternate result (see url)
+# min_length: minimum length of the result (default 5)
 
 [word]
 # "words" consist of anything but the following characters:

diff --git a/extrakto.py b/extrakto.py
@@ -23,13 +23,15 @@
 # and whitespace ( \t\n\r)
 RE_WORD = "[^][(){}=$\u2500-\u27BF\uE000-\uF8FF \\t\\n\\r]+"
 
+MIN_LENGTH_DEFAULT = 5
+
 
 class ExtraktoException(Exception):
     pass
 
 
 class Extrakto:
-    def __init__(self, *, min_length=5, alt=False, prefix_name=False):
+    def __init__(self, *, min_length=None, alt=False, prefix_name=False):
         conf = ConfigParser(interpolation=None)
         default_conf = os.path.join(SCRIPT_DIR, "extrakto.conf")
         user_conf = os.path.join(
@@ -71,6 +73,12 @@ def __init__(self, *, min_length=5, alt=False, prefix_name=False):
                     lstrip=sect.get("lstrip", ""),
                     rstrip=sect.get("rstrip", ""),
                     alt=alt,
+                    # prefer global min_length, fallback to filter specific
+                    min_length=(
+                        self.min_length
+                        if self.min_length is not None
+                        else sect.getint("min_length", MIN_LENGTH_DEFAULT)
+                    ),
                 )
 
     def __getitem__(self, key):
@@ -86,14 +94,26 @@ def keys(self):
 
 
 class FilterDef:
-    def __init__(self, extrakto, name, *, regex, exclude, lstrip, rstrip, alt):
+    def __init__(
+        self,
+        extrakto,
+        name,
+        *,
+        regex,
+        exclude,
+        lstrip,
+        rstrip,
+        alt,
+        min_length=MIN_LENGTH_DEFAULT,
+    ):
         self.extrakto = extrakto
         self.name = name
         self.regex = regex
         self.exclude = exclude
         self.lstrip = lstrip
         self.rstrip = rstrip
         self.alt = alt
+        self.min_length = min_length
 
     def filter(self, text):
         res = list()
@@ -111,7 +131,7 @@ def filter(self, text):
             if self.rstrip:
                 item = item.rstrip(self.rstrip)
 
-            if len(item) >= self.extrakto.min_length:
+            if len(item) >= self.min_length:
                 if not self.exclude or not re.search(self.exclude, item, re.I):
                     if self.extrakto.alt:
                         for i, altre in enumerate(self.alt):
@@ -122,7 +142,7 @@ def filter(self, text):
         return res
 
 
-def get_lines(text, *, min_length=5, prefix_name=False):
+def get_lines(text, *, min_length=MIN_LENGTH_DEFAULT, prefix_name=False):
     lines = []
 
     for raw_line in text.splitlines():
@@ -209,9 +229,7 @@ def main(parser):
 
     parser.add_argument("-r", "--reverse", action="store_true", help="reverse output")
 
-    parser.add_argument(
-        "-m", "--min-length", default=5, help="minimum token length", type=int
-    )
+    parser.add_argument("-m", "--min-length", help="minimum token length", type=int)
 
     parser.add_argument(
         "--warn-empty", action="store_true", help="warn if result is empty"

diff --git a/extrakto_plugin.py b/extrakto_plugin.py
@@ -78,12 +78,12 @@ def get_cap(mode, data):
     run_list = []
 
     if mode == "all":
-        extrakto = Extrakto(min_length=5, alt=True, prefix_name=True)
+        extrakto = Extrakto(alt=True, prefix_name=True)
         run_list = extrakto.all()
     elif mode == "line":
         res += get_lines(data)
     else:
-        extrakto = Extrakto(min_length=5)
+        extrakto = Extrakto()
         run_list = [mode]
 
     for name in run_list: