Skip to content

Commit

Permalink
Define minimum length per filter (#124)
Browse files Browse the repository at this point in the history
* A minimum length can now be defined per filter

* Move the default minimum length to a variable

* Added per filter min length to docs

* Global min_length can override filter-specific min_length

* Adhere to formatting spec

* Removed default min_length from tmux plugin

* Added default value for min_length in FilterDef

* Determine min_length value during creation of the filter
  • Loading branch information
LukasPietzschmann authored Jun 30, 2024
1 parent a50d382 commit b297d45
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 17 deletions.
2 changes: 1 addition & 1 deletion HELP.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ You can give feedback or star extrakto at https://github.com/laktak/extrakto
Extrakto uses fzf. You only need to type a few keys to find your selection with a fuzzy match.

- Press *ctrl-f* to change to the next filter mode (*filter_key*)
- *word*, the default filter allows you to select words (min length=5)
- *word*, the default filter allows you to select words (default min length=5)
- *all*, runs all filters and allows you to select quotes, URLs, paths, etc. \
You can define your own filters as well.
- *line*, select full lines
Expand Down
15 changes: 8 additions & 7 deletions extrakto.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

# define a section per filter
# each filter must have at least a regex containing one or more capture groups
# regex: a python regex expression
# enabled: is filter active (default True)
# in_all: is included in --all (default True)
# lstrip: characters to strip from left result
# rstrip: characters to strip from right result
# exclude: exclude result if matching
# alt2-9: alternate result (see url)
# regex: a python regex expression
# enabled: is filter active (default True)
# in_all: is included in --all (default True)
# lstrip: characters to strip from left result
# rstrip: characters to strip from right result
# exclude: exclude result if matching
# alt2-9: alternate result (see url)
# min_length: minimum length of the result (default 5)

[word]
# "words" consist of anything but the following characters:
Expand Down
32 changes: 25 additions & 7 deletions extrakto.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
# and whitespace ( \t\n\r)
RE_WORD = "[^][(){}=$\u2500-\u27BF\uE000-\uF8FF \\t\\n\\r]+"

MIN_LENGTH_DEFAULT = 5


class ExtraktoException(Exception):
pass


class Extrakto:
def __init__(self, *, min_length=5, alt=False, prefix_name=False):
def __init__(self, *, min_length=None, alt=False, prefix_name=False):
conf = ConfigParser(interpolation=None)
default_conf = os.path.join(SCRIPT_DIR, "extrakto.conf")
user_conf = os.path.join(
Expand Down Expand Up @@ -71,6 +73,12 @@ def __init__(self, *, min_length=5, alt=False, prefix_name=False):
lstrip=sect.get("lstrip", ""),
rstrip=sect.get("rstrip", ""),
alt=alt,
# prefer global min_length, fallback to filter specific
min_length=(
self.min_length
if self.min_length is not None
else sect.getint("min_length", MIN_LENGTH_DEFAULT)
),
)

def __getitem__(self, key):
Expand All @@ -86,14 +94,26 @@ def keys(self):


class FilterDef:
def __init__(self, extrakto, name, *, regex, exclude, lstrip, rstrip, alt):
def __init__(
self,
extrakto,
name,
*,
regex,
exclude,
lstrip,
rstrip,
alt,
min_length=MIN_LENGTH_DEFAULT,
):
self.extrakto = extrakto
self.name = name
self.regex = regex
self.exclude = exclude
self.lstrip = lstrip
self.rstrip = rstrip
self.alt = alt
self.min_length = min_length

def filter(self, text):
res = list()
Expand All @@ -111,7 +131,7 @@ def filter(self, text):
if self.rstrip:
item = item.rstrip(self.rstrip)

if len(item) >= self.extrakto.min_length:
if len(item) >= self.min_length:
if not self.exclude or not re.search(self.exclude, item, re.I):
if self.extrakto.alt:
for i, altre in enumerate(self.alt):
Expand All @@ -122,7 +142,7 @@ def filter(self, text):
return res


def get_lines(text, *, min_length=5, prefix_name=False):
def get_lines(text, *, min_length=MIN_LENGTH_DEFAULT, prefix_name=False):
lines = []

for raw_line in text.splitlines():
Expand Down Expand Up @@ -209,9 +229,7 @@ def main(parser):

parser.add_argument("-r", "--reverse", action="store_true", help="reverse output")

parser.add_argument(
"-m", "--min-length", default=5, help="minimum token length", type=int
)
parser.add_argument("-m", "--min-length", help="minimum token length", type=int)

parser.add_argument(
"--warn-empty", action="store_true", help="warn if result is empty"
Expand Down
4 changes: 2 additions & 2 deletions extrakto_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ def get_cap(mode, data):
run_list = []

if mode == "all":
extrakto = Extrakto(min_length=5, alt=True, prefix_name=True)
extrakto = Extrakto(alt=True, prefix_name=True)
run_list = extrakto.all()
elif mode == "line":
res += get_lines(data)
else:
extrakto = Extrakto(min_length=5)
extrakto = Extrakto()
run_list = [mode]

for name in run_list:
Expand Down

0 comments on commit b297d45

Please sign in to comment.