Skip to content

Commit

Permalink
Remove create_view and aggregate from feature extraction analyzer (#3047)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkppr authored Mar 14, 2024
1 parent d17f984 commit 169e6c3
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 42 deletions.
16 changes: 1 addition & 15 deletions data/regex_features.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# re_flags: []
# emojis: []
# tags: []
# create_view: False
# aggregate: False
# overwrite_store_as: True
# overwrite_and_merge_store_as: False
# store_type_list: False
Expand All @@ -36,13 +34,6 @@
# The field store_as defines the name of the attribute the feature is
# stored as.
#
# The create_view is an optional boolean that determines whether a view
# should be created if there are hits.
#
# The aggregate is an optional boolean that determines whether an
# aggregation of the results should be created and stored (at the moment
# this does nothing, but it will once aggregations are supported).
#
# The overwrite_store_as is an optional boolean that determines if
# we want to overwrite the field store_as if it already exists.
#
Expand All @@ -59,8 +50,6 @@
# the regular expression is run against the attribute to extract a value.
# The first value extracted is then stored inside the "store_as" attribute.
# If there are emojis or tags defined they are also applied to that event.
# Finally, if a view is to be created, a view that searches for the
# added tag is added (only if there are results).
# ------------------------------------------------------------------------

email_addresses:
Expand All @@ -69,15 +58,14 @@ email_addresses:
store_as: 'email_address'
re: '([a-zA-Z0-9_\.+\-]+@[a-zA-Z0-9\-]+\.[a-zA-Z0-9\-\.]+)'
re_flags: []
aggregate: True
tags: ['email-address']

gmail_accounts:
query_string: 'source_short:"WEBHIST" AND url:"mail.google.com"'
attribute: 'message'
store_as: 'found_account'
re: '[a-zA-Z0-9_\.+\-]+@(gmail|googlemail)\.com'
re_flags: []
aggregate: True
tags: ['gmail-account']
emojis: ['ID_BUTTON']

Expand All @@ -88,7 +76,6 @@ github_accounts:
store_as: 'found_account'
re: 'https://github.com/users/([A-z-\d]{1,39})'
re_flags: []
aggregate: True
tags: ['github-account']
emojis: ['ID_BUTTON']

Expand All @@ -99,7 +86,6 @@ linkedin_accounts:
attribute: 'url'
store_as: 'found_account'
re: 'https://www.linkedin.com/in/([A-z-\d]{5,32})/edit/'
aggregate: True
tags: ['linkedin-account']
emojis: ['ID_BUTTON']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,33 +298,6 @@ def extract_feature(self, name, config):
# Commit the event to the datastore.
event.commit()

aggregate_results = config.get("aggregate", False)
create_view = config.get("create_view", False)

# If aggregation is turned on, we automatically create an aggregation.
if aggregate_results:
create_view = True

if create_view and event_counter:
view = self.sketch.add_view(
name, self.NAME, query_string=query, query_dsl=query_dsl
)

if aggregate_results:
params = {
"field": store_as,
"limit": 20,
"index": [self.timeline_id],
}
self.sketch.add_aggregation(
name="Top 20 for: {0:s} [{1:s}]".format(store_as, name),
agg_name="field_bucket",
agg_params=params,
description="Created by the feature extraction analyzer",
view_id=view.id,
chart_type="hbarchart",
)

return "Feature extraction [{0:s}] extracted {1:d} features.".format(
name, event_counter
)
Expand Down

0 comments on commit 169e6c3

Please sign in to comment.