Skip to content

Commit

Permalink
TIM \ REGEX \ URLFormatter \ Allow parenthesis in the query part of t…
Browse files Browse the repository at this point in the history
…he URL (demisto#37562)

* URL REGEX FIX

* FormatURL changes

* Small fix

* test

* RN

* readme file

* RN

* RN

* Bump pack from version CommonScripts to 1.17.3.

* Bump pack from version CommonScripts to 1.17.4.

* Bump pack from version ApiModules to 2.2.32.

* Bump pack from version CommonScripts to 1.17.5.

* Bump pack from version ApiModules to 2.2.33.

---------

Co-authored-by: Content Bot <[email protected]>
  • Loading branch information
Ni-Knight and Content Bot authored Dec 11, 2024
1 parent bb56155 commit 8b5d28d
Show file tree
Hide file tree
Showing 13 changed files with 42 additions and 18 deletions.
4 changes: 4 additions & 0 deletions Packs/ApiModules/ReleaseNotes/2_2_33.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#### Scripts

##### FormatURLApiModule
Fixed an issue where nested parentheses in a URL caused the formatter to return an incorrect result.
24 changes: 12 additions & 12 deletions Packs/ApiModules/Scripts/FormatURLApiModule/FormatURLApiModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __init__(self, original_url: str):
self.base = 0 # This attribute increases as the url is being parsed
self.output = ''

self.inside_brackets = False
self.inside_brackets = 0
self.opening_bracket = ''
self.port = False
self.query = False
Expand Down Expand Up @@ -244,14 +244,14 @@ def host_check(self):
elif self.modified_url[index] == "%" and not self.hex_check(index):
raise URLError(f"Invalid character {self.modified_url[index]} at position {index}")

elif self.modified_url[index] == ":" and not self.inside_brackets:
elif self.modified_url[index] == ":" and self.inside_brackets == 0:
# ":" are only allowed if host is ipv6 in which case inside_brackets equals True
if index == len(self.modified_url) - 1:
raise URLError(f"Invalid character {self.modified_url[index]} at position {index}")

elif index <= 4:
# This might be an IPv6 with no scheme
self.inside_brackets = True
self.inside_brackets += 1
self.output = f"[{self.output}" # Re-adding the bracket that was removed by the cleaner

else:
Expand All @@ -263,18 +263,18 @@ def host_check(self):
return # Going back to main to handle port part

elif self.modified_url[index] == "[":
if not self.inside_brackets and index == self.base:
if self.inside_brackets == 0 and index == self.base:
# if index==base we're at the first char of the host in which "[" is ok
self.output += self.modified_url[index]
index += 1
self.inside_brackets = True
self.inside_brackets += 1

else:
raise URLError(f"Invalid character {self.modified_url[index]} at position {index}")

elif self.modified_url[index] == "]":

if not self.inside_brackets:
if self.inside_brackets == 0:
if self.check_domain(host) and all(char in self.brackets for char in self.modified_url[index:]):
# Domain is valid with trailing "]" and brackets, the formatter will remove the extra chars
self.done = True
Expand All @@ -291,10 +291,10 @@ def host_check(self):
except ValueError:
raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")

if self.inside_brackets and ip.version == 6:
if self.inside_brackets != 0 and ip.version == 6:
self.output += self.modified_url[index]
index += 1
self.inside_brackets = False
self.inside_brackets -= 1
break

raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")
Expand Down Expand Up @@ -430,19 +430,19 @@ def check_valid_character(self, index: int) -> tuple[int, str]:
elif char in self.brackets:
# char is a type of bracket or quotation mark

if index == len(self.modified_url) - 1 and not self.inside_brackets:
if index == len(self.modified_url) - 1 and self.inside_brackets == 0:
# Edge case of a bracket or quote at the end of the URL but not part of it
return len(self.modified_url), part

elif self.inside_brackets and char == self.bracket_pairs[self.opening_bracket]:
elif self.inside_brackets != 0 and char == self.bracket_pairs.get(self.opening_bracket, ''):
# If the char is a closing bracket check that it matches the opening one.
self.inside_brackets = False
self.inside_brackets -= 1
part += char
index += 1

elif char in self.bracket_pairs:
# If the char is an opening bracket, increment the `inside_brackets` nesting counter
self.inside_brackets = True
self.inside_brackets += 1
self.opening_bracket = char
part += char
index += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
('[[https://www.test.com', 'https://www.test.com'), # disable-secrets-detection
('\'https://www.test.com/test\'', 'https://www.test.com/test'), # disable-secrets-detection
('\'https://www.test.com/?a=\'b\'\'', 'https://www.test.com/?a=\'b\''), # disable-secrets-detection
('https://www.test.com/?q=((A)%20and%20(B))', 'https://www.test.com/?q=((A) and (B))'), # disable-secrets-detection
]

ATP_REDIRECTS = [
Expand Down
3 changes: 3 additions & 0 deletions Packs/ApiModules/Scripts/FormatURLApiModule/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# API Module: URL Formatting

This API module handles the URL formatting after auto-extraction in Cortex TIM.
2 changes: 1 addition & 1 deletion Packs/ApiModules/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "ApiModules",
"description": "API Modules",
"support": "xsoar",
"currentVersion": "2.2.32",
"currentVersion": "2.2.33",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
Expand Down
5 changes: 5 additions & 0 deletions Packs/CommonScripts/ReleaseNotes/1_17_5.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

#### Scripts

##### FormatURL
Fixed an issue where URLs with nested parentheses in the query were not parsed correctly.
2 changes: 1 addition & 1 deletion Packs/CommonScripts/Scripts/FormatURL/FormatURL.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def main():
output = [{
'Type': entryTypes['note'],
'ContentsFormat': formats['json'],
'Contents': [urls],
'Contents': [urls.replace("==", "\\==")], # This is used to escape MD in XSOAR
'EntryContext': {'URL': urls},
} for urls in formatted_urls]

Expand Down
2 changes: 1 addition & 1 deletion Packs/CommonScripts/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Common Scripts",
"description": "Frequently used scripts pack.",
"support": "xsoar",
"currentVersion": "1.17.4",
"currentVersion": "1.17.5",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
Expand Down
2 changes: 1 addition & 1 deletion Packs/CommonTypes/IndicatorTypes/reputation-url.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"commitMessage": "",
"shouldPublish": false,
"shouldCommit": false,
"regex": "(?i)\\b(?:(?P<full_url>(?P<scheme>(?:https?|hxxps?|s?ftps?|meows?)(?:%3A|\\[?[:-]]?)(?://|\\\\|3A__)|mailto:)?(?P<host>(?P<auth>[\\p{L}][-_\\p{L}\\d\\[\\]]+(?::[\\p{L}][-_\\p{L}\\d\\[\\]]+)?@)?(?P<simple_domain>(?:[-\\d\\p{L}]+\\[?\\.\\]?)+(?P<tld>[\\p{L}][-\\p{L}\\d\\[\\]]+)[.]?)|(?P<IPv4>(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?P<IPv6>\\[?(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(?:\\])?))(?P<port>:[\\d]+)?(?P<path>[/*#?]+(?:[?\\d\\p{L}+&@'#%=~_\\/$!:,.;*–—―\\-|\\[\\]]*|[(][\\-\\d\\p{L}+&@#\\/%=~_$?!:,;*–—―|\\[\\]]*[)])*))|(?P<scheme_and_domain_only>(?:(?:https?|hxxps?|s?ftps?|meows?)\\[?[:-]]?(?://|\\\\|3A__)|mailto:)(?:(?:[\\p{L}][-_\\p{L}\\d\\[\\]]+@)?(?:(?:[-\\d\\p{L}]+\\[?\\.\\]?)+(?:[\\p{L}][-\\p{L}\\d\\[\\]]+)[.]?)|(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:\\[?(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::
[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(?:\\])?))))",
"regex": "(?i)\\b(?:(?P<full_url>(?P<scheme>(?:https?|hxxps?|s?ftps?|meows?)(?:%3A|\\[?[:-]]?)(?://|\\\\|3A__)|mailto:)?(?P<host>(?P<auth>[\\p{L}][-_\\p{L}\\d\\[\\]]+(?::[\\p{L}][-_\\p{L}\\d\\[\\]]+)?@)?(?P<simple_domain>(?:[-\\d\\p{L}]+\\[?\\.\\]?)+(?P<tld>[\\p{L}][-\\p{L}\\d\\[\\]]+)[.]?)|(?P<IPv4>(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?P<IPv6>\\[?(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(?:\\])?))(?P<port>:[\\d]+)?(?P<path>[/*#?]+(?:[?\\d\\p{L}+&@'#%=~_\\/$!:,.;*–—―\\-|\\[\\]()]*|[(][\\-\\d\\p{L}+&@#\\/%=~_$?!:,;*–—―|\\[\\]]*[)])*))|(?P<scheme_and_domain_only>(?:(?:https?|hxxps?|s?ftps?|meows?)\\[?[:-]]?(?://|\\\\|3A__)|mailto:)(?:(?:[\\p{L}][-_\\p{L}\\d\\[\\]]+@)?(?:(?:[-\\d\\p{L}]+\\[?\\.\\]?)+(?:[\\p{L}][-\\p{L}\\d\\[\\]]+)[.]?)|(?:(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:\\[?(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?
::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(?:\\])?))))",
"details": "URL",
"prevDetails": "URL",
"reputationScriptName": "",
Expand Down
5 changes: 5 additions & 0 deletions Packs/CommonTypes/ReleaseNotes/3_6_1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

#### Indicator Types

##### URL
Fixed an issue where the URL regex did not allow parentheses in the query part of a URL.
2 changes: 1 addition & 1 deletion Packs/CommonTypes/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Common Types",
"description": "This Content Pack will get you up and running in no-time and provide you with the most commonly used incident & indicator fields and types.",
"support": "xsoar",
"currentVersion": "3.6.0",
"currentVersion": "3.6.1",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
Expand Down
6 changes: 6 additions & 0 deletions Packs/Phishing/ReleaseNotes/3_6_28.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

#### Scripts

##### FindDuplicateEmailIncidents
Updated the FormatURLApiModule.

2 changes: 1 addition & 1 deletion Packs/Phishing/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Phishing",
"description": "Phishing emails still hooking your end users? This Content Pack can drastically reduce the time your security team spends on phishing alerts.",
"support": "xsoar",
"currentVersion": "3.6.27",
"currentVersion": "3.6.28",
"serverMinVersion": "6.0.0",
"videos": [
"https://www.youtube.com/watch?v=SY-3L348PoY"
Expand Down

0 comments on commit 8b5d28d

Please sign in to comment.