Skip to content

Commit

Permalink
Merge pull request #1229 from amadolid/bugfix/webtool
Browse files Browse the repository at this point in the history
[BUGFIX]: Webtool optional headers
  • Loading branch information
marsninja authored Oct 24, 2023
2 parents 279fe14 + b3db6cd commit b81dd03
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 4 additions & 2 deletions jaseci_core/jaseci/extens/act_lib/tests/fixtures/webtool.jac
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ walker get_meta_valid {
}

walker get_meta_need_auth {
- has url = "https://docs.google.com/presentation/d/1lIYEuzzhZZ9PJaG_u3XgrFXX5Y6xd0zHV-aB2F8bXXU/edit";
+ has url = "https://github.com/settings/profile";
can webtool.get_page_meta;
report webtool.get_page_meta(url);
}
Expand All @@ -26,5 +26,7 @@ walker get_meta_timeout {
walker get_meta_need_header {
has url = "https://www.invaluable.com/blog/what-is-a-mandala/";
can webtool.get_page_meta;
- report webtool.get_page_meta(url);
+ report webtool.get_page_meta(url, headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0"
+ });
}
6 changes: 2 additions & 4 deletions jaseci_core/jaseci/extens/act_lib/webtool.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@jaseci_action()
- def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml"):
+ def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml", headers: dict = {}):
"""
Util to parse metadata out of urls and html documents
Parser option: lxml (default), html5lib, html.parser
Expand All @@ -15,9 +15,7 @@ def get_page_meta(url: str, timeout: int = 3, parser: str = "lxml"):
webpage = requests.get(
url,
timeout=timeout,
- headers={
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0"
- },
+ headers=headers,
)
soup = BeautifulSoup(webpage.content, features=parser)
meta = soup.find_all("meta")
Expand Down

0 comments on commit b81dd03

Please sign in to comment.