From c3424c6778ddda6765a9e09be41cfbe50f71a92e Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Sun, 12 Jan 2025 18:46:53 -0800 Subject: [PATCH] [apps/browser] improve navigation to complex website with many actions and escape selectors --- apps/browser/app.py | 9 ++++++++- apps/browser/extract.js | 4 +++- apps/browser/siteuse.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/apps/browser/app.py b/apps/browser/app.py index f9a36837..096e78df 100644 --- a/apps/browser/app.py +++ b/apps/browser/app.py @@ -5,6 +5,7 @@ import hal9 as h9 import shutil import time +import re from sitefind import site_find from siteuse import site_use @@ -38,7 +39,13 @@ async def main(): await page.setUserAgent(custom_user_agent) prompt = h9.input() - site = site_find(prompt) + + original_url_match = re.search(r"https?://\S+|\b\S+\.com\b", prompt) + if original_url_match: + site = original_url_match.group(0) + prompt = prompt.replace(site, "", 1) + else: + site = site_find(prompt) await page.goto(site) elements = await extract_elements(page) diff --git a/apps/browser/extract.js b/apps/browser/extract.js index 4fa16f2e..871d6c93 100644 --- a/apps/browser/extract.js +++ b/apps/browser/extract.js @@ -35,7 +35,9 @@ let selector = currentElement.tagName.toLowerCase(); if (currentElement.className) { - selector += '.' + currentElement.className.trim().split(/\s+/).join('.'); + selector += '.' + currentElement.className.trim().split(/\s+/).map(e => { + return e.replace(/([:.[\]#,+~*'"()\\\/=])/g, '\\$1'); + }).join('.'); } path.unshift(selector); diff --git a/apps/browser/siteuse.py b/apps/browser/siteuse.py index 68975113..2255911f 100644 --- a/apps/browser/siteuse.py +++ b/apps/browser/siteuse.py @@ -47,7 +47,7 @@ def site_use(prompt, current, elements): """ } ] - completion = OpenAI().chat.completions.create(model = "gpt-4", messages = messages) + completion = OpenAI().chat.completions.create(model = "gpt-4-0125-preview", messages = messages) content = completion.choices[0].message.content extracted = h9.extract(content, language = "*") if not extracted or len(extracted) == 0: