From 7c924acd9a594b2c4d22c135326b456e3f5339bb Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 23 Oct 2024 23:37:23 -0400 Subject: [PATCH 1/4] Update UC Mode / CDP Mode --- seleniumbase/core/browser_launcher.py | 4 +--- seleniumbase/undetected/__init__.py | 7 +++++-- seleniumbase/undetected/cdp_driver/cdp_util.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index f92a0a7acf4..c176f9f77ea 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -525,9 +525,6 @@ def uc_open_with_cdp_mode(driver, url=None): js_utils.call_me_later(driver, script, 3) time.sleep(0.012) driver.close() - driver.clear_cdp_listeners() - driver.delete_all_cookies() - driver.delete_network_conditions() driver.disconnect() cdp_details = driver._get_cdp_details() @@ -546,6 +543,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp_util.start(host=cdp_host, port=cdp_port) ) page = loop.run_until_complete(driver.cdp_base.get(url)) + loop.run_until_complete(page.activate()) if not safe_url: time.sleep(constants.UC.CDP_MODE_OPEN_WAIT) cdp = types.SimpleNamespace() diff --git a/seleniumbase/undetected/__init__.py b/seleniumbase/undetected/__init__.py index f9b0b44845b..ecbdaa006b6 100644 --- a/seleniumbase/undetected/__init__.py +++ b/seleniumbase/undetected/__init__.py @@ -133,8 +133,11 @@ def __init__( options = ChromeOptions() try: if hasattr(options, "_session") and options._session is not None: - # Prevent reuse of options - raise RuntimeError("You cannot reuse the ChromeOptions object") + # Prevent reuse of options. + # (Probably a port overlap. Quit existing driver and continue.) + logger.debug("You cannot reuse the ChromeOptions object") + with suppress(Exception): + options._session.quit() except AttributeError: pass options._session = self diff --git a/seleniumbase/undetected/cdp_driver/cdp_util.py b/seleniumbase/undetected/cdp_driver/cdp_util.py index 98f80767714..1307343625a 100644 --- a/seleniumbase/undetected/cdp_driver/cdp_util.py +++ b/seleniumbase/undetected/cdp_driver/cdp_util.py @@ -37,6 +37,8 @@ async def start( Helper function to launch a browser. It accepts several keyword parameters. Conveniently, you can just call it bare (no parameters) to quickly launch an instance with best practice defaults. + Note: Due to a Chrome-130 bug, use start_async or start_sync instead. + (Calling this method directly could lead to an unresponsive browser) Note: New args are expected: Use kwargs only! Note: This should be called ``await start()`` :param user_data_dir: @@ -88,6 +90,20 @@ async def start( return await Browser.create(config) +async def start_async(*args, **kwargs) -> Browser: + headless = False + if "headless" in kwargs: + headless = kwargs["headless"] + decoy_args = kwargs + decoy_args["headless"] = True + driver = await start(**decoy_args) + kwargs["headless"] = headless + kwargs["user_data_dir"] = driver.config.user_data_dir + driver.stop() # Due to Chrome-130, must stop & start + time.sleep(0.15) + return await start(*args, **kwargs) + + def start_sync(*args, **kwargs) -> Browser: loop = asyncio.get_event_loop() headless = False From dd6659be868ca6ad1aa1662f7d6d6ed458ea0b81 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 23 Oct 2024 23:38:07 -0400 Subject: [PATCH 2/4] Update CDP Mode examples --- examples/cdp_mode/raw_async.py | 4 ++-- examples/cdp_mode/raw_footlocker.py | 10 +++++----- examples/cdp_mode/raw_hyatt.py | 2 +- examples/cdp_mode/raw_pokemon.py | 4 +++- examples/cdp_mode/raw_req_async.py | 2 +- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/cdp_mode/raw_async.py b/examples/cdp_mode/raw_async.py index 109d50378ff..865210de939 100644 --- a/examples/cdp_mode/raw_async.py +++ b/examples/cdp_mode/raw_async.py @@ -5,7 +5,7 @@ async def main(): - driver = await cdp_driver.cdp_util.start() + driver = await cdp_driver.cdp_util.start_async() page = await driver.get("https://www.priceline.com/") time.sleep(3) print(await page.evaluate("document.title")) @@ -21,7 +21,7 @@ async def main(): loop.run_until_complete(main()) # Call everything without using async / await - driver = loop.run_until_complete(cdp_driver.cdp_util.start()) + driver = cdp_driver.cdp_util.start_sync() page = loop.run_until_complete(driver.get("https://www.pokemon.com/us")) time.sleep(3) print(loop.run_until_complete(page.evaluate("document.title"))) diff --git a/examples/cdp_mode/raw_footlocker.py b/examples/cdp_mode/raw_footlocker.py index 0ee5876da66..33463b0cf2a 100644 --- a/examples/cdp_mode/raw_footlocker.py +++ b/examples/cdp_mode/raw_footlocker.py @@ -4,14 +4,14 @@ url = "https://www.footlocker.com/" sb.activate_cdp_mode(url) sb.sleep(3) - sb.cdp.click_if_visible("button#touAgreeBtn") - sb.sleep(1) + sb.cdp.click_if_visible('button[id*="Agree"]') + sb.sleep(1.5) + sb.cdp.mouse_click('input[aria-label="Search"]') + sb.sleep(1.5) search = "Nike Shoes" - sb.cdp.click('input[aria-label="Search"]') - sb.sleep(1) sb.cdp.press_keys('input[aria-label="Search"]', search) sb.sleep(2) - sb.cdp.click('ul[id*="typeahead"] li div') + sb.cdp.mouse_click('ul[id*="typeahead"] li div') sb.sleep(2) elements = sb.cdp.select_all("a.ProductCard-link") if elements: diff --git a/examples/cdp_mode/raw_hyatt.py b/examples/cdp_mode/raw_hyatt.py index d5ecbc1dfae..ecdf3fc4dd6 100644 --- a/examples/cdp_mode/raw_hyatt.py +++ b/examples/cdp_mode/raw_hyatt.py @@ -3,7 +3,7 @@ with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.hyatt.com/" sb.activate_cdp_mode(url) - sb.sleep(1) + sb.sleep(1.5) sb.cdp.click_if_visible('button[aria-label="Close"]') sb.sleep(0.5) sb.cdp.click('span:contains("Explore")') diff --git a/examples/cdp_mode/raw_pokemon.py b/examples/cdp_mode/raw_pokemon.py index 6c3eefed7f6..6706dcdb5fa 100644 --- a/examples/cdp_mode/raw_pokemon.py +++ b/examples/cdp_mode/raw_pokemon.py @@ -3,13 +3,15 @@ with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.pokemon.com/us" sb.activate_cdp_mode(url) - sb.sleep(1) + sb.sleep(1.5) sb.cdp.click_if_visible("button#onetrust-reject-all-handler") + sb.sleep(0.5) sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]') sb.sleep(1) sb.cdp.click('b:contains("Show Advanced Search")') sb.sleep(1) sb.cdp.click('span[data-type="type"][data-value="electric"]') + sb.sleep(0.5) sb.cdp.click("a#advSearch") sb.sleep(1) sb.cdp.click('img[src*="img/pokedex/detail/025.png"]') diff --git a/examples/cdp_mode/raw_req_async.py b/examples/cdp_mode/raw_req_async.py index 86ed3ed173e..ea2150fd90a 100644 --- a/examples/cdp_mode/raw_req_async.py +++ b/examples/cdp_mode/raw_req_async.py @@ -23,7 +23,7 @@ async def request_paused_handler(self, event, tab): ) async def start_test(self): - driver = await cdp_driver.cdp_util.start(incognito=True) + driver = await cdp_driver.cdp_util.start_async(incognito=True) tab = await driver.get("about:blank") tab.add_handler(mycdp.fetch.RequestPaused, self.request_paused_handler) url = "https://gettyimages.com/photos/firefly-2003-nathan" From 40dd528d5e30cea95c729e5a644ddc9b3b7b5e50 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 23 Oct 2024 23:38:28 -0400 Subject: [PATCH 3/4] Update the CDP Mode docs --- examples/cdp_mode/ReadMe.md | 50 +++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index cf9c1a360c4..2c2315b5229 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -6,7 +6,7 @@ 👤 UC Mode avoids bot-detection by first disconnecting WebDriver from the browser at strategic times, calling special PyAutoGUI methods to bypass CAPTCHAs (as needed), and finally reconnecting the driver afterwards so that WebDriver actions can be performed again. Although this approach works for bypassing simple CAPTCHAs, more flexibility is needed for bypassing bot-detection on websites with advanced protection. (That's where CDP Mode comes in.) -🐙 CDP Mode is based on python-cdp, trio-cdp, and nodriver. trio-cdp was an early implementation of python-cdp, whereas nodriver is a modern implementation of python-cdp. (Refactored CDP code is imported from MyCDP.) +🐙 CDP Mode is based on python-cdp, trio-cdp, and nodriver. trio-cdp is an early implementation of python-cdp, and nodriver is a modern implementation of python-cdp. (Refactored Python-CDP code is imported from MyCDP.) 🐙 CDP Mode includes multiple updates to the above, such as: @@ -19,12 +19,41 @@ -------- -### 🐙 CDP Mode initialization: +### 🐙 CDP Mode usage: -* `sb.activate_cdp_mode(url)` +* **`sb.activate_cdp_mode(url)`** > (Call that from a **UC Mode** script) +That disconnects WebDriver from Chrome (which prevents detection), and gives you access to `sb.cdp` methods (which don't trigger anti-bot checks). + +### 🐙 Here are some common `sb.cdp` methods: + +* `sb.cdp.click(selector)` +* `sb.cdp.click_if_visible(selector)` +* `sb.cdp.type(selector, text)` +* `sb.cdp.press_keys(selector, text)` +* `sb.cdp.select_all(selector)` +* `sb.cdp.get_text(selector)` + +When `type()` is too fast, use the slower `press_keys()` to avoid detection. You can also use `sb.sleep(seconds)` to slow things down. + +To use WebDriver methods again, call: + +* **`sb.reconnect()`** or **`sb.connect()`** + +(Note that reconnecting allows anti-bots to detect you, so only reconnect if it is safe to do so.) + +To disconnect again, call: + +* **`sb.disconnect()`** + +While disconnected, if you accidentally call a WebDriver method, then SeleniumBase will attempt to use the CDP Mode version of that method (if available). For example, if you accidentally call `sb.click(selector)` instead of `sb.cdp.click(selector)`, then your WebDriver call will automatically be redirected to the CDP Mode version. Not all WebDriver methods have a matching CDP Mode method. In that scenario, calling a WebDriver method while disconnected could raise an error, or make WebDriver automatically reconnect first. + +To find out if WebDriver is connected or disconnected, call: + +* **`sb.is_connected()`** + -------- ### 🐙 CDP Mode examples: @@ -45,13 +74,15 @@ from seleniumbase import SB with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.pokemon.com/us" sb.activate_cdp_mode(url) - sb.sleep(1) + sb.sleep(1.5) sb.cdp.click_if_visible("button#onetrust-reject-all-handler") + sb.sleep(0.5) sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]') sb.sleep(1) sb.cdp.click('b:contains("Show Advanced Search")') sb.sleep(1) sb.cdp.click('span[data-type="type"][data-value="electric"]') + sb.sleep(0.5) sb.cdp.click("a#advSearch") sb.sleep(1) sb.cdp.click('img[src*="img/pokedex/detail/025.png"]') @@ -99,7 +130,7 @@ from seleniumbase import SB with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.hyatt.com/" sb.activate_cdp_mode(url) - sb.sleep(1) + sb.sleep(1.5) sb.cdp.click_if_visible('button[aria-label="Close"]') sb.sleep(0.5) sb.cdp.click('span:contains("Explore")') @@ -188,10 +219,14 @@ with SB(uc=True, test=True, locale_code="en") as sb: ```python sb.cdp.get(url) -sb.cdp.reload() +sb.cdp.open(url) +sb.cdp.reload(ignore_cache=True, script_to_evaluate_on_load=None) sb.cdp.refresh() +sb.cdp.get_event_loop() sb.cdp.add_handler(event, handler) sb.cdp.find_element(selector) +sb.cdp.find(selector) +sb.cdp.locator(selector) sb.cdp.find_all(selector) sb.cdp.find_elements_by_text(text, tag_name=None) sb.cdp.select(selector) @@ -205,6 +240,7 @@ sb.cdp.load_cookies(*args, **kwargs) sb.cdp.clear_cookies(*args, **kwargs) sb.cdp.sleep(seconds) sb.cdp.bring_active_window_to_front() +sb.cdp.bring_to_front() sb.cdp.get_active_element() sb.cdp.get_active_element_css() sb.cdp.click(selector) @@ -231,7 +267,7 @@ sb.cdp.medimize() sb.cdp.set_window_rect() sb.cdp.reset_window_size() sb.cdp.get_window() -sb.cdp.get_text() +sb.cdp.get_text(selector) sb.cdp.get_title() sb.cdp.get_current_url() sb.cdp.get_origin() From 2d7a1844c339deabe0b58883b69979f9aeb4ab99 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Wed, 23 Oct 2024 23:38:46 -0400 Subject: [PATCH 4/4] Version 4.32.1 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 73cd0a1e3e6..d2c74c338d1 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.32.0" +__version__ = "4.32.1"