Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added functionality to locate elements based on their tagname, attribute, and value. #32

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions zendriver/core/tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,47 @@ async def open_external_inspector(self):
import webbrowser

webbrowser.open(self.inspector_url)

async def locate(
self,
tagname: str,
attribute: str,
value: str,
timeout: Union[int, float] = 10,
) -> Element:
"""
locate a single element by tagname, attribute, and value
can also be used to wait for such element to appear.

:param tagname: tagname of the element to search for
:type tagname: str
:param attribute: the attribute we will be filtering the element by
:type attribute: str
:param value: the value we will be checking the attribute to narrow our list of elements as much as possible
:type value: str

:param timeout: raise timeout exception when after this many seconds nothing is found.
:type timeout: float,int
"""
loop = asyncio.get_running_loop()
start_time = loop.time()

tagname, attribute, value = tagname.strip().upper(), attribute.strip(), value.strip()

item = await self.locate_element_by_tagname_attribute_value(
tagname, attribute, value
)
while not item:
await self.wait()
item = await self.locate_element_by_tagname_attribute_value(
tagname, attribute, value
)
if loop.time() - start_time > timeout:
raise asyncio.TimeoutError(
f"time ran out while waiting for element: {tagname}[{attribute}=\"{value}\"]"
)
await self.sleep(0.5)
return item

async def find(
self,
Expand Down Expand Up @@ -254,6 +295,52 @@ async def select(
await self.sleep(0.5)
return item

async def locate_all(
self,
tagname: str,
attribute: str,
value: str,
timeout: Union[int, float] = 10,
) -> List[Element]:
"""
locate multiple elements by tagname, attribute, and value
can also be used to wait for such element to appear.

:param tagname: tagname of the element to search for
:type tagname: str
:param attribute: the attribute we will be filtering the element by
:type attribute: str
:param value: the value we will be checking the attribute to narrow our list of elements as much as possible
:type value: str

:param timeout: raise timeout exception when after this many seconds nothing is found.
:type timeout: float,int
"""
loop = asyncio.get_running_loop()
now = loop.time()

tagname, attribute, value = tagname.strip().upper(), attribute.strip(), value.strip()

items = await self.locate_elements_by_tagname_attribute_value(
tagname = tagname,
attribute = attribute,
value = value
)

while not items:
await self.wait()
items = await self.locate_elements_by_tagname_attribute_value(
tagname = tagname,
attribute = attribute,
value = value
)
if loop.time() - now > timeout:
raise asyncio.TimeoutError(
f"time ran out while waiting for elements: {tagname}[{attribute}=\"{value}\"]"
)
await self.sleep(0.5)
return items

async def find_all(
self,
text: str,
Expand Down Expand Up @@ -462,6 +549,159 @@ async def query_selector(
return
return element.create(node, self, doc)

async def locate_element_by_tagname_attribute_value(
self,
tagname: str,
attribute: str,
value: str,
) -> Element | None:
"""
locates and returns the first element containing <text>, or best match

:param tagname: The name of the HTML tag to search for (e.g., 'button', 'input'..).
:type tagname: str
:param attribute: The attribute to match (e.g., 'id', 'name'..).
:type attribute: str
:param value: The value of the attribute to match.
:type value: str

:return: A single element
:rtype: Element
"""
async def traverse(node, parent_tree):
"""
recursive traversal of the DOM and shadow DOM to find out targeted element.
"""
if not node:
return None

# check if the node matches the tag and attribute criteria
if (
node.node_type == 1 # element node
and node.node_name.lower() == tagname.lower()
and node.attributes
and attribute in node.attributes
and any(
node.attributes[i] == attribute and node.attributes[i + 1] == value
for i in range(0, len(node.attributes), 2)
)
):
return element.create(node, self, parent_tree)

tasks = list()

# traverse shadow roots if they exist
if node.shadow_roots:
tasks.extend(traverse(shadow_root, parent_tree) for shadow_root in node.shadow_roots)

# traverse child nodes
if node.children:
tasks.extend(traverse(child, parent_tree) for child in node.children)

for task in asyncio.as_completed(tasks):
result = await task
if result:
return result

return None

# fetch the document root
doc = await self.send(cdp.dom.get_document(depth=-1, pierce=True))

# start traversing the DOM tree
result = await traverse(doc, doc)
if result:
return result

# search within iframes concurrently
iframes = util.filter_recurse_all(doc, lambda node: node.node_name == "IFRAME")
iframe_tasks = [
traverse(iframe.content_document, iframe.content_document)
for iframe in iframes
if iframe.content_document
]

for iframe_task in asyncio.as_completed(iframe_tasks):
result = await iframe_task
if result:
return result

return None

async def locate_elements_by_tagname_attribute_value(
self,
tagname: str,
attribute: str,
value: str,
) -> list[Element]:
"""
locates and returns all elements with the specified tagname, attribute, and value.

:param tagname: The name of the HTML tag to search for (e.g., 'button', 'input'..).
:type tagname: str
:param attribute: The attribute to match (e.g., 'id', 'name'..).
:type attribute: str
:param value: The value of the attribute to match.
:type value: str

:return: List of matching elements.
:rtype: list[Element]
"""
results = []

async def traverse(node, parent_tree):
"""
recursive traversal of the DOM, including shadow DOM and iframes, to collect all matching elements.
"""
if not node:
return

# Check if the node matches the tag and attribute criteria
if (
node.node_type == 1 # element node
and node.node_name.lower() == tagname.lower()
and node.attributes
and attribute in node.attributes
and any(
node.attributes[i] == attribute and value in node.attributes[i + 1].split() # searches inside the attributes of the node and checks whether our targeted attribute contains our targeted value, this would also work if we have a Div element with the attribute Class equaling "Class1 Class2" and we're only targeting the value Class1
for i in range(0, len(node.attributes), 2)
)
): # if we find a match element, we append it to our list of results
results.append(element.create(node, self, parent_tree))

tasks = list()

# traverse shadow roots if present
if node.shadow_roots:
tasks.extend(traverse(shadow_root, parent_tree) for shadow_root in node.shadow_roots)

# traverse child nodes
if node.children:
tasks.extend(traverse(child, parent_tree) for child in node.children)

# process all tasks concurrently
if tasks:
await asyncio.gather(*tasks)

# fetch the document root
doc = await self.send(cdp.dom.get_document(depth=-1, pierce=True))

# start traversing the main document
await traverse(doc, doc)

# search within iframes concurrently
iframes = util.filter_recurse_all(doc, lambda node: node.node_name == "IFRAME")
iframe_tasks = [
traverse(iframe.content_document, iframe.content_document)
for iframe in iframes
if iframe.content_document
]

if iframe_tasks:
await asyncio.gather(*iframe_tasks)

return results

async def find_elements_by_text(
self,
text: str,
Expand Down