Skip to content

Commit

Permalink
Merge pull request #453 from pzaino/develop
Browse files Browse the repository at this point in the history
More improvements for the config schema. Added data format parsing, more descriptions and examples
  • Loading branch information
pzaino authored Sep 30, 2024
2 parents 000d310 + 7709e20 commit 2b2c0e0
Show file tree
Hide file tree
Showing 6 changed files with 1,365 additions and 1,049 deletions.
283 changes: 166 additions & 117 deletions doc/features.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rules/AcceptCookies-ruleset.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"selector": "//button[contains(text(), 'Accept') or contains(text(), 'Akzeptieren') or contains(text(), 'Aceptar') or contains(text(), 'Accepter') or contains(text(), 'Accetta')]"
},
{
"selector_type": "class",
"selector_type": "class_name",
"selector": "disclaimerOK"
},
{
Expand Down
1,703 changes: 921 additions & 782 deletions schemas/crowler-config-schema.json

Large diffs are not rendered by default.

248 changes: 184 additions & 64 deletions schemas/crowler-config-schema.yaml

Large diffs are not rendered by default.

102 changes: 53 additions & 49 deletions schemas/ruleset-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,10 @@
"xpath",
"id",
"class_name",
"class",
"name",
"tag_name",
"element",
"link_text",
"partial_link_text",
"regex",
Expand Down Expand Up @@ -243,6 +245,55 @@
"type": "string",
"description": "A unique name identifying the action rule."
},
"url": {
"type": "string",
"format": "uri",
"description": "Optional. The specific URL to which this action applies or the URL to navigate to, applicable for navigate action. Do not use this field for 'navigate_to_url' action type, use instead the value field to specify the url to go to, url field is only to match the rule."
},
"wait_conditions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"condition_type": {
"type": "string",
"enum": [
"element_presence",
"element_visible",
"plugin_call",
"delay"
]
},
"value": {
"type": "string",
"description": "a generic value to use with the condition, e.g., a delay in seconds, applicable for delay condition type. For delay type you can also use the CROWler exprterpreter to generate delay values at runtime, e.g., 'random(1, 3)' or 'random(random(1,3), random(5,8))'."
},
"selector": {
"type": "string",
"description": "The CSS selector for the element, applicable for element_presence and element_visible conditions. If you're using plugin_call, then this field is used for the plugin name."
}
}
},
"description": "Conditions to wait for, that must be met before the action is executed. These conditions are designed to ensure that the page or elements are ready (e.g., waiting for an element to appear, or a delay). Do not use this field to wait after an action is performed, as it only applies before the action is executed."
},
"conditions": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"element",
"language",
"plugin_call"
]
},
"selector": {
"type": "string",
"description": "The CSS selector to check if a given element exists, applicable for 'element'. The language id to check if a page is in a certain language, applicable for 'language'. The plugin's name if you're using plugin_call."
}
},
"description": "Conditions that must be met for the action to be executed. For example, you can check if a certain element exists on the page before performing an action. See this as something to do after we waited for the wait_conditions and we verify that the page is ready to perform the action."
},
"action_type": {
"type": "string",
"enum": [
Expand Down Expand Up @@ -287,8 +338,10 @@
"xpath",
"id",
"class_name",
"class",
"name",
"tag_name",
"element",
"link_text",
"partial_link_text",
"plugin_call"
Expand Down Expand Up @@ -329,55 +382,6 @@
"type": "string",
"description": "The value to use with the action, e.g., text to input, applicable for input_text."
},
"url": {
"type": "string",
"format": "uri",
"description": "Optional. The specific URL to which this action applies or the URL to navigate to, applicable for navigate action. Do not use this field for 'navigate_to_url' action type, use instead the value field to specify the url to go to, url field is only to match the rule."
},
"wait_conditions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"condition_type": {
"type": "string",
"enum": [
"element_presence",
"element_visible",
"plugin_call",
"delay"
]
},
"value": {
"type": "string",
"description": "a generic value to use with the condition, e.g., a delay in seconds, applicable for delay condition type. For delay type you can also use the CROWler exprterpreter to generate delay values at runtime, e.g., 'random(1, 3)' or 'random(random(1,3), random(5,8))'."
},
"selector": {
"type": "string",
"description": "The CSS selector for the element, applicable for element_presence and element_visible conditions. If you're using plugin_call, then this field is used for the plugin name."
}
}
},
"description": "Conditions to wait before being able to perform the action. This to ensure page readiness."
},
"conditions": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"element",
"language",
"plugin_call"
]
},
"selector": {
"type": "string",
"description": "The CSS selector to check if a given element exists, applicable for 'element'. The language id to check if a page is in a certain language, applicable for 'language'. The plugin's name if you're using plugin_call."
}
},
"description": "Conditions that must be met for the action to be executed."
},
"error_handling": {
"type": "object",
"properties": {
Expand Down
76 changes: 40 additions & 36 deletions schemas/ruleset-schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,10 @@ items:
- "xpath"
- "id"
- "class_name"
- "class"
- "name"
- "tag_name"
- "element"
- "link_text"
- "partial_link_text"
- "regex"
Expand Down Expand Up @@ -186,6 +188,42 @@ items:
rule_name:
type: "string"
description: "A unique name identifying the action rule."
url:
type: "string"
format: "uri"
description: "Optional. The specific URL to which this action applies or the URL to navigate to, applicable for navigate action. Do not use this field for the 'navigate_to_url' action type; use the value field instead to specify the URL to go to. The url field is only used to match the rule."
wait_conditions:
type: "array"
items:
type: "object"
properties:
condition_type:
type: "string"
enum:
- "element_presence"
- "element_visible"
- "plugin_call"
- "delay"
value:
type: "string"
description: "A generic value to use with the condition, e.g., a delay in seconds, applicable for delay condition type. For delay type you can also use the CROWler exprterpreter to generate delay values at runtime, e.g., 'random(1, 3)' or 'random(random(1,3), random(5,8))'."
selector:
type: "string"
description: "The CSS selector for the element, applicable for element_presence and element_visible conditions. If you're using plugin_call, then this field is used for the plugin name."
description: "Conditions to wait for, that must be met before the action is executed. These conditions are designed to ensure that the page or elements are ready (e.g., waiting for an element to appear, or a delay). Do not use this field to wait after an action is performed, as it only applies before the action is executed."
conditions:
type: "object"
properties:
type:
type: "string"
enum:
- "element"
- "language"
- "plugin_call"
selector:
type: "string"
description: "The CSS selector to check if a given element exists, applicable for 'element'. The language id to check if a page is in a certain language, applicable for 'language'. The plugin's name if you're using plugin_call."
description: "Conditions that must be met for the action to be executed. For example, you can check if a certain element exists on the page before performing an action. Think of this as a check performed after the wait_conditions have been satisfied, to verify that the page is ready for the action."
action_type:
type: "string"
enum:
Expand Down Expand Up @@ -228,8 +266,10 @@ items:
- "xpath"
- "id"
- "class_name"
- "class"
- "name"
- "tag_name"
- "element"
- "link_text"
- "partial_link_text"
- "plugin_call"
Expand Down Expand Up @@ -257,42 +297,6 @@ items:
value:
type: "string"
description: "The value to use with the action, e.g., text to input, applicable for input_text."
url:
type: "string"
format: "uri"
description: "Optional. The specific URL to which this action applies or the URL to navigate to, applicable for navigate action. Do not use this field for the 'navigate_to_url' action type; use the value field instead to specify the URL to go to. The url field is only used to match the rule."
wait_conditions:
type: "array"
items:
type: "object"
properties:
condition_type:
type: "string"
enum:
- "element_presence"
- "element_visible"
- "plugin_call"
- "delay"
value:
type: "string"
description: "A generic value to use with the condition, e.g., a delay in seconds, applicable for delay condition type. For delay type you can also use the CROWler exprterpreter to generate delay values at runtime, e.g., 'random(1, 3)' or 'random(random(1,3), random(5,8))'."
selector:
type: "string"
description: "The CSS selector for the element, applicable for element_presence and element_visible conditions. If you're using plugin_call, then this field is used for the plugin name."
description: "Conditions to wait for before the action can be performed. This is to ensure page readiness."
conditions:
type: "object"
properties:
type:
type: "string"
enum:
- "element"
- "language"
- "plugin_call"
selector:
type: "string"
description: "The CSS selector to check if a given element exists, applicable for 'element'. The language id to check if a page is in a certain language, applicable for 'language'. The plugin's name if you're using plugin_call."
description: "Conditions that must be met for the action to be executed."
error_handling:
type: "object"
properties:
Expand Down

0 comments on commit 2b2c0e0

Please sign in to comment.