Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

web/deprecate-lxml #926

Merged
merged 6 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
11.16.22 (unreleased)
=====================

General
-------

- Replace ``lxml`` dependency with ``BeautifulSoup`` for submission/login html error parsing [#926]


11.16.21 (2023-03-09)
=====================

Expand Down
75 changes: 35 additions & 40 deletions crds/submit/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import os
import io

from bs4 import BeautifulSoup
from crds.core import log, utils
from crds.core.exceptions import CrdsError, CrdsWebError
from . import background
Expand All @@ -15,11 +15,6 @@
except (ImportError, RuntimeError):
log.verbose_warning("Import of 'requests' failed. submit disabled.")
DISABLED.append("requests")
try:
from lxml import html
except (ImportError, RuntimeError):
log.verbose_warning("Import of 'lxml' failed. submit disabled.")
DISABLED.append("lxml")

# ==================================================================================================

Expand Down Expand Up @@ -115,11 +110,9 @@ def repost_start(self, relative_url, *post_dicts, **post_vars):
the resulting thread and queue.
"""
response = self.get(relative_url)
csrf_values= html.fromstring(response.text).xpath(
'//input[@name="csrfmiddlewaretoken"]/@value'
)
if csrf_values:
post_vars['csrfmiddlewaretoken'] = csrf_values[0]
csrf = response.cookies['csrftoken']
if csrf:
post_vars['csrfmiddlewaretoken'] = csrf
return self.post_start(relative_url, *post_dicts, **post_vars)

def repost_confirm_or_cancel(self, ready_url, action="confirm"):
Expand Down Expand Up @@ -164,45 +157,47 @@ def login(self, next="/"):

def check_error(self, response):
"""Note an error + exception if response contains an error_message <div>."""
self._check_error(response, '//div[@id="error_message"]', "CRDS server error:")
self._check_error(response, '//div[@class="error_message"]', "CRDS server new form error:")
self._check_error(response, parse={"div":"id.error_message"}, error_prefix="CRDS server error: ")
self._check_error(response, parse={"div":"class.error_message"}, error_prefix="CRDS server new form error: ")

def check_login(self, response):
"""Note an error + exception if response contains content indicating login error."""
self._check_error(
response, '//div[@id="error_login"]',
"Error logging into CRDS server:")
self._check_error(
response, '//div[@id="error_message"]',
"Error logging into CRDS server:")
self._check_error(
response, '//title[contains(text(), "MyST SSO Portal")]',
"Error logging into CRDS server:")

def _check_error(self, response, xpath_spec, error_prefix):
"""Extract the `xpath_spec` text from `response`, if present issue a
log ERROR with `error_prefix` and the response `xpath_spec` text
then raise an exception. This may result in multiple ERROR messages.

Issue a log ERROR for each form error, then raise an exception
if any errors found.

returns None
response,
parse={
"title":"title.",
"div":"id.error_login",
"div":"id.error_message",
},
error_prefix="Error logging into the CRDS Server: "
)

def _check_error(self, response, parse={"div":"id.error_message"}, error_prefix=""):
"""Parse error-related html text from `response`. If present issue a
log ERROR with `error_prefix` and the response text, then raise an exception.
This may result in multiple ERROR messages.

Issue a log ERROR for each form error, then raise an exception if any errors found.
"""
errors = 0
if response.ok:
error_msg_parse = html.fromstring(response.text).xpath(xpath_spec)
for parse in error_msg_parse:
error_message = parse.text.strip().replace("\n","")
if error_message:
if error_message.startswith("ERROR: "):
error_message = error_message[len("ERROR: ")]
errors += 1
log.error(error_prefix, error_message)
soup = BeautifulSoup(response.text, 'html.parser')
for element, attrs in parse.items():
k, v = attrs.split(".")
try:
msg = soup.find(element, attrs={k:v})
err_msg = msg.string if msg.string is not None else msg.findChild().string
except AttributeError:
err_msg = None
if err_msg is not None:
if k == "title" and err_msg != "MyST SSO Portal":
continue
else:
errors += 1
log.error(error_prefix, err_msg)
else:
log.error("CRDS server responded with HTTP error status", response.status_code)
errors += 1

if errors:
raise CrdsWebError("A web transaction with the CRDS server had errors.")

Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ dependencies = [
"filelock",
"asdf",
"requests",
"lxml",
"parsley",
]
dynamic = ["version"]
Expand Down