Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: show math in plain text in library cards #36055

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content.search.plain_text_math import process_mathjax
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
Expand Down Expand Up @@ -220,7 +221,7 @@ class implementation returns only:
# Generate description from the content
description = _get_description_from_block_content(block_type, content_data)
if description:
block_data[Fields.description] = description
block_data[Fields.description] = process_mathjax(description)

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
Expand Down
161 changes: 161 additions & 0 deletions openedx/core/djangoapps/content/search/plain_text_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Helper class to convert mathjax equations to plain text.
"""

import re

import unicodeit


class InvalidMathEquation(Exception):
    """Raised when mathjax equation is invalid. This is used to skip all transformations."""


class EqnPatternNotFound(Exception):
    """Raised when a pattern is not found in equation. This is used to skip a specific transformation."""


class PlainTextMath:
    """
    Converts mathjax equations to plain text using unicodeit and some preprocessing.

    The preprocessing strips markup that has no plain-text meaning (bold
    wrappers, delimiter sizing commands), rewrites trigonometric commands to
    their bare names and turns ``\\frac{x}{y}`` into ``(x/y)``. The result is
    then handed to ``unicodeit.replace``.
    """
    # Matches one equation in any of the four supported delimiter styles; exactly
    # one of the four capture groups holds the equation body for a given match.
    equation_pattern = re.compile(
        r'\[mathjaxinline\](.*?)\[\/mathjaxinline\]|\[mathjax\](.*?)\[\/mathjax\]|\\\((.*?)\\\)|\\\[(.*?)\\\]'
    )
    # Plain substring replacements applied first.
    eqn_replacements = (
        # just remove prefix `\`
        ("\\sin", "sin"),
        ("\\cos", "cos"),
        ("\\tan", "tan"),
        ("\\arcsin", "arcsin"),
        ("\\arccos", "arccos"),
        ("\\arctan", "arctan"),
        ("\\cot", "cot"),
        ("\\sec", "sec"),
        ("\\csc", "csc"),
    )
    # Regex replacements applied last.
    regex_replacements = (
        # `\left` / `\right` are used for matching brackets in mathjax and are not
        # required in plain text. The negative lookahead keeps longer commands that
        # merely start with the same letters (`\leftarrow`, `\rightarrow`,
        # `\leftrightarrow`, ...) intact.
        (re.compile(r'\\(?:left|right)(?![A-Za-z])'), ""),
        # Makes text bold, so not required in plain text.
        (re.compile(r'{\\bf (.*?)}'), r"\1"),
    )
    extract_inner_texts = (
        # Replaces any eqn: `\name{inner_text}` with `inner_text`
        "\\mathbf{",
        "\\bm{",
    )
    # Closing brace of a numerator followed (optionally after whitespace) by the
    # opening brace of its denominator.
    frac_open_close_pattern = re.compile(r"}\s*{")

    @staticmethod
    def _nested_bracket_matcher(equation: str, opening_pattern: str) -> tuple:
        r"""
        Locates the first ``opening_pattern`` and the brace that closes it.

        Nested ``{...}`` pairs inside the group are skipped, so the returned
        closing position belongs to the brace opened by ``opening_pattern``.

        Args:
            equation: string
            opening_pattern: for example, `\mathbf{`. The scan starts right
                after it, so it is expected to end with the opening brace.

        Returns:
            Tuple of four indices into ``equation``:
            ``(start, inner_start, inner_end, end)``.
            In below example `|` symbol is used to denote index position:
            |\mathbf{, \mathbf{|, \mathbf{some_text|}, \mathbf{some_text}|

        Raises:
            EqnPatternNotFound: ``opening_pattern`` does not occur in ``equation``.
            InvalidMathEquation: the group is opened but never closed.
        """
        start = equation.find(opening_pattern)
        if start == -1:
            raise EqnPatternNotFound()
        inner_start = start + len(opening_pattern)
        depth = 0
        for inner_end in range(inner_start, len(equation)):
            char = equation[inner_end]
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    return (start, inner_start, inner_end, inner_end + 1)
                depth -= 1
        # Ran off the end of the string without finding the closing brace.
        raise InvalidMathEquation()

    def _fraction_handler(self, equation: str) -> str:
        r"""
        Converts every `\frac{x}{y}` to `(x/y)` while handling nested `{}`.

        For example: `\frac{2}{\sqrt{1+y}}` is converted to `(2/\sqrt{1+y})`.
        Fractions nested in a numerator or denominator, as well as several
        fractions side by side, are all converted.

        A `\frac{x}` whose closing brace is not directly followed (ignoring
        whitespace) by a `{y}` group is left untouched, so text after it is
        never mistaken for a denominator.

        Args:
            equation: string

        Returns:
            String with `\frac` replaced by normal `/` symbol.

        Raises:
            InvalidMathEquation: a numerator or denominator group is never closed.
        """
        pieces = []
        rest = equation
        while True:
            try:
                n_start, n_inner_start, n_inner_end, n_end = self._nested_bracket_matcher(rest, "\\frac{")
            except EqnPatternNotFound:
                break

            # `n_end - 1` is the numerator's closing brace; the denominator must
            # open right after it (whitespace allowed).
            joint = self.frac_open_close_pattern.match(rest, n_end - 1)
            if joint is None:
                # Not a `\frac{x}{y}` form: keep it verbatim and keep scanning.
                pieces.append(rest[:n_end])
                rest = rest[n_end:]
                continue

            d_open = joint.end() - 1
            _, d_inner_start, d_inner_end, d_end = self._nested_bracket_matcher(rest[d_open:], "{")
            # Handle nested fractions
            numerator = self._fraction_handler(rest[n_inner_start:n_inner_end])
            denominator = self._fraction_handler(rest[d_open + d_inner_start:d_open + d_inner_end])
            # Now re-create the equation with `(numerator / denominator)`
            pieces.append(rest[:n_start])
            pieces.append(f"({numerator}/{denominator})")
            rest = rest[d_open + d_end:]
        pieces.append(rest)
        return "".join(pieces)

    def _nested_text_extractor(self, equation: str, pattern: str) -> str:
        r"""
        Replaces every `pattern...}` group in equation with its inner text.

        Occurrences nested inside a group are unwrapped recursively, and all
        sibling occurrences in the equation are processed.

        Args:
            equation: string
            pattern: opening pattern of the wrapper, for example `\mathbf{`

        Returns:
            String with all wrappers for the given pattern removed.

        Raises:
            InvalidMathEquation: a wrapper is opened but never closed.
        """
        pieces = []
        rest = equation
        while True:
            try:
                start, inner_start, inner_end, end = self._nested_bracket_matcher(rest, pattern)
            except EqnPatternNotFound:
                break
            pieces.append(rest[:start])
            pieces.append(self._nested_text_extractor(rest[inner_start:inner_end], pattern))
            rest = rest[end:]
        pieces.append(rest)
        return "".join(pieces)

    def _handle_replacements(self, equation: str) -> str:
        """
        Makes a bunch of replacements in equation string.

        Order: plain substring replacements, then wrapper extraction, then
        regex replacements.
        """
        for q, replacement in self.eqn_replacements:
            equation = equation.replace(q, replacement)
        for pattern in self.extract_inner_texts:
            equation = self._nested_text_extractor(equation, pattern)
        for pattern, replacement in self.regex_replacements:
            equation = pattern.sub(replacement, equation)
        return equation

    def run(self, eqn_matches: re.Match) -> str:
        """
        Takes re.Match object and runs conversion process on each match group.

        Only the first non-empty group is converted; it is the equation body
        without its delimiters. Intended to be used as the replacement callback
        of a substitution with ``equation_pattern``.

        Returns:
            The converted equation, the unconverted equation body if any step
            fails, or an empty string when the equation body is empty.
        """
        for group in eqn_matches.groups():
            if not group:
                continue
            original = group
            try:
                group = self._handle_replacements(group)
                group = self._fraction_handler(group)
                return unicodeit.replace(group)
            except Exception:  # pylint: disable=broad-except
                # Best effort: an equation that cannot be converted is kept as written.
                return original
        return ""


# Single shared converter instance, created once at import time.
processor = PlainTextMath()


def process_mathjax(content: str) -> str:
    r"""
    Return ``content`` with each mathjax equation replaced by plain text.

    Equations are located with ``processor.equation_pattern`` (the
    ``[mathjaxinline]``, ``[mathjax]``, ``\(...\)`` and ``\[...\]`` forms)
    and each match is replaced by whatever ``processor.run`` returns for it.
    Text outside equations is returned unchanged.
    """
    return processor.equation_pattern.sub(processor.run, content)
118 changes: 118 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,121 @@ def test_collection_with_published_library(self):
"num_children": 1
}
}

def test_mathjax_plain_text_conversion_for_search(self):
    """
    Test how an HTML block with mathjax equations gets converted to plain text in search description.

    All inputs are joined with `|||` into a single HTML block, the block is
    indexed once, and the resulting description is split on `|||` again so
    that every segment can be compared with its expected plain text.
    """
    # pylint: disable=line-too-long
    eqns = [
        # (input, expected output)
        ('Simple addition: \\( 2 + 3 \\)', 'Simple addition: 2 + 3'),
        ('Simple subtraction: \\( 5 - 2 \\)', 'Simple subtraction: 5 − 2'),
        ('Simple multiplication: \\( 4 * 6 \\)', 'Simple multiplication: 4 * 6'),
        ('Simple division: \\( 8 / 2 \\)', 'Simple division: 8 / 2'),
        ('Mixed arithmetic: \\( 2 + 3 4 \\)', 'Mixed arithmetic: 2 + 3 4'),
        ('Simple exponentiation: \\[ 2^3 \\]', 'Simple exponentiation: 2³'),
        ('Root extraction: \\[ 16^{1/2} \\]', 'Root extraction: 16¹^/²'),
        ('Exponent with multiple terms: \\[ (2 + 3)^2 \\]', 'Exponent with multiple terms: (2 + 3)²'),
        ('Nested exponents: \\[ 2^(3^2) \\]', 'Nested exponents: 2⁽3²)'),
        ('Mixed roots: \\[ 8^{1/2} 3^2 \\]', 'Mixed roots: 8¹^/² 3²'),
        ('Simple fraction: [mathjaxinline] 3/4 [/mathjaxinline]', 'Simple fraction: 3/4'),
        (
            'Decimal to fraction conversion: [mathjaxinline] 0.75 = 3/4 [/mathjaxinline]',
            'Decimal to fraction conversion: 0.75 = 3/4',
        ),
        ('Mixed fractions: [mathjaxinline] 1 1/2 = 3/2 [/mathjaxinline]', 'Mixed fractions: 1 1/2 = 3/2'),
        (
            'Converting decimals to mixed fractions: [mathjaxinline] 2.5 = 5/2 [/mathjaxinline]',
            'Converting decimals to mixed fractions: 2.5 = 5/2',
        ),
        (
            'Trig identities: [mathjaxinline] \\sin(x + y) = \\sin(x) \\cos(y) + \\cos(x) \\sin(y) [/mathjaxinline]',
            'Trig identities: sin(x + y) = sin(x) cos(y) + cos(x) sin(y)',
        ),
        (
            'Sine, cosine, and tangent: [mathjaxinline] \\sin(x) [/mathjaxinline] [mathjaxinline] \\cos(x) [/mathjaxinline] [mathjaxinline] \\tan(x) [/mathjaxinline]',
            'Sine, cosine, and tangent: sin(x) cos(x) tan(x)',
        ),
        (
            'Hyperbolic trig functions: [mathjaxinline] \\sinh(x) [/mathjaxinline] [mathjaxinline] \\cosh(x) [/mathjaxinline]',
            'Hyperbolic trig functions: sinh(x) cosh(x)',
        ),
        (
            "Simple derivative: [mathjax] f(x) = x^2, f'(x) = 2x [/mathjax]",
            "Simple derivative: f(x) = x², f'(x) = 2x",
        ),
        ('Double integral: [mathjax] int\\int (x + y) dxdy [/mathjax]', 'Double integral: int∫ (x + y) dxdy'),
        (
            'Partial derivatives: [mathjax] f(x,y) = xy, \\frac{\\partial f}{\\partial x} = y [/mathjax] [mathjax] \\frac{\\partial f}{\\partial y} = x [/mathjax]',
            'Partial derivatives: f(x,y) = xy, (∂ f/∂ x) = y (∂ f/∂ y) = x',
        ),
        (
            'Mean and standard deviation: [mathjax] mu = 2, \\sigma = 1 [/mathjax]',
            'Mean and standard deviation: mu = 2, σ = 1',
        ),
        (
            'Binomial probability: [mathjax] P(X = k) = (\\binom{n}{k} p^k (1-p)^{n-k}) [/mathjax]',
            'Binomial probability: P(X = k) = (\\binom{n}{k} pᵏ (1−p)ⁿ⁻ᵏ)',
        ),
        ('Gaussian distribution: [mathjax] N(\\mu, \\sigma^2) [/mathjax]', 'Gaussian distribution: N(μ, σ²)'),
        (
            'Greek letters: [mathjaxinline] \\alpha [/mathjaxinline] [mathjaxinline] \\beta [/mathjaxinline] [mathjaxinline] \\gamma [/mathjaxinline]',
            'Greek letters: α β γ',
        ),
        (
            'Subscripted variables: [mathjaxinline] x_i [/mathjaxinline] [mathjaxinline] y_j [/mathjaxinline]',
            'Subscripted variables: xᵢ yⱼ',
        ),
        ('Superscripted variables: [mathjaxinline] x^{i} [/mathjaxinline]', 'Superscripted variables: xⁱ'),
        (
            'Not supported: \\( \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I \\)',
            'Not supported: \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I',
        ),
        (
            'Bold text: \\( {\\bf a} \\cdot {\\bf b} = |{\\bf a}| |{\\bf b}| \\cos(\\theta) \\)',
            'Bold text: a ⋅ b = |a| |b| cos(θ)',
        ),
        ('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'),
        ('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'),
        (
            'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)',
            'Nested Bold text 2: a ⋅ (a × b)'
        ),
        (
            'Nested Bold text 3: \\( \\mathbf{a \\cdot (a \\bm{\\times} b)} \\)',
            'Nested Bold text 3: a ⋅ (a × b)'
        ),
        ('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'),
        ('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'),
        ('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),
        ('Fraction test 1: \\( \\frac{2} {3} \\)', 'Fraction test 1: (2/3)'),
        ('Fraction test 2: \\( \\frac{2}{3} \\)', 'Fraction test 2: (2/3)'),
        ('Fraction test 3: \\( \\frac{\\frac{2}{3}}{4} \\)', 'Fraction test 3: ((2/3)/4)'),
        ('Fraction test 4: \\( \\frac{\\frac{2} {3}}{4} \\)', 'Fraction test 4: ((2/3)/4)'),
        ('Fraction test 5: \\( \\frac{\\frac{2} {3}}{\\frac{4}{3}} \\)', 'Fraction test 5: ((2/3)/(4/3))'),
        # Invalid equations.
        ('Fraction error: \\( \\frac{2} \\)', 'Fraction error: \\frac{2}'),
        ('Fraction error 2: \\( \\frac{\\frac{2}{3}{4} \\)', 'Fraction error 2: \\frac{\\frac{2}{3}{4}'),
        ('Unclosed: [mathjaxinline]x^2', 'Unclosed: [mathjaxinline]x^2'),
        (
            'Missing closing bracket: \\( \\frac{\\frac{2} {3}{\\frac{4}{3}} \\)',
            'Missing closing bracket: \\frac{\\frac{2} {3}{\\frac{4}{3}}'
        ),
        ('No equation: normal text', 'No equation: normal text'),
    ]
    # pylint: enable=line-too-long
    block = BlockFactory.create(
        parent_location=self.toy_course.location,
        category="html",
        display_name="Non-default HTML Block",
        editor="raw",
        use_latex_compiler=True,
        data="|||".join(e[0] for e in eqns),
    )
    doc = {}
    doc.update(searchable_doc_for_course_block(block))
    doc.update(searchable_doc_tags(block.usage_key))
    result = doc['description'].split('|||')
    # Without this check a description that lost segments would leave the
    # trailing cases unverified while the test still passed.
    assert len(result) == len(eqns)
    for (source, expected), actual in zip(eqns, result):
        assert actual.strip() == expected, f"Unexpected plain text for input: {source}"
2 changes: 2 additions & 0 deletions requirements/edx/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/kernel.in
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/kernel.in
uritemplate==4.1.1
# via
# drf-spectacular
Expand Down
4 changes: 4 additions & 0 deletions requirements/edx/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2160,6 +2160,10 @@ unicodecsv==0.14.1
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-enterprise
unicodeit==0.7.5
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
unidiff==0.7.5
# via -r requirements/edx/testing.txt
uritemplate==4.1.1
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
uritemplate==4.1.1
# via
# -r requirements/edx/base.txt
Expand Down
1 change: 1 addition & 0 deletions requirements/edx/kernel.in
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ web-fragments # Provides the ability to render fragments o
wrapt # Better functools.wrapped. TODO: functools has since improved, maybe we can switch?
XBlock[django] # Courseware component architecture
xss-utils # https://github.com/openedx/edx-platform/pull/20633 Fix XSS via Translations
unicodeit # Converts mathjax equation to plain text by using unicode symbols
2 changes: 2 additions & 0 deletions requirements/edx/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
unidiff==0.7.5
# via -r requirements/edx/testing.in
uritemplate==4.1.1
Expand Down
Loading