feat: show math in plain text in library cards (#36055)
Converts MathJax equations to Unicode so that they render as plain text in library card previews.
This commit is contained in:
@@ -14,6 +14,7 @@ from opaque_keys.edx.locator import LibraryLocatorV2
|
||||
from rest_framework.exceptions import NotFound
|
||||
|
||||
from openedx.core.djangoapps.content.search.models import SearchAccess
|
||||
from openedx.core.djangoapps.content.search.plain_text_math import process_mathjax
|
||||
from openedx.core.djangoapps.content_libraries import api as lib_api
|
||||
from openedx.core.djangoapps.content_tagging import api as tagging_api
|
||||
from openedx.core.djangoapps.xblock import api as xblock_api
|
||||
@@ -220,7 +221,7 @@ def _fields_from_block(block) -> dict:
|
||||
# Generate description from the content
|
||||
description = _get_description_from_block_content(block_type, content_data)
|
||||
if description:
|
||||
block_data[Fields.description] = description
|
||||
block_data[Fields.description] = process_mathjax(description)
|
||||
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
|
||||
|
||||
161
openedx/core/djangoapps/content/search/plain_text_math.py
Normal file
161
openedx/core/djangoapps/content/search/plain_text_math.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Helper class to convert mathjax equations to plain text.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import unicodeit
|
||||
|
||||
|
||||
class InvalidMathEquation(Exception):
    """Signals a malformed mathjax equation; callers use it to skip every transformation."""
|
||||
|
||||
|
||||
class EqnPatternNotFound(Exception):
    """Signals that a pattern is absent from an equation; callers use it to skip one transformation."""
|
||||
|
||||
|
||||
class PlainTextMath:
    """
    Converts mathjax equations to plain text using unicodeit and some preprocessing.

    The class keeps no per-instance state: every method reads only the class-level
    tables below. ``run`` is written to be used as the replacement callback for
    ``equation_pattern``.
    """

    # One alternative per supported delimiter pair:
    # [mathjaxinline]..[/mathjaxinline], [mathjax]..[/mathjax], \(..\) and \[..\].
    # Each alternative has its own capture group, so for any single match at most
    # one group is non-empty.
    equation_pattern = re.compile(
        r'\[mathjaxinline\](.*?)\[\/mathjaxinline\]|\[mathjax\](.*?)\[\/mathjax\]|\\\((.*?)\\\)|\\\[(.*?)\\\]'
    )
    # Plain substring replacements, applied in order.
    eqn_replacements = (
        # just remove prefix `\`
        ("\\sin", "sin"),
        ("\\cos", "cos"),
        ("\\tan", "tan"),
        ("\\arcsin", "arcsin"),
        ("\\arccos", "arccos"),
        ("\\arctan", "arctan"),
        ("\\cot", "cot"),
        ("\\sec", "sec"),
        ("\\csc", "csc"),
    )
    # (compiled pattern, replacement) pairs, applied in order.
    regex_replacements = (
        # Makes text bold, so not required in plain text.
        (re.compile(r'{\\bf (.*?)}'), r"\1"),
        # `\left` / `\right` are used for matching brackets in mathjax and are not
        # required in plain text. The lookahead keeps longer commands such as
        # `\leftarrow` and `\rightarrow` intact; a plain substring replacement
        # would turn them into `arrow`.
        (re.compile(r'\\(?:left|right)(?![A-Za-z])'), ""),
    )
    extract_inner_texts = (
        # Replaces any eqn: `\name{inner_text}` with `inner_text`
        "\\mathbf{",
        "\\bm{",
    )
    # Not referenced by any method of this class; kept for backward compatibility.
    frac_open_close_pattern = re.compile(r"}\s*{")

    @staticmethod
    def _nested_bracket_matcher(equation: str, opening_pattern: str) -> tuple:
        r"""
        Locates the `}` that closes the first occurrence of ``opening_pattern``.

        Nested `{...}` pairs inside the body are skipped while searching.

        Args:
            equation: string
            opening_pattern: for example, `\mathbf{`

        Returns:
            Tuple of four indices into ``equation``:
            (pattern start, body start, body end, index just after the closing `}`).
            In the example below `|` denotes each index position in turn:
            |\mathbf{, \mathbf{|, \mathbf{some_text|}, \mathbf{some_text}|

        Raises:
            EqnPatternNotFound: ``opening_pattern`` does not occur in ``equation``.
            InvalidMathEquation: the opening bracket is never closed.
        """
        start = equation.find(opening_pattern)
        if start == -1:
            raise EqnPatternNotFound()
        inner_start = start + len(opening_pattern)
        depth = 0  # number of nested `{` currently open inside the body
        for index in range(inner_start, len(equation)):
            char = equation[index]
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    return (start, inner_start, index, index + 1)
                depth -= 1
        raise InvalidMathEquation()

    def _fraction_handler(self, equation: str) -> str:
        r"""
        Converts every `\frac{x}{y}` to `(x/y)` while handling nested `{}`.

        For example: `\frac{2}{\sqrt{1+y}}` is converted to `(2/\sqrt{1+y})`.
        Fractions nested in the numerator or denominator, and further fractions
        following the first one, are converted as well.

        Args:
            equation: string

        Returns:
            String with `\frac` replaced by normal `/` symbol. The input is
            returned unchanged when it has no `\frac{`, or when the numerator is
            not followed (optionally after whitespace) by a `{...}` denominator.

        Raises:
            InvalidMathEquation: a bracket of a fraction is never closed.
        """
        frac_open = "\\frac{"
        if frac_open not in equation:
            return equation
        n_start, n_inner_start, n_inner_end, n_end = self._nested_bracket_matcher(equation, frac_open)
        # Handle nested fractions
        numerator = self._fraction_handler(equation[n_inner_start:n_inner_end])

        remainder = equation[n_end:]
        # The denominator must directly follow the numerator. Picking up a `{`
        # further along would silently drop the text in between.
        if not remainder.lstrip().startswith("{"):
            return equation
        _, d_inner_start, d_inner_end, d_end = self._nested_bracket_matcher(remainder, "{")
        # Handle nested fractions
        denominator = self._fraction_handler(remainder[d_inner_start:d_inner_end])
        # Convert any fractions that come after this one.
        tail = self._fraction_handler(remainder[d_end:])
        # Now re-create the equation with `(numerator / denominator)`
        return f"{equation[:n_start]}({numerator}/{denominator}){tail}"

    def _nested_text_extractor(self, equation: str, pattern: str) -> str:
        r"""
        Recursively replaces every `pattern...}` in equation with its inner text.

        Args:
            equation: string
            pattern: opening pattern including the bracket, for example `\mathbf{`

        Returns:
            String with all occurrences unwrapped, both nested ones and ones that
            follow each other.

        Raises:
            InvalidMathEquation: an occurrence of ``pattern`` is never closed.
        """
        if pattern not in equation:
            return equation
        start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern)
        inner_text = self._nested_text_extractor(equation[inner_start:inner_end], pattern)
        # Occurrences after the closing bracket need unwrapping too.
        tail = self._nested_text_extractor(equation[end:], pattern)
        return equation[:start] + inner_text + tail

    def _handle_replacements(self, equation: str) -> str:
        """
        Makes a bunch of replacements in equation string.

        Applies, in order: the substring replacements, the inner-text extraction
        for each pattern in ``extract_inner_texts``, and the regex replacements.
        """
        for q, replacement in self.eqn_replacements:
            equation = equation.replace(q, replacement)
        for pattern in self.extract_inner_texts:
            equation = self._nested_text_extractor(equation, pattern)
        for pattern, replacement in self.regex_replacements:
            equation = pattern.sub(replacement, equation)
        return equation

    def run(self, eqn_matches: re.Match) -> str:
        """
        Takes re.Match object and runs conversion process on the matched equation.

        Args:
            eqn_matches: a match produced by ``equation_pattern``.

        Returns:
            The converted equation without its delimiters. If any step of the
            conversion fails, the untouched equation body is returned instead.
            An empty string is returned when the equation body is empty.
        """
        # Only the group belonging to the delimiter that matched is non-empty.
        original = next((group for group in eqn_matches.groups() if group), None)
        if original is None:
            return ""
        try:
            converted = self._handle_replacements(original)
            converted = self._fraction_handler(converted)
            return unicodeit.replace(converted)
        except Exception:  # pylint: disable=broad-except
            # Best effort: an equation that cannot be converted is shown as written.
            return original
|
||||
|
||||
|
||||
# Module-level converter instance reused by every `process_mathjax` call.
processor = PlainTextMath()


def process_mathjax(content: str) -> str:
    """
    Replace each mathjax equation found in ``content`` with its plain-text form.

    Every match of ``processor.equation_pattern`` is handed to ``processor.run``
    and substituted with whatever that returns; text outside the equations is
    left as it is.
    """
    return processor.equation_pattern.sub(processor.run, content)
|
||||
@@ -477,3 +477,121 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
|
||||
"num_children": 1
|
||||
}
|
||||
}
|
||||
|
||||
def test_mathjax_plain_text_conversion_for_search(self):
    """
    Test how an HTML block with mathjax equations gets converted to plain text in search description.

    All inputs are joined with `|||` into a single HTML block; the resulting
    description is split on the same separator and compared case by case.
    """
    # pylint: disable=line-too-long
    eqns = [
        # (input, expected output)
        ('Simple addition: \\( 2 + 3 \\)', 'Simple addition: 2 + 3'),
        ('Simple subtraction: \\( 5 - 2 \\)', 'Simple subtraction: 5 − 2'),
        ('Simple multiplication: \\( 4 * 6 \\)', 'Simple multiplication: 4 * 6'),
        ('Simple division: \\( 8 / 2 \\)', 'Simple division: 8 / 2'),
        ('Mixed arithmetic: \\( 2 + 3 4 \\)', 'Mixed arithmetic: 2 + 3 4'),
        ('Simple exponentiation: \\[ 2^3 \\]', 'Simple exponentiation: 2³'),
        ('Root extraction: \\[ 16^{1/2} \\]', 'Root extraction: 16¹^/²'),
        ('Exponent with multiple terms: \\[ (2 + 3)^2 \\]', 'Exponent with multiple terms: (2 + 3)²'),
        ('Nested exponents: \\[ 2^(3^2) \\]', 'Nested exponents: 2⁽3²)'),
        ('Mixed roots: \\[ 8^{1/2} 3^2 \\]', 'Mixed roots: 8¹^/² 3²'),
        ('Simple fraction: [mathjaxinline] 3/4 [/mathjaxinline]', 'Simple fraction: 3/4'),
        (
            'Decimal to fraction conversion: [mathjaxinline] 0.75 = 3/4 [/mathjaxinline]',
            'Decimal to fraction conversion: 0.75 = 3/4',
        ),
        ('Mixed fractions: [mathjaxinline] 1 1/2 = 3/2 [/mathjaxinline]', 'Mixed fractions: 1 1/2 = 3/2'),
        (
            'Converting decimals to mixed fractions: [mathjaxinline] 2.5 = 5/2 [/mathjaxinline]',
            'Converting decimals to mixed fractions: 2.5 = 5/2',
        ),
        (
            'Trig identities: [mathjaxinline] \\sin(x + y) = \\sin(x) \\cos(y) + \\cos(x) \\sin(y) [/mathjaxinline]',
            'Trig identities: sin(x + y) = sin(x) cos(y) + cos(x) sin(y)',
        ),
        (
            'Sine, cosine, and tangent: [mathjaxinline] \\sin(x) [/mathjaxinline] [mathjaxinline] \\cos(x) [/mathjaxinline] [mathjaxinline] \\tan(x) [/mathjaxinline]',
            'Sine, cosine, and tangent: sin(x) cos(x) tan(x)',
        ),
        (
            'Hyperbolic trig functions: [mathjaxinline] \\sinh(x) [/mathjaxinline] [mathjaxinline] \\cosh(x) [/mathjaxinline]',
            'Hyperbolic trig functions: sinh(x) cosh(x)',
        ),
        (
            "Simple derivative: [mathjax] f(x) = x^2, f'(x) = 2x [/mathjax]",
            "Simple derivative: f(x) = x², f'(x) = 2x",
        ),
        ('Double integral: [mathjax] int\\int (x + y) dxdy [/mathjax]', 'Double integral: int∫ (x + y) dxdy'),
        (
            'Partial derivatives: [mathjax] f(x,y) = xy, \\frac{\\partial f}{\\partial x} = y [/mathjax] [mathjax] \\frac{\\partial f}{\\partial y} = x [/mathjax]',
            'Partial derivatives: f(x,y) = xy, (∂ f/∂ x) = y (∂ f/∂ y) = x',
        ),
        (
            'Mean and standard deviation: [mathjax] mu = 2, \\sigma = 1 [/mathjax]',
            'Mean and standard deviation: mu = 2, σ = 1',
        ),
        (
            'Binomial probability: [mathjax] P(X = k) = (\\binom{n}{k} p^k (1-p)^{n-k}) [/mathjax]',
            'Binomial probability: P(X = k) = (\\binom{n}{k} pᵏ (1−p)ⁿ⁻ᵏ)',
        ),
        ('Gaussian distribution: [mathjax] N(\\mu, \\sigma^2) [/mathjax]', 'Gaussian distribution: N(μ, σ²)'),
        (
            'Greek letters: [mathjaxinline] \\alpha [/mathjaxinline] [mathjaxinline] \\beta [/mathjaxinline] [mathjaxinline] \\gamma [/mathjaxinline]',
            'Greek letters: α β γ',
        ),
        (
            'Subscripted variables: [mathjaxinline] x_i [/mathjaxinline] [mathjaxinline] y_j [/mathjaxinline]',
            'Subscripted variables: xᵢ yⱼ',
        ),
        ('Superscripted variables: [mathjaxinline] x^{i} [/mathjaxinline]', 'Superscripted variables: xⁱ'),
        (
            'Not supported: \\( \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I \\)',
            'Not supported: \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I',
        ),
        (
            'Bold text: \\( {\\bf a} \\cdot {\\bf b} = |{\\bf a}| |{\\bf b}| \\cos(\\theta) \\)',
            'Bold text: a ⋅ b = |a| |b| cos(θ)',
        ),
        ('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'),
        ('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'),
        (
            'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)',
            'Nested Bold text 2: a ⋅ (a × b)'
        ),
        (
            'Nested Bold text 3: \\( \\mathbf{a \\cdot (a \\bm{\\times} b)} \\)',
            'Nested Bold text 3: a ⋅ (a × b)'
        ),
        ('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'),
        ('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'),
        ('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),
        ('Fraction test 1: \\( \\frac{2} {3} \\)', 'Fraction test 1: (2/3)'),
        ('Fraction test 2: \\( \\frac{2}{3} \\)', 'Fraction test 2: (2/3)'),
        ('Fraction test 3: \\( \\frac{\\frac{2}{3}}{4} \\)', 'Fraction test 3: ((2/3)/4)'),
        ('Fraction test 4: \\( \\frac{\\frac{2} {3}}{4} \\)', 'Fraction test 4: ((2/3)/4)'),
        ('Fraction test 5: \\( \\frac{\\frac{2} {3}}{\\frac{4}{3}} \\)', 'Fraction test 5: ((2/3)/(4/3))'),
        # Invalid equations.
        ('Fraction error: \\( \\frac{2} \\)', 'Fraction error: \\frac{2}'),
        ('Fraction error 2: \\( \\frac{\\frac{2}{3}{4} \\)', 'Fraction error 2: \\frac{\\frac{2}{3}{4}'),
        ('Unclosed: [mathjaxinline]x^2', 'Unclosed: [mathjaxinline]x^2'),
        (
            'Missing closing bracket: \\( \\frac{\\frac{2} {3}{\\frac{4}{3}} \\)',
            'Missing closing bracket: \\frac{\\frac{2} {3}{\\frac{4}{3}}'
        ),
        ('No equation: normal text', 'No equation: normal text'),
    ]
    # pylint: enable=line-too-long
    block = BlockFactory.create(
        parent_location=self.toy_course.location,
        category="html",
        display_name="Non-default HTML Block",
        editor="raw",
        use_latex_compiler=True,
        data="|||".join(e[0] for e in eqns),
    )
    doc = {}
    doc.update(searchable_doc_for_course_block(block))
    doc.update(searchable_doc_tags(block.usage_key))
    result = doc['description'].split('|||')
    # Without this check a description with fewer segments than inputs would
    # silently skip the trailing cases instead of failing.
    assert len(result) == len(eqns)
    for (source, expected), actual in zip(eqns, result):
        assert actual.strip() == expected, f"Unexpected conversion for input: {source!r}"
|
||||
|
||||
@@ -1208,6 +1208,8 @@ unicodecsv==0.14.1
|
||||
# via
|
||||
# -r requirements/edx/kernel.in
|
||||
# edx-enterprise
|
||||
unicodeit==0.7.5
|
||||
# via -r requirements/edx/kernel.in
|
||||
uritemplate==4.1.1
|
||||
# via
|
||||
# drf-spectacular
|
||||
|
||||
@@ -2160,6 +2160,10 @@ unicodecsv==0.14.1
|
||||
# -r requirements/edx/doc.txt
|
||||
# -r requirements/edx/testing.txt
|
||||
# edx-enterprise
|
||||
unicodeit==0.7.5
|
||||
# via
|
||||
# -r requirements/edx/doc.txt
|
||||
# -r requirements/edx/testing.txt
|
||||
unidiff==0.7.5
|
||||
# via -r requirements/edx/testing.txt
|
||||
uritemplate==4.1.1
|
||||
|
||||
@@ -1521,6 +1521,8 @@ unicodecsv==0.14.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
# edx-enterprise
|
||||
unicodeit==0.7.5
|
||||
# via -r requirements/edx/base.txt
|
||||
uritemplate==4.1.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
|
||||
@@ -163,3 +163,4 @@ web-fragments # Provides the ability to render fragments o
|
||||
wrapt # Better functools.wrapped. TODO: functools has since improved, maybe we can switch?
|
||||
XBlock[django] # Courseware component architecture
|
||||
xss-utils # https://github.com/openedx/edx-platform/pull/20633 Fix XSS via Translations
|
||||
unicodeit # Converts mathjax equation to plain text by using unicode symbols
|
||||
|
||||
@@ -1605,6 +1605,8 @@ unicodecsv==0.14.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
# edx-enterprise
|
||||
unicodeit==0.7.5
|
||||
# via -r requirements/edx/base.txt
|
||||
unidiff==0.7.5
|
||||
# via -r requirements/edx/testing.in
|
||||
uritemplate==4.1.1
|
||||
|
||||
Reference in New Issue
Block a user