Files
edx-platform/lms/lib/symmath/formula.py
2013-01-22 01:40:13 -07:00

608 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# File: formula.py
# Date: 04-May-12 (creation)
# Author: I. Chuang <ichuang@mit.edu>
#
# flexible python representation of a symbolic mathematical formula.
# Accepts Presentation MathML, Content MathML (and could also do OpenMath)
# Provides sympy representation.
import os
import sys
import string
import re
import logging
import operator
import sympy
from sympy.printing.latex import LatexPrinter
from sympy.printing.str import StrPrinter
from sympy import latex, sympify
from sympy.physics.quantum.qubit import *
from sympy.physics.quantum.state import *
# from sympy import exp, pi, I
# from sympy.core.operations import LatticeOp
# import sympy.physics.quantum.qubit
import urllib
from xml.sax.saxutils import escape, unescape
import sympy
import unicodedata
from lxml import etree
#import subprocess
import requests
from copy import deepcopy
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Emitted once, at import time, as a reminder that this module has not been reviewed for production use.
log.warning("Dark code. Needs review before enabling in prod.")
# Sets PYTHONIOENCODING in this process's environment.
# NOTE(review): presumably meant to force UTF-8 on stdio; confirm it has the intended
# effect, since the interpreter is already running by the time this line executes.
os.environ['PYTHONIOENCODING'] = 'utf-8'
#-----------------------------------------------------------------------------
class dot(sympy.operations.LatticeOp):
    '''
    Dot-product operation, built on sympy's LatticeOp.

    The two class attributes below are the symbols this operation uses as its
    ``identity`` and ``zero`` elements.
    '''
    identity = sympy.Symbol('dotidentity')
    zero = sympy.Symbol('dotzero')
#class dot(sympy.Mul): # my dot product
# is_Mul = False
def _print_dot(self, expr):
return '{((%s) \cdot (%s))}' % (expr.args[0], expr.args[1])
# Attach the hook to sympy's LatexPrinter under the name `_print_dot`
# (presumably so that latex() picks it up for `dot` instances -- confirm against sympy's printer dispatch).
LatexPrinter._print_dot = _print_dot
#-----------------------------------------------------------------------------
# unit vectors (for 8.02)
def _print_hat(self, expr): return '\\hat{%s}' % str(expr.args[0]).lower()
# Attach the same hook to both the LaTeX printer and the plain string printer,
# so `hat(...)` is rendered as \hat{...} by either one.
LatexPrinter._print_hat = _print_hat
StrPrinter._print_hat = _print_hat
#-----------------------------------------------------------------------------
# helper routines
def to_latex(x):
    '''
    Return `x` rendered as LaTeX and wrapped in ``[mathjax]...[/mathjax]<br>``.

    Returns the empty string when `x` is None.
    '''
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI', 'XI')  # workaround for strange greek
    # sympy v6 wraps its output in '$...$'; strip the delimiters in that case.
    # startswith() also copes with an empty string, where xs[0] would raise.
    if xs.startswith('$'):
        return '[mathjax]%s[/mathjax]<br>' % (xs[1:-1])  # for sympy v6
    return '[mathjax]%s[/mathjax]<br>' % (xs)  # for sympy v7
def my_evalf(expr, chop=False):
    '''
    Best-effort numerical evaluation.

    Calls ``.evalf(chop=chop)`` on `expr`, or on every element when `expr` is
    a list.  If evaluation raises, `expr` is returned unchanged.
    '''
    try:
        if isinstance(expr, list):
            return [x.evalf(chop=chop) for x in expr]
        return expr.evalf(chop=chop)
    except Exception:
        # Deliberately lenient: anything that cannot be evaluated is handed
        # back as-is.  (Not a bare except, so KeyboardInterrupt / SystemExit
        # still propagate.)
        return expr
#-----------------------------------------------------------------------------
# my version of sympify to import expression into sympy
def my_sympify(expr, normphase=False, matrix=False, abcsym=False, do_qubit=False, symtab=None):
    '''
    Import `expr` into sympy, using a custom table of symbol definitions.

    expr      - expression to hand to sympify
    normphase - if the result is a list whose first element is a number,
                multiply every element by exp(-I*arg(first element)) to remove
                the overall phase
    matrix    - if the result is a rectangular list of lists, return it as a
                sympy.Matrix
    abcsym    - treat every lowercase ASCII letter not already in the symbol
                table as a real symbol
    do_qubit  - add 'qubit', 'Ket', 'dot' and 'bit' to the symbol table
    symtab    - symbol table to use instead of the built-in defaults; it is
                copied, so the caller's mapping is never modified

    Returns whatever sympify returns, after the optional post-processing.
    '''
    if symtab:
        # Copy: the updates below must not leak into the caller's table.
        varset = dict(symtab)
    else:
        varset = {'p': sympy.Symbol('p'),
                  'g': sympy.Symbol('g'),
                  'e': sympy.E,            # for exp
                  'i': sympy.I,            # lowercase i is also sqrt(-1)
                  'Q': sympy.Symbol('Q'),  # otherwise it is a sympy "ask key"
                  'I': sympy.Symbol('I'),  # otherwise it is sqrt(-1)
                  #'X':sympy.sympify('Matrix([[0,1],[1,0]])'),
                  #'Y':sympy.sympify('Matrix([[0,-I],[I,0]])'),
                  #'Z':sympy.sympify('Matrix([[1,0],[0,-1]])'),
                  'ZZ': sympy.Symbol('ZZ'),  # otherwise it is the PythonIntegerRing
                  'XI': sympy.Symbol('XI'),  # otherwise it is the capital \XI
                  'hat': sympy.Function('hat'),  # for unit vectors (8.02)
                  }

    if do_qubit:  # turn qubit(...) into Qubit instance
        varset.update({'qubit': sympy.physics.quantum.qubit.Qubit,
                       'Ket': sympy.physics.quantum.state.Ket,
                       'dot': dot,
                       'bit': sympy.Function('bit'),
                       })

    if abcsym:  # consider all lowercase letters as real symbols, in the parsing
        # ascii_lowercase rather than lowercase: the latter is locale-dependent.
        for letter in string.ascii_lowercase:
            if letter in varset:  # exclude those already done
                continue
            varset[letter] = sympy.Symbol(letter, real=True)

    sexpr = sympify(expr, locals=varset)

    # remove overall phase if sexpr is a (non-empty) list
    if normphase and isinstance(sexpr, list) and sexpr and sexpr[0].is_number:
        ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
        sexpr = [sympy.Mul(x, ophase) for x in sexpr]

    def to_matrix(x):
        '''If x is a non-empty, rectangular list of lists, return Matrix(x); otherwise return x.'''
        if not isinstance(x, list) or not x:
            return x
        if not all(isinstance(row, list) for row in x):
            return x
        rdim = len(x[0])
        if any(len(row) != rdim for row in x):
            return x
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr
#-----------------------------------------------------------------------------
# class for symbolic mathematical formulas
class formula(object):
    '''
    Representation of a mathematical formula object.  Accepts mathml math expression
    for constructing, and can produce sympy translation.  The formula may or may not
    include an assignment (=).

    Attributes set by the constructor:
      expr        - the stripped input expression (MathML or plain text)
      asciimath   - the ASCIIMath source, forwarded to the conversion service
      options     - optional flags; 'imaginary' in options makes "i" mean sqrt(-1)
      the_pmathml - pre-processed presentation MathML (None until computed)
      the_cmathml - cached content MathML (None until computed)
      the_sympy   - cached sympy expression (None until computed)
      xml         - element tree produced by preprocess_pmathml (None until computed)
    '''

    # Text type of the running interpreter (``unicode`` on Python 2).
    _TEXT_TYPE = type(u'')

    # Matches the "{http://...}" namespace prefix in front of an element's tag name.
    _NAMESPACE_RE = re.compile('{http://[^}]+}')

    def __init__(self, expr, asciimath='', options=None):
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.the_pmathml = None
        self.the_cmathml = None
        self.the_sympy = None
        self.options = options
        self.xml = None

    @classmethod
    def _gettag(cls, node):
        '''Return the tag name of `node` with any "{http://...}" namespace prefix removed.'''
        return cls._NAMESPACE_RE.sub('', node.tag)

    def is_presentation_mathml(self):
        '''True if the expression contains an <mstyle element.'''
        return '<mstyle' in self.expr

    def is_mathml(self):
        '''True if the expression contains a "<math " opening tag.'''
        return '<math ' in self.expr

    def fix_greek_in_mathml(self, xml):
        '''
        Recursively replace single Greek letters in <mi> and <ci> elements by
        their ASCII names, e.g. the character named "GREEK SMALL LETTER BETA"
        becomes "beta" and a capital letter becomes e.g. "GAMMA".

        The tree is modified in place and also returned.
        '''
        for k in xml:
            # Elements without text are left alone (converting None to text
            # would write the literal string "None" into the tree).
            if self._gettag(k) in ('mi', 'ci') and k.text is not None:
                usym = self._TEXT_TYPE(k.text)
                try:
                    udata = unicodedata.name(usym)
                except (TypeError, ValueError):
                    # not a single character, or a character without a name
                    udata = None
                if udata and 'GREEK' in udata:  # eg "GREEK SMALL LETTER BETA"
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def _unwrap_function_mrows(self, xml):
        '''
        f and g are processed as functions by asciimathml, eg "f-2" turns into
        "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>".  This is really terrible
        for turning into cmathml, so undo it: replace such an <mrow> by copies
        of its two children.  Works recursively and in place.
        '''
        # Iterate over a snapshot, because the loop edits xml's child list.
        for k in list(xml):
            if (self._gettag(k) == 'mrow' and len(k) == 2
                    and self._gettag(k[0]) == 'mi' and k[0].text in ('f', 'g')
                    and self._gettag(k[1]) == 'mo'):
                idx = xml.index(k)
                xml.insert(idx, deepcopy(k[0]))  # drop the <mrow> container
                xml.insert(idx + 1, deepcopy(k[1]))
                xml.remove(k)
            self._unwrap_function_mrows(k)

    def _fold_hats(self, xml):
        '''
        hat i is turned into <mover><mi>i</mi><mo>^</mo></mover>, and sometimes
        into <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>; mangle either
        form into <mi>hat(i)</mi>.  Works recursively and in place.
        '''
        # Iterate over a snapshot, because the loop edits xml's child list.
        for k in list(xml):
            # The accent text is compared directly (no str() conversion), so a
            # non-ASCII accent simply does not match instead of raising.
            if (self._gettag(k) == 'mover' and len(k) == 2
                    and self._gettag(k[1]) == 'mo' and k[1].text == '^'):
                base = k[0]
                if self._gettag(base) == 'mrow' and len(base) > 0:
                    base = base[0]  # look inside the <mrow> wrapper
                elif self._gettag(base) != 'mi':
                    base = None
                if base is not None and self._gettag(base) == 'mi':
                    newk = etree.Element('mi')
                    newk.text = 'hat(%s)' % base.text
                    xml.replace(k, newk)
            self._fold_hats(k)

    def preprocess_pmathml(self, xml):
        '''
        Pre-process presentation MathML from ASCIIMathML to make it more
        acceptable for SnuggleTeX, and also to accommodate some sympy
        conventions (eg hat(i) for \\hat{i}).

        `xml` may be a string (which is parsed) or an already-parsed element.
        The processed tree is stored in self.xml and returned.

        This method would be a good spot to look for an integral and convert
        it, if possible...
        '''
        if isinstance(xml, (bytes, self._TEXT_TYPE)):
            xml = etree.fromstring(xml)  # parse errors propagate to the caller
        xml = self.fix_greek_in_mathml(xml)  # convert greek utf letters to greek spelled out in ascii
        self._unwrap_function_mrows(xml)
        self._fold_hats(xml)
        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        '''
        Return the Content MathML for this formula, computing and caching it
        on first use.

        The presentation MathML is pre-processed and then sent to SnuggleTeX
        for conversion.  If pre-processing fails, an HTML error string is
        returned instead (and nothing is cached).
        '''
        if self.the_cmathml is not None:
            return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex to convert to content mathml
        try:
            xml = self.preprocess_pmathml(self.expr)
        except Exception:
            return "<html>Error! Cannot process pmathml</html>"
        pmathml = etree.tostring(xml, pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath, pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml, None, None, 'content MathML representation')

    def make_sympy(self, xml=None):
        '''
        Return sympy expression for the math formula.
        The math formula is converted to Content MathML then that is parsed.

        Called without `xml`, this handles the whole formula: plain text goes
        through my_sympify; MathML is parsed and the result cached in
        self.the_sympy.  Called with a Content MathML node, it translates that
        node; it is a recursive function, called on every CMML node.  Support
        for more functions can be added by modifying opdict, about halfway down.

        Raises Exception for illegal expressions, unknown tags or operators,
        and operators that fail on their arguments.
        '''
        if self.the_sympy is not None:
            return self.the_sympy

        if xml is None:  # root
            if not self.is_mathml():
                return my_sympify(self.expr)
            if self.is_presentation_mathml():
                cmml = None
                try:
                    cmml = self.cmathml
                    xml = etree.fromstring(str(cmml))
                except Exception as err:
                    # cmml is still None if fetching the content MathML itself failed.
                    if (cmml is not None and
                            'conversion from Presentation MathML to Content MathML was not successful' in cmml):
                        msg = "Illegal math expression"
                    else:
                        msg = 'Err %s while converting cmathml to xml; cmml=%s' % (err, cmml)
                    raise Exception(msg)
            else:
                xml = etree.fromstring(self.expr)
            xml = self.fix_greek_in_mathml(xml)
            self.the_sympy = self.make_sympy(xml[0])
            return self.the_sympy

        gettag = self._gettag

        # simple math
        def op_divide(*args):
            # Alternative implementation of 'divide'; not currently wired into opdict.
            if not len(args) == 2:
                raise Exception('divide given wrong number of arguments!')
            return sympy.Mul(args[0], sympy.Pow(args[1], -1))

        def op_plus(*args):
            total = args[0]
            for term in args[1:]:
                total = total + term
            return total

        def op_times(*args):
            product = args[0]
            for factor in args[1:]:
                product = product * factor
            return product

        def op_minus(*args):
            if len(args) == 1:
                return -args[0]
            if not len(args) == 2:
                raise Exception('minus given wrong number of arguments!')
            return args[0] - args[1]

        opdict = {'plus': op_plus,
                  # operator.div where the interpreter provides it, else truediv
                  'divide': getattr(operator, 'div', operator.truediv),
                  'times': op_times,
                  'minus': op_minus,
                  'root': sympy.sqrt,
                  'power': sympy.Pow,
                  'sin': sympy.sin,
                  'cos': sympy.cos,
                  'tan': sympy.tan,
                  'cot': sympy.cot,
                  'sinh': sympy.sinh,
                  'cosh': sympy.cosh,
                  'coth': sympy.coth,
                  'tanh': sympy.tanh,
                  'asin': sympy.asin,
                  'acos': sympy.acos,
                  'atan': sympy.atan,
                  'atan2': sympy.atan2,
                  'acot': sympy.acot,
                  'asinh': sympy.asinh,
                  'acosh': sympy.acosh,
                  'atanh': sympy.atanh,
                  'acoth': sympy.acoth,
                  'exp': sympy.exp,
                  'log': sympy.log,
                  'ln': sympy.ln,
                  }

        # simple symbols
        # NOTE(review): this table is built but never consulted below, so
        # <ci>pi</ci> currently becomes a plain Symbol('pi').
        nums1dict = {'pi': sympy.pi,
                     }

        def parsePresentationMathMLSymbol(xml):
            '''
            Parse <msub>, <msup>, <mi>, and <mn>
            '''
            tag = gettag(xml)
            if tag in ('mn', 'mi'):
                return xml.text
            elif tag == 'msub':
                return '_'.join([parsePresentationMathMLSymbol(y) for y in xml])
            elif tag == 'msup':
                return '^'.join([parsePresentationMathMLSymbol(y) for y in xml])
            raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % tag)

        # parser tree for Content MathML
        tag = gettag(xml)

        # first do compound objects
        if tag == 'apply':  # apply operator
            opstr = gettag(xml[0])
            if opstr not in opdict:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            op = opdict[opstr]
            args = [self.make_sympy(x) for x in xml[1:]]
            try:
                res = op(*args)
            except Exception as err:
                # keep the failing operator and arguments around for inspection
                self.args = args
                self.op = op
                raise Exception('[formula] error=%s failed to apply %s to args=%s' % (err, opstr, args))
            return res

        elif tag == 'list':  # square bracket list
            if len(xml) > 0 and gettag(xml[0]) == 'matrix':
                return self.make_sympy(xml[0])
            return [self.make_sympy(x) for x in xml]

        elif tag == 'matrix':
            return sympy.Matrix([self.make_sympy(x) for x in xml])

        elif tag == 'vector':
            return [self.make_sympy(x) for x in xml]

        # atoms are below
        elif tag == 'cn':  # number
            return sympy.sympify(xml.text)

        elif tag == 'ci':  # variable (symbol)
            if len(xml) > 0 and gettag(xml[0]) in ('msub', 'msup'):  # subscript or superscript
                usym = parsePresentationMathMLSymbol(xml[0])
                sym = sympy.Symbol(str(usym))
            else:
                usym = self._TEXT_TYPE(xml.text)
                if 'hat' in usym:
                    sym = my_sympify(usym)
                elif usym == 'i' and self.options is not None and 'imaginary' in self.options:  # i = sqrt(-1)
                    sym = sympy.I
                else:
                    sym = sympy.Symbol(str(usym))
            return sym

        else:  # unknown tag
            raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy, None, None, 'sympy representation')

    @staticmethod
    def _extract_content_mathml(html):
        '''
        Pull the Content MathML out of the HTML page returned by the
        SnuggleTeX up-conversion demo.

        Collects the lines between the one containing
        'conversion to Content MathML' and the one containing
        '<h3>Maxima Input Form</h3>', drops the first two collected lines,
        XML-unescapes the rest and wraps it in a <math> element.
        '''
        in_section = False
        collected = []
        for line in html.split('\n'):
            if 'conversion to Content MathML' in line:
                in_section = True
                continue
            if in_section:
                if '<h3>Maxima Input Form</h3>' in line:
                    in_section = False
                    continue
                collected.append(line)
        body = '\n'.join(collected[2:])
        return '<math xmlns="http://www.w3.org/1998/Math/MathML">\n' + unescape(body) + '\n</math>'

    def GetContentMathML(self, asciimath, mathml):
        '''
        Convert presentation MathML to Content MathML by POSTing `asciimath`
        and `mathml` to the SnuggleTeX ASCIIMathML up-conversion web service,
        and return the Content MathML extracted from its HTML response.
        '''
        # URL = 'http://192.168.1.2:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'
        # URL = 'http://127.0.0.1:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'
        URL = 'https://math-xserver.mitx.mit.edu/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'

        payload = {'asciiMathInput': asciimath,
                   'asciiMathML': mathml,
                   #'asciiMathML':unicode(mathml).encode('utf-8'),
                   }
        headers = {'User-Agent': "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13"}
        # NOTE(review): verify=False turns off TLS certificate verification, and
        # no timeout is passed, so this call can block indefinitely.  Both are
        # left as they were; confirm the server's certificate and pick a
        # timeout before enabling this in production.
        r = requests.post(URL, data=payload, headers=headers, verify=False)
        r.encoding = 'utf-8'
        return self._extract_content_mathml(r.text)
#-----------------------------------------------------------------------------
def test1():
    '''Return a formula built from Content MathML that applies <plus/> to 1 and 2.'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<plus/>
<cn>1</cn>
<cn>2</cn>
</apply>
</math>
'''
    return formula(content_mathml)
def test2():
    '''Return a formula built from Content MathML for 1 + 2*alpha, with alpha given as a Greek character.'''
    content_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<plus/>
<cn>1</cn>
<apply>
<times/>
<cn>2</cn>
<ci>α</ci>
</apply>
</apply>
</math>
'''
    return formula(content_mathml)
def test3():
    '''Return a formula built from Content MathML for 1 / (2 + gamma), with gamma given as a Greek character.'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<divide/>
<cn>1</cn>
<apply>
<plus/>
<cn>2</cn>
<ci>γ</ci>
</apply>
</apply>
</math>
'''
    return formula(content_mathml)
def test4():
    '''Return a formula built from presentation MathML for 1 + 2/alpha.'''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mstyle displaystyle="true">
<mn>1</mn>
<mo>+</mo>
<mfrac>
<mn>2</mn>
<mi>α</mi>
</mfrac>
</mstyle>
</math>
'''
    return formula(presentation_mathml)
def test5():
    '''
    Return a formula built from presentation MathML for a sum of two matrices:
    cos(theta) times [[1, 0], [0, 1]], plus [[0, 1], [1, 0]].
    '''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mstyle displaystyle="true">
<mrow>
<mi>cos</mi>
<mrow>
<mo>(</mo>
<mi>&#x3B8;</mi>
<mo>)</mo>
</mrow>
</mrow>
<mo>&#x22C5;</mo>
<mrow>
<mo>[</mo>
<mtable>
<mtr>
<mtd>
<mn>1</mn>
</mtd>
<mtd>
<mn>0</mn>
</mtd>
</mtr>
<mtr>
<mtd>
<mn>0</mn>
</mtd>
<mtd>
<mn>1</mn>
</mtd>
</mtr>
</mtable>
<mo>]</mo>
</mrow>
<mo>+</mo>
<mrow>
<mo>[</mo>
<mtable>
<mtr>
<mtd>
<mn>0</mn>
</mtd>
<mtd>
<mn>1</mn>
</mtd>
</mtr>
<mtr>
<mtd>
<mn>1</mn>
</mtd>
<mtd>
<mn>0</mn>
</mtd>
</mtr>
</mtable>
<mo>]</mo>
</mrow>
</mstyle>
</math>
'''
    return formula(presentation_mathml)
def test6():
    '''
    Return a formula built from presentation MathML for 1 + i, constructed
    with options='imaginaryi' (imaginary numbers).
    '''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mstyle displaystyle="true">
<mn>1</mn>
<mo>+</mo>
<mi>i</mi>
</mstyle>
</math>
'''
    return formula(presentation_mathml, options='imaginaryi')