462 lines
15 KiB
Python
462 lines
15 KiB
Python
#!/usr/bin/python
|
||
# -*- coding: utf-8 -*-
|
||
#
|
||
# File: formula.py
|
||
# Date: 04-May-12
|
||
# Author: I. Chuang <ichuang@mit.edu>
|
||
#
|
||
# flexible python representation of a symbolic mathematical formula.
|
||
# Accepts Presentation MathML, Content MathML (and could also do OpenMath)
|
||
# Provides sympy representation.
|
||
|
||
import os, sys, string, re
|
||
import operator
|
||
import sympy
|
||
from sympy.printing.latex import LatexPrinter
|
||
from sympy.printing.str import StrPrinter
|
||
from sympy import latex, sympify
|
||
from sympy.physics.quantum.qubit import *
|
||
from sympy.physics.quantum.state import *
|
||
# from sympy import exp, pi, I
|
||
# from sympy.core.operations import LatticeOp
|
||
# import sympy.physics.quantum.qubit
|
||
|
||
import urllib
|
||
from xml.sax.saxutils import escape, unescape
|
||
import sympy
|
||
import unicodedata
|
||
from lxml import etree
|
||
#import subprocess
|
||
import requests
|
||
from copy import deepcopy
|
||
|
||
# Import-time notice: this line runs at module level, so the warning is written
# to stdout every time the module is loaded.
print "[lib.sympy_check.formula] Warning: Dark code. Needs review before enabling in prod."
|
||
|
||
# Ask for utf-8 as the stdio encoding via the process environment.
# NOTE(review): Python reads PYTHONIOENCODING at interpreter startup, so this
# assignment presumably only influences child processes -- confirm the intent.
os.environ['PYTHONIOENCODING'] = 'utf-8'
|
||
|
||
#-----------------------------------------------------------------------------
|
||
|
||
class dot(sympy.operations.LatticeOp):
    # Custom symbolic dot product, implemented as a sympy LatticeOp subclass.
    # LatticeOp subclasses supply an `identity` and a `zero` element; both are
    # plain placeholder symbols here.
    identity = sympy.Symbol('dotidentity')
    zero = sympy.Symbol('dotzero')
|
||
|
||
#class dot(sympy.Mul): # my dot product
|
||
# is_Mul = False
|
||
|
||
def _print_dot(self,expr):
|
||
return '{((%s) \cdot (%s))}' % (expr.args[0],expr.args[1])
|
||
|
||
# Attach the handler to sympy's LatexPrinter under the `_print_<class name>`
# attribute name, so it is available as the LaTeX rendering for `dot`.
LatexPrinter._print_dot = _print_dot
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# unit vectors (for 8.02)
|
||
|
||
def _print_hat(self,expr): return '\\hat{%s}' % str(expr.args[0]).lower()
|
||
|
||
# Attach the hat handler to sympy's LatexPrinter under the `_print_<name>` attribute.
LatexPrinter._print_hat = _print_hat
|
||
# Attach the same handler to StrPrinter as well; since the handler returns the
# \hat{...} form, that is also what the string printer gets for hat(...).
StrPrinter._print_hat = _print_hat
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# helper routines
|
||
|
||
def to_latex(x):
    '''
    Render x as LaTeX wrapped in [mathjax]...[/mathjax] markup followed by <br>.

    x -- expression handed to sympy's latex(); None yields the empty string.

    Returns a string.
    '''
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI', 'XI')   # workaround for strange greek
    # Some sympy versions (v6) wrap the output in $...$; strip the delimiters.
    # startswith() also copes with an empty rendering, where xs[0] would raise.
    if xs.startswith('$'):
        return '[mathjax]%s[/mathjax]<br>' % (xs[1:-1])   # for sympy v6
    return '[mathjax]%s[/mathjax]<br>' % (xs)             # for sympy v7
|
||
|
||
def my_evalf(expr, chop=False):
    '''
    Best-effort numerical evaluation.

    expr -- an object with an evalf(chop=...) method, or a list of such objects
    chop -- forwarded to evalf()

    Returns the evaluated object (or a new list of evaluated objects).  If the
    evaluation fails for any reason, expr is returned unchanged.
    '''
    try:
        if isinstance(expr, list):
            return [x.evalf(chop=chop) for x in expr]
        return expr.evalf(chop=chop)
    except Exception:
        # Deliberate fallback: anything that cannot be evaluated is passed
        # through untouched.  Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        return expr
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# my version of sympify to import expression into sympy
|
||
|
||
def my_sympify(expr,normphase=False,matrix=False,abcsym=False,do_qubit=False,symtab=None):
    '''
    Version of sympify used to import an expression into sympy.

    expr      -- the expression to parse
    normphase -- if the result is a list whose first entry is a number, multiply
                 every entry by exp(-I*arg(first entry)) to remove the overall phase
    matrix    -- if the result is a rectangular list of lists, return it as a sympy Matrix
    abcsym    -- treat every lowercase ASCII letter not already in the symbol table
                 as a real symbol
    do_qubit  -- add qubit, Ket, dot and bit to the symbol table
    symtab    -- optional symbol table to use instead of the built-in one; it is
                 copied, never modified

    Returns the sympy expression (or list / Matrix of expressions).
    '''
    if symtab:
        # Work on a copy: the updates below must not leak into the caller's dict.
        varset = dict(symtab)
    else:
        varset = {'p': sympy.Symbol('p'),
                  'g': sympy.Symbol('g'),
                  'e': sympy.E,                  # for exp
                  'i': sympy.I,                  # lowercase i is also sqrt(-1)
                  'Q': sympy.Symbol('Q'),        # otherwise it is a sympy "ask key"
                  'ZZ': sympy.Symbol('ZZ'),      # otherwise it is the PythonIntegerRing
                  'XI': sympy.Symbol('XI'),      # otherwise it is the capital \XI
                  'hat': sympy.Function('hat'),  # for unit vectors (8.02)
                  }

    if do_qubit:    # turn qubit(...) into Qubit instance
        varset.update({'qubit': sympy.physics.quantum.qubit.Qubit,
                       'Ket': sympy.physics.quantum.state.Ket,
                       'dot': dot,
                       'bit': sympy.Function('bit'),
                       })

    if abcsym:      # consider all lowercase letters as real symbols, in the parsing
        # ascii_lowercase is locale-independent (string.lowercase is not)
        for letter in string.ascii_lowercase:
            if letter in varset:    # exclude those already done
                continue
            varset[letter] = sympy.Symbol(letter, real=True)

    sexpr = sympify(expr, locals=varset)

    # remove overall phase if sexpr is a (non-empty) list starting with a number
    if normphase and isinstance(sexpr, list) and sexpr:
        if getattr(sexpr[0], 'is_number', False):
            ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
            sexpr = [sympy.Mul(x, ophase) for x in sexpr]

    def to_matrix(x):
        # if x is a non-empty rectangular list of lists, return Matrix(x); otherwise x itself
        if not isinstance(x, list) or not x:
            return x
        for row in x:
            if not isinstance(row, list):
                return x
        rdim = len(x[0])
        for row in x:
            if len(row) != rdim:
                return x
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# class for symbolic mathematical formulas
|
||
|
||
class formula(object):
    '''
    Representation of a mathematical formula object.  Accepts a MathML math expression
    (presentation or content MathML), or a plain string handed to my_sympify, for
    constructing, and can produce a sympy translation.  The formula may or may not
    include an assignment (=).

    Attributes:
        expr         -- the stripped expression given to the constructor
        asciimath    -- ASCIIMath text forwarded to the SnuggleTeX conversion service
        the_pmathml  -- serialized, pre-processed presentation MathML (set by get_content_mathml)
        the_cmathml  -- cached content MathML (set by get_content_mathml)
        the_sympy    -- cached sympy expression (set by make_sympy)
        xml          -- pre-processed presentation MathML tree (set by preprocess_pmathml)
    '''

    # seconds to wait for the SnuggleTeX conversion service before giving up
    snuggletex_timeout = 30

    def __init__(self,expr,asciimath=''):
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.the_pmathml = None
        self.the_cmathml = None
        self.the_sympy = None
        self.xml = None

    @staticmethod
    def _gettag(x):
        '''Return the tag of element x with any {http://...} namespace prefix removed.'''
        return re.sub('{http://[^}]+}','',x.tag)

    def is_presentation_mathml(self):
        '''True if the expression contains an <mstyle element.'''
        return '<mstyle' in self.expr

    def is_mathml(self):
        '''True if the expression contains a "<math " opening tag (with attributes).'''
        return '<math ' in self.expr

    def fix_greek_in_mathml(self,xml):
        '''
        Recursively replace single greek unicode letters found in <mi> and <ci>
        elements by their ASCII names (lowercase for small letters, e.g. "beta").
        The tree is modified in place and also returned.
        '''
        for k in xml:
            tag = self._gettag(k)
            if tag=='mi' or tag=='ci':
                usym = unicode(k.text)
                try:
                    udata = unicodedata.name(usym)   # eg "GREEK SMALL LETTER BETA"
                except (TypeError, ValueError):
                    # not a single character, or a character without a name
                    udata = None
                if udata and 'GREEK' in udata:
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                    k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def preprocess_pmathml(self,xml):
        '''
        Pre-process presentation MathML from ASCIIMathML to make it more acceptable for SnuggleTeX, and also
        to accommodate some sympy conventions (eg hat(i) for \\hat{i}).

        xml -- MathML source string, or an already parsed element tree

        Returns the processed tree, which is also stored in self.xml.
        '''
        if isinstance(xml, (str, unicode)):
            xml = etree.fromstring(xml)     # TODO: wrap in try

        xml = self.fix_greek_in_mathml(xml)     # convert greek utf letters to greek spelled out in ascii

        gettag = self._gettag

        # f and g are processed as functions by asciimathml, eg "f-2" turns into
        # "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>"
        # this is really terrible for turning into cmathml.  undo this here.
        def fix_pmathml(xml):
            # iterate over a snapshot: the loop body inserts into / removes from xml
            for k in list(xml):
                if gettag(k)=='mrow' and len(k)==2:
                    if gettag(k[0])=='mi' and k[0].text in ['f','g'] and gettag(k[1])=='mo':
                        idx = xml.index(k)
                        xml.insert(idx,deepcopy(k[0]))      # drop the <mrow> container
                        xml.insert(idx+1,deepcopy(k[1]))
                        xml.remove(k)
                fix_pmathml(k)

        fix_pmathml(xml)

        # hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle this into <mi>hat(i)</mi>
        # hat i is also sometimes turned into <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
        def fix_hat(xml):
            # iterate over a snapshot: the loop body replaces children of xml
            for k in list(xml):
                # compare the operator text directly: str() on a non-ascii
                # operator (eg an overbar) raises UnicodeEncodeError
                if gettag(k)=='mover' and len(k)==2 and gettag(k[1])=='mo' and k[1].text=='^':
                    base = None
                    if gettag(k[0])=='mi':
                        base = k[0]
                    elif gettag(k[0])=='mrow' and len(k[0])>0 and gettag(k[0][0])=='mi':
                        base = k[0][0]
                    if base is not None:
                        newk = etree.Element('mi')
                        newk.text = 'hat(%s)' % base.text
                        newk.tail = k.tail      # keep whatever text followed the <mover>
                        xml.replace(k,newk)
                fix_hat(k)

        fix_hat(xml)

        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        '''
        Return the content MathML for this formula, converting the presentation
        MathML through the SnuggleTeX service on first use and caching the result.
        '''
        if self.the_cmathml: return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex to convert to content mathml
        xml = self.preprocess_pmathml(self.expr)
        pmathml = etree.tostring(xml,pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath,pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml,None,None,'content MathML representation')

    def make_sympy(self,xml=None):
        '''
        Return sympy expression for the math formula.

        xml -- None to translate the whole formula (the MathML result is cached in
               self.the_sympy), or a content MathML element to translate just that node.

        Raises Exception for operators or tags that are not supported.
        '''
        if xml is None:     # root
            # "is not None": a legitimately falsy result (eg 0) is still a valid cache entry
            if self.the_sympy is not None:
                return self.the_sympy
            if not self.is_mathml():
                return my_sympify(self.expr)
            if self.is_presentation_mathml():
                source = str(self.cmathml)
            else:
                source = self.expr
            root = self.fix_greek_in_mathml(etree.fromstring(source))
            self.the_sympy = self.make_sympy(root[0])
            return self.the_sympy

        gettag = self._gettag

        # simple math
        def op_plus(*args): return sum(args)

        def op_times(*args): return reduce(operator.mul,args)

        def op_minus(*args):
            if len(args)==1:
                return -args[0]
            if not len(args)==2:
                raise Exception('minus given wrong number of arguments!')
            return args[0]-args[1]

        opdict = {'plus': op_plus,
                  'divide': operator.div,
                  'times': op_times,
                  'minus': op_minus,
                  'root': sympy.sqrt,
                  'power': sympy.Pow,
                  'sin': sympy.sin,
                  'cos': sympy.cos,
                  }

        def parsePresentationMathMLSymbol(xml):
            '''
            Parse <msub>, <msup>, <mi>, and <mn>
            '''
            tag = gettag(xml)
            if tag=='mn' or tag=='mi':
                return xml.text
            if tag=='msub':
                return '_'.join([parsePresentationMathMLSymbol(y) for y in xml])
            if tag=='msup':
                return '^'.join([parsePresentationMathMLSymbol(y) for y in xml])
            raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % tag)

        # parser tree for content MathML
        tag = gettag(xml)

        # first do compound objects

        if tag=='apply':        # apply operator
            opstr = gettag(xml[0])
            if opstr not in opdict:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            args = [ self.make_sympy(x) for x in xml[1:] ]
            return opdict[opstr](*args)

        if tag=='list':         # square bracket list
            if len(xml)>0 and gettag(xml[0])=='matrix':
                return self.make_sympy(xml[0])
            return [ self.make_sympy(x) for x in xml ]

        if tag=='matrix':
            return sympy.Matrix([ self.make_sympy(x) for x in xml ])

        if tag=='vector':
            return [ self.make_sympy(x) for x in xml ]

        # atoms are below

        if tag=='cn':           # number
            return sympy.sympify(xml.text)

        if tag=='ci':           # variable (symbol)
            if len(xml)>0 and (gettag(xml[0])=='msub' or gettag(xml[0])=='msup'):
                usym = parsePresentationMathMLSymbol(xml[0])
                return sympy.Symbol(str(usym))
            usym = unicode(xml.text)
            if 'hat' in usym:
                return my_sympify(usym)
            return sympy.Symbol(str(usym))

        # unknown tag
        raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy,None,None,'sympy representation')

    def GetContentMathML(self,asciimath,mathml):
        '''
        Convert presentation MathML to content MathML by posting it to the local
        SnuggleTeX ASCIIMathML up-conversion demo page and scraping the reply.

        asciimath -- the ASCIIMath input text
        mathml    -- the presentation MathML

        Returns the content MathML wrapped in a <math> element.  Errors raised by
        requests (including a timeout after snuggletex_timeout seconds) propagate.
        '''
        URL = 'http://127.0.0.1:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'

        payload = {'asciiMathInput':asciimath,
                   'asciiMathML':mathml,
                   }
        headers = {'User-Agent':"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13"}
        # without a timeout a stalled service would block the caller forever
        r = requests.post(URL,data=payload,headers=headers,timeout=self.snuggletex_timeout)
        r.encoding = 'utf-8'
        ret = r.text

        # collect the lines between the "conversion to Content MathML" marker and
        # the "Maxima Input Form" heading of the returned page
        mode = 0
        cmathml = []
        for k in ret.split('\n'):
            if 'conversion to Content MathML' in k:
                mode = 1
                continue
            if mode==1:
                if '<h3>Maxima Input Form</h3>' in k:
                    mode = 0
                    continue
                cmathml.append(k)
        # the first two collected lines are skipped; the rest is html-escaped MathML
        cmathml = '\n'.join(cmathml[2:])
        cmathml = '<math xmlns="http://www.w3.org/1998/Math/MathML">\n' + unescape(cmathml) + '\n</math>'
        return cmathml
|
||
|
||
#-----------------------------------------------------------------------------
|
||
|
||
def test1():
    '''Build a formula from content MathML that applies <plus/> to 1 and 2.'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<plus/>
<cn>1</cn>
<cn>2</cn>
</apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
def test2():
    '''Build a formula from content MathML for 1 plus (2 times alpha), with a unicode alpha.'''
    content_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<plus/>
<cn>1</cn>
<apply>
<times/>
<cn>2</cn>
<ci>α</ci>
</apply>
</apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
def test3():
    '''Build a formula from content MathML for 1 divided by (2 plus gamma).'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<apply>
<divide/>
<cn>1</cn>
<apply>
<plus/>
<cn>2</cn>
<ci>γ</ci>
</apply>
</apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
def test4():
    '''Build a formula from presentation MathML: 1 + a fraction of 2 over a unicode alpha.'''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mstyle displaystyle="true">
<mn>1</mn>
<mo>+</mo>
<mfrac>
<mn>2</mn>
<mi>α</mi>
</mfrac>
</mstyle>
</math>
'''
    return formula(presentation_mathml)
|