608 lines
19 KiB
Python
608 lines
19 KiB
Python
#!/usr/bin/python
|
||
# -*- coding: utf-8 -*-
|
||
#
|
||
# File: formula.py
|
||
# Date: 04-May-12 (creation)
|
||
# Author: I. Chuang <ichuang@mit.edu>
|
||
#
|
||
# flexible python representation of a symbolic mathematical formula.
|
||
# Accepts Presentation MathML, Content MathML (and could also do OpenMath)
|
||
# Provides sympy representation.
|
||
|
||
import os
|
||
import sys
|
||
import string
|
||
import re
|
||
import logging
|
||
import operator
|
||
import sympy
|
||
from sympy.printing.latex import LatexPrinter
|
||
from sympy.printing.str import StrPrinter
|
||
from sympy import latex, sympify
|
||
from sympy.physics.quantum.qubit import *
|
||
from sympy.physics.quantum.state import *
|
||
# from sympy import exp, pi, I
|
||
# from sympy.core.operations import LatticeOp
|
||
# import sympy.physics.quantum.qubit
|
||
|
||
import urllib
|
||
from xml.sax.saxutils import escape, unescape
|
||
import sympy
|
||
import unicodedata
|
||
from lxml import etree
|
||
#import subprocess
|
||
import requests
|
||
from copy import deepcopy
|
||
|
||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
||
|
||
# NOTE(review): this runs at module level, so the warning is emitted when the module is imported.
log.warning("Dark code. Needs review before enabling in prod.")
|
||
|
||
# Ask for UTF-8 on Python's standard streams via the process environment.
# NOTE(review): set at import time; presumably this only influences interpreters
# started afterwards (e.g. child processes) -- confirm this is the intent.
os.environ['PYTHONIOENCODING'] = 'utf-8'
|
||
|
||
#-----------------------------------------------------------------------------
|
||
|
||
|
||
class dot(sympy.core.operations.LatticeOp):  # my dot product
    '''
    Symbolic "dot product" operator, usable inside parsed expressions as dot(a, b).

    It is built on sympy's LatticeOp.  `zero` and `identity` are the two special
    elements LatticeOp requires; they are placeholder symbols here.

    The base class is referenced through its defining module
    (sympy.core.operations) rather than through `sympy.operations`, which is
    not part of sympy's public namespace.
    '''
    zero = sympy.Symbol('dotzero')
    identity = sympy.Symbol('dotidentity')
|
||
|
||
#class dot(sympy.Mul): # my dot product
|
||
# is_Mul = False
|
||
|
||
|
||
def _print_dot(self, expr):
|
||
return '{((%s) \cdot (%s))}' % (expr.args[0], expr.args[1])
|
||
|
||
# Install the hook on the LatexPrinter class itself, so it applies to every LatexPrinter.
LatexPrinter._print_dot = _print_dot
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# unit vectors (for 8.02)
|
||
|
||
|
||
def _print_hat(self, expr): return '\\hat{%s}' % str(expr.args[0]).lower()
|
||
|
||
# Use the hat hook when printing LaTeX (installed on the LatexPrinter class).
LatexPrinter._print_hat = _print_hat
|
||
# The same hook is installed on StrPrinter, so str() output of hat(x) is also \hat{x}.
StrPrinter._print_hat = _print_hat
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# helper routines
|
||
|
||
|
||
def to_latex(x):
    '''
    Convert a sympy expression to a "[mathjax]...[/mathjax]<br>" string.

    Returns '' when x is None.  Otherwise x is rendered with sympy's latex();
    if the rendering is wrapped in $...$ the delimiters are stripped first.
    '''
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI', 'XI')  # workaround for strange greek
    # startswith() instead of xs[0], so an empty rendering cannot raise IndexError
    if xs.startswith('$'):
        return '[mathjax]%s[/mathjax]<br>' % (xs[1:-1])  # for sympy v6
    return '[mathjax]%s[/mathjax]<br>' % (xs)  # for sympy v7
|
||
|
||
|
||
def my_evalf(expr, chop=False):
    '''
    Best-effort numerical evaluation.

    expr: an object with an evalf(chop=...) method, or a list of such objects.
    chop: passed through to evalf().

    Returns the evaluated value (or list of values).  If evaluation fails for
    any reason the input is returned unchanged; for a list, one failing
    element means the whole original list is returned.
    '''
    try:
        if isinstance(expr, list):
            return [x.evalf(chop=chop) for x in expr]
        return expr.evalf(chop=chop)
    except Exception:
        # deliberate best effort -- but unlike a bare "except:" this lets
        # KeyboardInterrupt and SystemExit propagate
        return expr
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# my version of sympify to import expression into sympy
|
||
|
||
|
||
def my_sympify(expr, normphase=False, matrix=False, abcsym=False, do_qubit=False, symtab=None):
    '''
    Parse expr with sympify(), using a symbol table suited to this module.

    expr      : the expression to parse (passed to sympify)
    normphase : if the result is a list whose first entry is a number, multiply
                every entry by exp(-I*arg(first)) to remove the overall phase
    matrix    : if the result is a rectangular list of lists, return it as a sympy Matrix
    abcsym    : treat every lowercase ASCII letter not already in the table as a real symbol
    do_qubit  : add 'qubit', 'Ket', 'dot' and 'bit' to the table
    symtab    : optional symbol table (dict) used instead of the default one;
                it is copied, never modified

    Returns the parsed expression (a sympy object, list, or Matrix).
    '''
    if symtab:
        # copy: the updates below must not leak into the caller's dict
        varset = dict(symtab)
    else:
        varset = {'p': sympy.Symbol('p'),
                  'g': sympy.Symbol('g'),
                  'e': sympy.E,            # for exp
                  'i': sympy.I,            # lowercase i is also sqrt(-1)
                  'Q': sympy.Symbol('Q'),  # otherwise it is a sympy "ask key"
                  'I': sympy.Symbol('I'),  # otherwise it is sqrt(-1)
                  'ZZ': sympy.Symbol('ZZ'),  # otherwise it is the PythonIntegerRing
                  'XI': sympy.Symbol('XI'),  # otherwise it is the capital \XI
                  'hat': sympy.Function('hat'),  # for unit vectors (8.02)
                  }
    if do_qubit:  # turn qubit(...) into Qubit instance
        varset.update({'qubit': sympy.physics.quantum.qubit.Qubit,
                       'Ket': sympy.physics.quantum.state.Ket,
                       'dot': dot,
                       'bit': sympy.Function('bit'),
                       })
    if abcsym:  # consider all lowercase letters as real symbols, in the parsing
        for letter in string.ascii_lowercase:
            if letter in varset:  # exclude those already done
                continue
            varset[letter] = sympy.Symbol(letter, real=True)

    sexpr = sympify(expr, locals=varset)

    # remove overall phase if sexpr is a (non-empty) list starting with a number
    if normphase and isinstance(sexpr, list) and sexpr:
        if getattr(sexpr[0], 'is_number', False):
            ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
            sexpr = [sympy.Mul(x, ophase) for x in sexpr]

    def to_matrix(x):
        '''If x is a non-empty, rectangular list of lists return Matrix(x), else x unchanged.'''
        if not isinstance(x, list) or not x:
            return x
        if not all(isinstance(row, list) for row in x):
            return x
        rdim = len(x[0])
        if any(len(row) != rdim for row in x):
            return x
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr
|
||
|
||
#-----------------------------------------------------------------------------
|
||
# class for symbolic mathematical formulas
|
||
|
||
|
||
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: str is already the text type

_NAMESPACE_RE = re.compile('{http://[^}]+}')


def _gettag(element):
    '''Return element.tag with any leading {http://...} namespace removed.'''
    return _NAMESPACE_RE.sub('', element.tag)


class formula(object):
    '''
    Representation of a mathematical formula object.  Accepts mathml math expression
    for constructing, and can produce sympy translation.  The formula may or may not
    include an assignment (=).
    '''
    def __init__(self, expr, asciimath='', options=None):
        '''
        expr      : the formula text (MathML markup, or anything my_sympify can parse)
        asciimath : the ASCIIMath input, sent along when converting to Content MathML
        options   : optional; if it contains 'imaginary', the symbol i is parsed as sqrt(-1)
        '''
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.the_cmathml = None  # cache for the cmathml property
        self.the_sympy = None    # cache for the sympy property
        self.options = options

    def is_presentation_mathml(self):
        '''True if the expression contains an <mstyle tag.'''
        return '<mstyle' in self.expr

    def is_mathml(self):
        '''True if the expression contains a "<math " tag.'''
        return '<math ' in self.expr

    def fix_greek_in_mathml(self, xml):
        '''
        Recursively replace single greek unicode letters in <mi> and <ci>
        elements by their spelled-out ascii names (eg "alpha", "GAMMA").

        The tree is modified in place and also returned.
        '''
        for k in xml:
            if _gettag(k) in ('mi', 'ci'):
                try:
                    # eg "GREEK SMALL LETTER BETA"; fails unless the text is a single named character
                    udata = unicodedata.name(_text_type(k.text))
                except (TypeError, ValueError):
                    udata = None
                if udata and 'GREEK' in udata:
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                    k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def preprocess_pmathml(self, xml):
        r'''
        Pre-process presentation MathML from ASCIIMathML to make it more
        acceptable for SnuggleTeX, and also to accommodate some sympy
        conventions (eg hat(i) for \hat{i}).

        xml may be a string (which is parsed) or an already parsed element.
        The processed tree is stored in self.xml and returned.

        This method would be a good spot to look for an integral and convert
        it, if possible...
        '''
        if isinstance(xml, (str, _text_type)):
            xml = etree.fromstring(xml)  # TODO: wrap in try

        xml = self.fix_greek_in_mathml(xml)  # convert greek utf letters to greek spelled out in ascii

        # f and g are processed as functions by asciimathml, eg "f-2" turns into
        # "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>"
        # this is really terrible for turning into cmathml.
        # undo this here.
        def fix_pmathml(xml):
            # iterate over a snapshot: children of xml are replaced inside the loop
            for k in list(xml):
                if (_gettag(k) == 'mrow' and len(k) == 2
                        and _gettag(k[0]) == 'mi' and k[0].text in ['f', 'g']
                        and _gettag(k[1]) == 'mo'):
                    idx = xml.index(k)
                    xml.insert(idx, deepcopy(k[0]))  # drop the <mrow> container
                    xml.insert(idx + 1, deepcopy(k[1]))
                    xml.remove(k)
                    continue  # k is detached now; nothing left to fix inside it
                fix_pmathml(k)

        fix_pmathml(xml)

        # hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle this into <mi>hat(i)</mi>
        # hat i is also sometimes turned into <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
        def fix_hat(xml):
            for k in list(xml):
                # compare the text directly: str() on non-ascii text fails under python 2
                if (_gettag(k) == 'mover' and len(k) == 2
                        and _gettag(k[1]) == 'mo' and k[1].text == '^'):
                    base = k[0]
                    if _gettag(base) == 'mrow' and len(base) > 0:
                        base = base[0]
                    if _gettag(base) == 'mi':
                        newk = etree.Element('mi')
                        newk.text = 'hat(%s)' % base.text
                        xml.replace(k, newk)
                        continue
                fix_hat(k)

        fix_hat(xml)

        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        '''
        Return the Content MathML for this formula (cached after the first call).

        The presentation MathML is pre-processed, stored serialized in
        self.the_pmathml, and sent to SnuggleTeX for conversion.  If the
        pre-processing fails, an "<html>Error! ..." string is returned instead.
        '''
        if self.the_cmathml:
            return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex to convert to content mathml
        try:
            xml = self.preprocess_pmathml(self.expr)
        except Exception:
            # best effort: callers get an error string, but keep a trace of the cause
            log.exception("Cannot process pmathml")
            return "<html>Error! Cannot process pmathml</html>"
        pmathml = etree.tostring(xml, pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath, pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml, None, None, 'content MathML representation')

    def make_sympy(self, xml=None):
        '''
        Return sympy expression for the math formula.
        The math formula is converted to Content MathML then that is parsed.

        This is a recursive function, called on every CMML node.  Support for
        more functions can be added by modifying opdict, about halfway down.

        Called without xml it converts the whole formula and caches the result
        in self.the_sympy (plain, non-MathML input is handed to my_sympify and
        not cached).  Raises Exception on unknown tags or operators, or when
        the conversion to Content MathML fails.
        '''
        # "is not None": a sympy result must not be truth-tested
        # (relationals raise, and a cached 0 would be recomputed)
        if self.the_sympy is not None:
            return self.the_sympy

        if xml is None:  # root
            if not self.is_mathml():
                return my_sympify(self.expr)
            if self.is_presentation_mathml():
                cmml = None
                try:
                    cmml = self.cmathml
                    xml = etree.fromstring(str(cmml))
                except Exception as err:
                    # cmml is still None if fetching the content mathml itself failed
                    if cmml is not None and 'conversion from Presentation MathML to Content MathML was not successful' in cmml:
                        msg = "Illegal math expression"
                    else:
                        msg = 'Err %s while converting cmathml to xml; cmml=%s' % (err, cmml)
                    raise Exception(msg)
            else:
                xml = etree.fromstring(self.expr)
            xml = self.fix_greek_in_mathml(xml)
            self.the_sympy = self.make_sympy(xml[0])
            return self.the_sympy

        # simple math
        def op_divide(*args):
            if not len(args) == 2:
                raise Exception('divide given wrong number of arguments!')
            return args[0] / args[1]

        def op_plus(*args):
            return args[0] if len(args) == 1 else op_plus(*args[:-1]) + args[-1]

        def op_times(*args):
            product = args[0]
            for factor in args[1:]:
                product = product * factor
            return product

        def op_minus(*args):
            if len(args) == 1:
                return -args[0]
            if not len(args) == 2:
                raise Exception('minus given wrong number of arguments!')
            return args[0] - args[1]

        opdict = {'plus': op_plus,
                  'divide': op_divide,
                  'times': op_times,
                  'minus': op_minus,
                  'root': sympy.sqrt,
                  'power': sympy.Pow,
                  'sin': sympy.sin,
                  'cos': sympy.cos,
                  'tan': sympy.tan,
                  'cot': sympy.cot,
                  'sinh': sympy.sinh,
                  'cosh': sympy.cosh,
                  'coth': sympy.coth,
                  'tanh': sympy.tanh,
                  'asin': sympy.asin,
                  'acos': sympy.acos,
                  'atan': sympy.atan,
                  'atan2': sympy.atan2,
                  'acot': sympy.acot,
                  'asinh': sympy.asinh,
                  'acosh': sympy.acosh,
                  'atanh': sympy.atanh,
                  'acoth': sympy.acoth,
                  'exp': sympy.exp,
                  'log': sympy.log,
                  'ln': sympy.ln,
                  }

        def parsePresentationMathMLSymbol(xml):
            '''
            Parse <msub>, <msup>, <mi>, and <mn>
            '''
            tag = _gettag(xml)
            if tag == 'mn' or tag == 'mi':
                return xml.text
            elif tag == 'msub':
                return '_'.join([parsePresentationMathMLSymbol(y) for y in xml])
            elif tag == 'msup':
                return '^'.join([parsePresentationMathMLSymbol(y) for y in xml])
            raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % tag)

        # parser tree for Content MathML
        tag = _gettag(xml)

        # first do compound objects

        if tag == 'apply':  # apply operator
            opstr = _gettag(xml[0])
            if opstr not in opdict:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            op = opdict[opstr]
            args = [self.make_sympy(x) for x in xml[1:]]
            try:
                res = op(*args)
            except Exception as err:
                # kept on the instance so a failure can be inspected afterwards
                self.args = args
                self.op = op
                raise Exception('[formula] error=%s failed to apply %s to args=%s' % (err, opstr, args))
            return res

        elif tag == 'list':  # square bracket list
            if len(xml) > 0 and _gettag(xml[0]) == 'matrix':
                return self.make_sympy(xml[0])
            return [self.make_sympy(x) for x in xml]

        elif tag == 'matrix':
            return sympy.Matrix([self.make_sympy(x) for x in xml])

        elif tag == 'vector':
            return [self.make_sympy(x) for x in xml]

        # atoms are below

        elif tag == 'cn':  # number
            return sympy.sympify(xml.text)

        elif tag == 'ci':  # variable (symbol)
            if len(xml) > 0 and _gettag(xml[0]) in ('msub', 'msup'):  # subscript or superscript
                usym = parsePresentationMathMLSymbol(xml[0])
                return sympy.Symbol(str(usym))
            usym = _text_type(xml.text)
            if 'hat' in usym:
                return my_sympify(usym)
            if usym == 'i' and self.options is not None and 'imaginary' in self.options:  # i = sqrt(-1)
                return sympy.I
            return sympy.Symbol(str(usym))

        else:  # unknown tag
            raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy, None, None, 'sympy representation')

    def GetContentMathML(self, asciimath, mathml):
        '''
        Convert presentation MathML to Content MathML using the SnuggleTeX
        ASCIIMathML up-conversion web service.

        asciimath and mathml are POSTed to the service; the lines of the reply
        between "conversion to Content MathML" and "<h3>Maxima Input Form</h3>"
        (minus the first two of them) are unescaped and returned wrapped in a
        <math> element.
        '''
        URL = 'https://math-xserver.mitx.mit.edu/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'

        payload = {'asciiMathInput': asciimath,
                   'asciiMathML': mathml,
                   }
        headers = {'User-Agent': "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13"}
        # NOTE(review): verify=False turns off TLS certificate checking, and no
        # timeout is given so this call can block indefinitely.  Both are left
        # as they were; confirm the server certificate before tightening.
        r = requests.post(URL, data=payload, headers=headers, verify=False)
        r.encoding = 'utf-8'
        ret = r.text

        mode = 0
        cmathml = []
        for k in ret.split('\n'):
            if 'conversion to Content MathML' in k:
                mode = 1
                continue
            if mode == 1:
                if '<h3>Maxima Input Form</h3>' in k:
                    mode = 0
                    continue
                cmathml.append(k)
        cmathml = '\n'.join(cmathml[2:])
        cmathml = '<math xmlns="http://www.w3.org/1998/Math/MathML">\n' + unescape(cmathml) + '\n</math>'
        return cmathml
|
||
|
||
#-----------------------------------------------------------------------------
|
||
|
||
|
||
def test1():
    '''Build a formula from Content MathML for 1 + 2.'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <plus/>
    <cn>1</cn>
    <cn>2</cn>
  </apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
|
||
def test2():
    '''Build a formula from Content MathML for 1 + 2*alpha (alpha given as a unicode letter).'''
    content_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <plus/>
    <cn>1</cn>
    <apply>
      <times/>
      <cn>2</cn>
      <ci>α</ci>
    </apply>
  </apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
|
||
def test3():
    '''Build a formula from Content MathML for 1 / (2 + gamma) (gamma given as a greek letter).'''
    content_mathml = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <apply>
    <divide/>
    <cn>1</cn>
    <apply>
      <plus/>
      <cn>2</cn>
      <ci>γ</ci>
    </apply>
  </apply>
</math>
'''
    return formula(content_mathml)
|
||
|
||
|
||
def test4():
    '''Build a formula from Presentation MathML for 1 + 2/alpha.'''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mfrac>
      <mn>2</mn>
      <mi>α</mi>
    </mfrac>
  </mstyle>
</math>
'''
    return formula(presentation_mathml)
|
||
|
||
|
||
def test5():  # sum of two matrices
    '''Build a formula from Presentation MathML: cos(theta) times one 2x2 matrix, plus another.'''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mrow>
      <mi>cos</mi>
      <mrow>
        <mo>(</mo>
        <mi>θ</mi>
        <mo>)</mo>
      </mrow>
    </mrow>
    <mo>⋅</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
    <mo>+</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
  </mstyle>
</math>
'''
    return formula(presentation_mathml)
|
||
|
||
|
||
def test6():  # imaginary numbers
    '''Build a formula from Presentation MathML for 1 + i, with options='imaginaryi'.'''
    presentation_mathml = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mi>i</mi>
  </mstyle>
</math>
'''
    return formula(presentation_mathml, options='imaginaryi')
|