edx-platform/lib/sympy_check/formula.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# File:   formula.py
# Date:   04-May-12
# Author: I. Chuang <ichuang@mit.edu>
#
# flexible python representation of a symbolic mathematical formula.
# Accepts Presentation MathML, Content MathML (and could also do OpenMath)
# Provides sympy representation.

import os, sys, string, re
import operator
import sympy
from sympy.printing.latex import LatexPrinter
from sympy.printing.str import StrPrinter
from sympy import latex, sympify
from sympy.physics.quantum.qubit import *
from sympy.physics.quantum.state import *
# from sympy import exp, pi, I
# from sympy.core.operations import LatticeOp
# import sympy.physics.quantum.qubit

import urllib
from xml.sax.saxutils import escape, unescape
import sympy
import unicodedata
from lxml import etree
#import subprocess
import requests
from copy import deepcopy

# Import-time notice that this module has not been reviewed for production use.
# Written in call form so the statement is valid on both Python 2 and Python 3
# (a single parenthesised string prints identically under the Python 2 print statement).
print("[lib.sympy_check.formula] Warning: Dark code. Needs review before enabling in prod.")

# Request utf-8 for stdio encoding via the environment.
# NOTE(review): the interpreter reads PYTHONIOENCODING at startup, so setting it here
# presumably only influences child processes spawned later -- confirm this is the intent.
os.environ['PYTHONIOENCODING'] = 'utf-8'

#-----------------------------------------------------------------------------

class dot(sympy.core.operations.LatticeOp):	# my dot product
    '''
    Symbolic "dot product" operation, built on sympy's LatticeOp base class.

    The base class is referenced through its defining module
    (sympy.core.operations); the previous spelling `sympy.operations` only
    resolved on sympy releases that re-exported submodules at package level.

    LatticeOp subclasses must provide `zero` and `identity` class attributes;
    here they are dedicated placeholder symbols.
    '''
    zero = sympy.Symbol('dotzero')
    identity = sympy.Symbol('dotidentity')

#class dot(sympy.Mul):	# my dot product
#    is_Mul = False

def _print_dot(self,expr):
    return '{((%s) \cdot (%s))}' % (expr.args[0],expr.args[1])

# Install _print_dot on sympy's LatexPrinter class under the name `_print_dot`,
# i.e. the `_print_<ClassName>` hook name for the `dot` class.
LatexPrinter._print_dot = _print_dot

#-----------------------------------------------------------------------------
# unit vectors (for 8.02)

def _print_hat(self,expr): return '\\hat{%s}' % str(expr.args[0]).lower()

# Install the same hat formatter on both the LaTeX printer and the plain
# string printer, under the `_print_hat` hook name.
LatexPrinter._print_hat = _print_hat
StrPrinter._print_hat = _print_hat

#-----------------------------------------------------------------------------
# helper routines

def to_latex(x):
    '''
    Render `x` with sympy's latex() and wrap it for MathJax display.

    Returns '' when `x` is None; otherwise returns
    '[mathjax]<latex>[/mathjax]<br>'.  If latex() delimits its output with
    '$' (older sympy), the first and last characters are stripped.
    '''
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI','XI')	# workaround for strange greek
    # startswith() instead of xs[0] so an empty latex string cannot raise IndexError
    if xs.startswith('$'):
        return '[mathjax]%s[/mathjax]<br>' % (xs[1:-1])	# for sympy v6
    return '[mathjax]%s[/mathjax]<br>' % (xs)		# for sympy v7

def my_evalf(expr,chop=False):
    '''
    Best-effort numerical evaluation.

    If `expr` is a list, return a list with `.evalf(chop=chop)` applied to
    every element; otherwise return `expr.evalf(chop=chop)`.  If evaluation
    raises (e.g. an element has no evalf method), `expr` is returned
    unchanged.

    Only `Exception` subclasses are swallowed, so KeyboardInterrupt and
    SystemExit still propagate.
    '''
    try:
        if isinstance(expr,list):
            return [x.evalf(chop=chop) for x in expr]
        return expr.evalf(chop=chop)
    except Exception:
        # deliberate best effort: anything that cannot be evaluated is passed through
        return expr

#-----------------------------------------------------------------------------
# my version of sympify to import expression into sympy

def my_sympify(expr,normphase=False,matrix=False,abcsym=False,do_qubit=False,symtab=None):
    '''
    Convert `expr` to a sympy object with sympify(), using a custom symbol table.

    Arguments:
      expr      - the expression handed to sympify()
      normphase - if True and the result is a list whose first element is a
                  number, multiply every element by exp(-I*arg(first)) to
                  remove the overall phase
      matrix    - if True and the result is a rectangular list of lists,
                  return it as a sympy.Matrix
      abcsym    - if True, every lowercase ASCII letter not already in the
                  symbol table is added as a real-valued Symbol
      do_qubit  - if True, add 'qubit', 'Ket', 'dot' and 'bit' to the symbol table
      symtab    - optional symbol table used instead of the built-in defaults;
                  it is copied, so the caller's mapping is never modified

    Returns whatever sympify() produced, after the optional post-processing.
    '''
    # make all lowercase real?
    if symtab:
        varset = dict(symtab)		# copy: the updates below must not leak into the caller's dict
    else:
        varset = {'p':sympy.Symbol('p'),
                  'g':sympy.Symbol('g'),
                  'e':sympy.E,			# for exp
                  'i':sympy.I,			# lowercase i is also sqrt(-1)
                  'Q':sympy.Symbol('Q'),	# otherwise it is a sympy "ask key"
                  #'X':sympy.sympify('Matrix([[0,1],[1,0]])'),
                  #'Y':sympy.sympify('Matrix([[0,-I],[I,0]])'),
                  #'Z':sympy.sympify('Matrix([[1,0],[0,-1]])'),
                  'ZZ':sympy.Symbol('ZZ'),	# otherwise it is the PythonIntegerRing
                  'XI':sympy.Symbol('XI'),	# otherwise it is the capital \XI
                  'hat':sympy.Function('hat'),	# for unit vectors (8.02)
                  }
    if do_qubit:		# turn qubit(...) into Qubit instance
        varset.update({'qubit':sympy.physics.quantum.qubit.Qubit,
                       'Ket':sympy.physics.quantum.state.Ket,
                       'dot':dot,
                       'bit':sympy.Function('bit'),
                       })
    if abcsym:			# consider all lowercase letters as real symbols, in the parsing
        # ascii_lowercase is locale-independent and exists on both Python 2 and 3
        for letter in string.ascii_lowercase:
            if letter not in varset:	# exclude those already done
                varset[letter] = sympy.Symbol(letter,real=True)

    sexpr = sympify(expr,locals=varset)

    # remove overall phase if sexpr is a (non-empty) list led by a number
    if normphase and isinstance(sexpr,list) and sexpr:
        if getattr(sexpr[0],'is_number',False):
            ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
            sexpr = [ sympy.Mul(x,ophase) for x in sexpr ]

    def to_matrix(x):		# if x is a list of lists, and is rectangular, then return Matrix(x)
        if not isinstance(x,list) or not x:
            return x		# not a list, or empty: nothing to convert
        if not all(isinstance(row,list) for row in x):
            return x
        rdim = len(x[0])
        if any(len(row)!=rdim for row in x):
            return x		# ragged rows
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr

#-----------------------------------------------------------------------------
# class for symbolic mathematical formulas

try:
    _text_type = unicode        # Python 2 text type
except NameError:
    _text_type = str            # Python 3: str is already unicode text


def _local_tag(node):
    '''Return node.tag with any "{http://...}" namespace qualifier removed.'''
    return re.sub('{http://[^}]+}','',node.tag)


class formula(object):
    '''
    Representation of a mathematical formula object.  Accepts mathml math expression for constructing,
    and can produce sympy translation.  The formula may or may not include an assignment (=).

    Attributes:
      expr        - the stripped input expression (MathML or a plain string)
      asciimath   - the ASCIIMath source, forwarded to the conversion service
      the_pmathml - pre-processed presentation MathML (set by get_content_mathml)
      the_cmathml - cached content MathML (set by get_content_mathml)
      the_sympy   - cached sympy translation (set by make_sympy)
      xml         - pre-processed presentation MathML tree (set by preprocess_pmathml)

    The `cmathml` and `sympy` properties expose get_content_mathml() and make_sympy().
    '''

    # SnuggleTeX up-conversion endpoint used by GetContentMathML
    # (alternate host once used: http://192.168.1.2:8080/...)
    SNUGGLETEX_URL = 'http://127.0.0.1:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'
    # seconds to wait for the conversion service before requests raises a timeout
    SNUGGLETEX_TIMEOUT = 30

    def __init__(self,expr,asciimath=''):
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.the_pmathml = None
        self.the_cmathml = None
        self.the_sympy = None
        self.xml = None

    def is_presentation_mathml(self):
        '''True if the expression contains an <mstyle element.'''
        return '<mstyle' in self.expr

    def is_mathml(self):
        '''True if the expression contains a <math element (with attributes).'''
        return '<math ' in self.expr

    def fix_greek_in_mathml(self,xml):
        '''
        Recursively rewrite the text of <mi> and <ci> elements below `xml`:
        a single greek unicode character is replaced by the last word of its
        unicode name (eg "GREEK SMALL LETTER BETA" -> "beta"; non-SMALL
        letters keep the upper-case name).  Elements without text are left
        alone.  Modifies the tree in place and returns it.
        '''
        for k in xml:
            if _local_tag(k) in ('mi','ci') and k.text is not None:
                usym = _text_type(k.text)
                try:
                    udata = unicodedata.name(usym)
                except (TypeError,ValueError):	# not a single character, or character has no name
                    udata = None
                if udata and 'GREEK' in udata:	# eg "GREEK SMALL LETTER BETA"
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def preprocess_pmathml(self,xml):
        '''
        Pre-process presentation MathML from ASCIIMathML to make it more acceptable for SnuggleTeX, and also
        to accommodate some sympy conventions (eg hat(i) for an i with a hat accent).

        `xml` may be a string (parsed with etree.fromstring) or an already parsed tree.
        The processed tree is stored in self.xml and returned.
        '''

        if isinstance(xml,(str,_text_type)):
            xml = etree.fromstring(xml)		# TODO: wrap in try

        xml = self.fix_greek_in_mathml(xml)	# convert greek utf letters to greek spelled out in ascii

        # f and g are processed as functions by asciimathml, eg  "f-2" turns into "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>"
        # this is really terrible for turning into cmathml.
        # undo this here.
        def fix_pmathml(parent):
            for k in list(parent):	# snapshot: children are inserted/removed while we walk
                if (_local_tag(k)=='mrow' and len(k)==2
                        and _local_tag(k[0])=='mi' and k[0].text in ['f','g']
                        and _local_tag(k[1])=='mo'):
                    idx = parent.index(k)
                    parent.insert(idx,deepcopy(k[0]))	# drop the <mrow> container
                    parent.insert(idx+1,deepcopy(k[1]))
                    parent.remove(k)
                fix_pmathml(k)

        fix_pmathml(xml)

        # hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle this into <mi>hat(f)</mi>
        # hat i also sometimes turned into <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>

        def fix_hat(parent):
            for k in list(parent):	# snapshot: children are replaced while we walk
                # compare the accent text directly; str() on non-ascii text fails on Python 2
                if _local_tag(k)=='mover' and len(k)==2 and _local_tag(k[1])=='mo' and k[1].text=='^':
                    base = k[0]
                    if _local_tag(base)=='mrow' and len(base)>0:
                        base = base[0]	# unwrap <mrow><mi>..</mi></mrow>
                    if _local_tag(base)=='mi':
                        newk = etree.Element('mi')
                        newk.text = 'hat(%s)' % base.text
                        parent.replace(k,newk)
                fix_hat(k)
        fix_hat(xml)

        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        '''
        Return the content MathML for this formula, converting from presentation
        MathML through GetContentMathML on first use and caching the result.
        '''
        if self.the_cmathml: return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex to convert to content mathml
        xml = self.preprocess_pmathml(self.expr)
        pmathml = etree.tostring(xml,pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath,pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml,None,None,'content MathML representation')

    def make_sympy(self,xml=None):
        '''
        Return sympy expression for the math formula

        Called without `xml`, translates the whole formula (and caches the
        result in self.the_sympy): a non-MathML expression goes straight to
        my_sympify; presentation MathML is first converted to content MathML;
        content MathML is parsed directly.  Called with `xml`, translates that
        content MathML node; this is the recursive step.

        Supported nodes: apply (plus, minus, times, divide, root, power, sin,
        cos), list, matrix, vector, cn and ci.  Raises Exception for an unknown
        tag or operator, or for a wrong number of arguments to minus.
        '''

        if xml is None:	# root
            # identity test: a valid cached result may be falsy (eg 0 or [])
            if self.the_sympy is not None:
                return self.the_sympy
            if not self.is_mathml():
                self.the_sympy = my_sympify(self.expr)
                return self.the_sympy
            if self.is_presentation_mathml():
                xml = etree.fromstring(str(self.cmathml))
            else:
                xml = etree.fromstring(self.expr)
            xml = self.fix_greek_in_mathml(xml)
            self.the_sympy = self.make_sympy(xml[0])
            return self.the_sympy

        from functools import reduce	# builtin on Python 2 only; functools has it on both

        # simple math
        def op_plus(*args): return sum(args)
        def op_times(*args): return reduce(operator.mul,args)

        def op_minus(*args):
            if len(args)==1:
                return -args[0]
            if not len(args)==2:
                raise Exception('minus given wrong number of arguments!')
            return args[0]-args[1]

        opdict = {'plus': op_plus,
                  # operator.div exists on Python 2 only; fall back to truediv elsewhere
                  'divide' : getattr(operator,'div',operator.truediv),
                  'times' : op_times,
                  'minus' : op_minus,
                  'root' : sympy.sqrt,
                  'power' : sympy.Pow,
                  'sin': sympy.sin,
                  'cos': sympy.cos,
                  }

        def parsePresentationMathMLSymbol(node):
            '''
            Parse <msub>, <msup>, <mi>, and <mn>
            '''
            name = _local_tag(node)
            if name in ('mn','mi'):
                return node.text
            if name=='msub':
                return '_'.join([parsePresentationMathMLSymbol(y) for y in node])
            if name=='msup':
                return '^'.join([parsePresentationMathMLSymbol(y) for y in node])
            raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % name)

        # parser tree for content MathML
        tag = _local_tag(xml)
        # NOTE(review): debug trace emitted for every node; output kept identical to the original
        print("tag =  %s" % tag)

        # first do compound objects

        if tag=='apply':		# apply operator
            opstr = _local_tag(xml[0])
            if opstr not in opdict:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            args = [ self.make_sympy(x) for x in xml[1:]]
            return opdict[opstr](*args)

        elif tag=='list':		# square bracket list
            if len(xml)>0 and _local_tag(xml[0])=='matrix':
                return self.make_sympy(xml[0])
            return [ self.make_sympy(x) for x in xml ]

        elif tag=='matrix':
            return sympy.Matrix([ self.make_sympy(x) for x in xml ])

        elif tag=='vector':
            return [ self.make_sympy(x) for x in xml ]

        # atoms are below

        elif tag=='cn':			# number
            return sympy.sympify(xml.text)

        elif tag=='ci':			# variable (symbol)
            if len(xml)>0 and _local_tag(xml[0]) in ('msub','msup'):
                usym = parsePresentationMathMLSymbol(xml[0])
                return sympy.Symbol(str(usym))
            usym = _text_type(xml.text)
            if 'hat' in usym:
                return my_sympify(usym)
            return sympy.Symbol(str(usym))

        else:				# unknown tag
            raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy,None,None,'sympy representation')

    def GetContentMathML(self,asciimath,mathml):
        '''
        Convert presentation MathML to content MathML by POSTing `asciimath`
        and `mathml` to the SnuggleTeX service at SNUGGLETEX_URL.

        The HTML reply is scanned for the lines between the one containing
        'conversion to Content MathML' and the one containing
        '<h3>Maxima Input Form</h3>'; the first two of those lines are
        dropped, the rest is unescaped and wrapped in a <math> element.

        Returns the content MathML as a string.  Errors raised by requests
        (including a timeout after SNUGGLETEX_TIMEOUT seconds) propagate.
        '''
        payload = {'asciiMathInput':asciimath,
                   'asciiMathML':mathml,
                   #'asciiMathML':unicode(mathml).encode('utf-8'),
                   }
        headers = {'User-Agent':"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13"}
        # explicit timeout so a stalled conversion service cannot block the caller forever
        r = requests.post(self.SNUGGLETEX_URL,data=payload,headers=headers,
                          timeout=self.SNUGGLETEX_TIMEOUT)
        r.encoding = 'utf-8'
        ret = r.text

        mode = 0		# 1 while inside the Content MathML section of the reply
        cmathml = []
        for k in ret.split('\n'):
            if 'conversion to Content MathML' in k:
                mode = 1
                continue
            if mode==1:
                if '<h3>Maxima Input Form</h3>' in k:
                    mode = 0
                    continue
                cmathml.append(k)
        cmathml = '\n'.join(cmathml[2:])
        cmathml = '<math xmlns="http://www.w3.org/1998/Math/MathML">\n' + unescape(cmathml) + '\n</math>'
        return cmathml

#-----------------------------------------------------------------------------

def test1():
    '''Build a formula from content MathML applying <plus/> to the numbers 1 and 2.'''
    return formula('''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <plus/>
      <cn>1</cn>
      <cn>2</cn>
   </apply>
</math>
    ''')

def test2():
    '''Build a formula from content MathML for 1 plus (2 times the greek letter alpha).'''
    return formula(u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <plus/>
      <cn>1</cn>
      <apply>
         <times/>
         <cn>2</cn>
	 <ci>α</ci>
      </apply>
   </apply>
</math>
    ''')

def test3():
    '''Build a formula from content MathML for 1 divided by (2 plus the greek letter gamma).'''
    return formula('''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <divide/>
      <cn>1</cn>
      <apply>
         <plus/>
         <cn>2</cn>
         <ci>γ</ci>
      </apply>
   </apply>
</math>
    ''')

def test4():
    '''Build a formula from presentation MathML (an <mstyle> holding 1 + the fraction 2 over alpha).'''
    return formula(u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mfrac>
      <mn>2</mn>
      <mi>α</mi>
    </mfrac>
  </mstyle>
</math>
''')