edx-platform/lms/lib/symmath/formula.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# File:   formula.py
# Date:   04-May-12 (creation)
# Author: I. Chuang <ichuang@mit.edu>
#
# flexible python representation of a symbolic mathematical formula.
# Accepts Presentation MathML, Content MathML (and could also do OpenMath)
# Provides sympy representation.

import os
import sys
import string
import re
import logging
import operator
import sympy
from sympy.printing.latex import LatexPrinter
from sympy.printing.str import StrPrinter
from sympy import latex, sympify
from sympy.physics.quantum.qubit import *
from sympy.physics.quantum.state import *
# from sympy import exp, pi, I
# from sympy.core.operations import LatticeOp
# import sympy.physics.quantum.qubit

import urllib
from xml.sax.saxutils import escape, unescape
import sympy
import unicodedata
from lxml import etree
#import subprocess
import requests
from copy import deepcopy

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Emitted once, at import time, every time this module is loaded.
log.warning("Dark code. Needs review before enabling in prod.")

# Sets PYTHONIOENCODING in this process's environment at import time.
# NOTE(review): presumably meant to force UTF-8 stdio; the variable is read at
# interpreter start-up, so it likely only affects child processes -- confirm.
os.environ['PYTHONIOENCODING'] = 'utf-8'

#-----------------------------------------------------------------------------


class dot(sympy.core.operations.LatticeOp):  # my dot product
    '''
    Symbolic "dot product" operator, made available to parsed expressions
    under the name `dot` by my_sympify(do_qubit=True).

    The base class is referenced through its defining module,
    `sympy.core.operations`, rather than through `sympy.operations`; the
    latter only resolves on sympy versions whose top-level package happens to
    re-export the submodule.

    `zero` and `identity` are placeholder symbols for the two special elements
    that LatticeOp expects its subclasses to define.
    '''
    zero = sympy.Symbol('dotzero')
    identity = sympy.Symbol('dotidentity')

#class dot(sympy.Mul):	# my dot product
#    is_Mul = False


def _print_dot(self, expr):
    return '{((%s) \cdot (%s))}' % (expr.args[0], expr.args[1])

# Attach the hook above to sympy's LatexPrinter as its `_print_dot` method.
LatexPrinter._print_dot = _print_dot

#-----------------------------------------------------------------------------
# unit vectors (for 8.02)


def _print_hat(self, expr): return '\\hat{%s}' % str(expr.args[0]).lower()

# Attach the same hat hook to both the LaTeX printer and the plain str printer.
LatexPrinter._print_hat = _print_hat
StrPrinter._print_hat = _print_hat

#-----------------------------------------------------------------------------
# helper routines


def to_latex(x):
    '''
    Return `x` rendered by sympy's latex() and wrapped for MathJax display.

    Returns '' when `x` is None; otherwise returns
    '[mathjax]<latex>[/mathjax]<br>'.
    '''
    if x is None:
        return ''
    xs = latex(x)
    xs = xs.replace(r'\XI', 'XI')  # workaround for strange greek
    # sympy v6 wraps its output in $...$; sympy v7 does not.  startswith()
    # (rather than xs[0]) also copes with an empty latex string.
    if xs.startswith('$'):
        xs = xs[1:-1]
    return '[mathjax]%s[/mathjax]<br>' % xs


def my_evalf(expr, chop=False):
    '''
    Best-effort numerical evaluation.

    Calls .evalf(chop=chop) on `expr`, or on every element when `expr` is a
    list.  If evaluation raises (for instance because an object has no
    evalf method), `expr` is returned unchanged.

    Only Exception subclasses are swallowed, so KeyboardInterrupt and
    SystemExit still propagate.
    '''
    try:
        if isinstance(expr, list):
            return [x.evalf(chop=chop) for x in expr]
        return expr.evalf(chop=chop)
    except Exception:
        return expr

#-----------------------------------------------------------------------------
# my version of sympify to import expression into sympy


def my_sympify(expr, normphase=False, matrix=False, abcsym=False, do_qubit=False, symtab=None):
    '''
    Import `expr` into sympy using a customised symbol table.

    expr      -- the expression to hand to sympify()
    normphase -- if the result is a non-empty list whose first element is a
                 number, multiply every element by exp(-I*arg(first))
    matrix    -- if the result is a rectangular list of lists, return it as
                 a sympy.Matrix
    abcsym    -- add every ASCII lowercase letter not already in the table
                 as a real Symbol
    do_qubit  -- add `qubit`, `Ket`, `dot` and `bit` to the table
    symtab    -- optional symbol table used instead of the built-in default;
                 it is copied, never modified

    Returns whatever sympify() produced, after the optional post-processing.

    NOTE(review): sympify() evaluates strings with eval(); do not pass
    unsanitised user input here without reviewing that exposure.
    '''
    # make all lowercase real?
    if symtab:
        # Copy so that the updates below never leak into the caller's dict.
        varset = dict(symtab)
    else:
        varset = {'p': sympy.Symbol('p'),
                  'g': sympy.Symbol('g'),
                  'e': sympy.E,                  # for exp
                  'i': sympy.I,                  # lowercase i is also sqrt(-1)
                  'Q': sympy.Symbol('Q'),        # otherwise it is a sympy "ask key"
                  'I': sympy.Symbol('I'),        # otherwise it is sqrt(-1)
                  #'X':sympy.sympify('Matrix([[0,1],[1,0]])'),
                  #'Y':sympy.sympify('Matrix([[0,-I],[I,0]])'),
                  #'Z':sympy.sympify('Matrix([[1,0],[0,-1]])'),
                  'ZZ': sympy.Symbol('ZZ'),      # otherwise it is the PythonIntegerRing
                  'XI': sympy.Symbol('XI'),      # otherwise it is the capital \XI
                  'hat': sympy.Function('hat'),  # for unit vectors (8.02)
                  }
    if do_qubit:  # turn qubit(...) into Qubit instance
        varset.update({'qubit': sympy.physics.quantum.qubit.Qubit,
                       'Ket': sympy.physics.quantum.state.Ket,
                       'dot': dot,
                       'bit': sympy.Function('bit'),
                       })
    if abcsym:  # consider all lowercase letters as real symbols, in the parsing
        # ascii_lowercase is locale-independent (string.lowercase is not).
        for letter in string.ascii_lowercase:
            if letter not in varset:  # exclude those already done
                varset[letter] = sympy.Symbol(letter, real=True)

    sexpr = sympify(expr, locals=varset)

    # remove overall phase if sexpr is a (non-empty) list
    if normphase and isinstance(sexpr, list) and sexpr:
        if sexpr[0].is_number:
            ophase = sympy.sympify('exp(-I*arg(%s))' % sexpr[0])
            sexpr = [sympy.Mul(x, ophase) for x in sexpr]

    def to_matrix(x):
        # if x is a non-empty, rectangular list of lists, return Matrix(x);
        # anything else is returned untouched
        if not isinstance(x, list) or not x:
            return x
        if not all(isinstance(row, list) for row in x):
            return x
        rdim = len(x[0])
        if any(len(row) != rdim for row in x):
            return x
        return sympy.Matrix(x)

    if matrix:
        sexpr = to_matrix(sexpr)
    return sexpr

#-----------------------------------------------------------------------------
# class for symbolic mathematical formulas


class formula(object):
    '''
    Representation of a mathematical formula object.  Accepts mathml math expression
    for constructing, and can produce sympy translation.  The formula may or may not
    include an assignment (=).

    Attributes:
      expr        -- the input expression, stripped of surrounding whitespace
      asciimath   -- the ASCIIMath source that is sent along to SnuggleTeX
      options     -- optional string of flags; 'imaginary' makes `i` parse as sqrt(-1)
      xml         -- pre-processed presentation MathML tree (set by preprocess_pmathml)
      the_pmathml -- serialised pre-processed presentation MathML
      the_cmathml -- cached content MathML
      the_sympy   -- cached sympy expression
    '''
    def __init__(self, expr, asciimath='', options=None):
        self.expr = expr.strip()
        self.asciimath = asciimath
        self.options = options
        # Caches / intermediate results, filled in lazily by the methods below.
        self.xml = None
        self.the_pmathml = None
        self.the_cmathml = None
        self.the_sympy = None

    @staticmethod
    def _gettag(node):
        '''
        Return the tag of `node` with any "{http://...}" namespace prefix removed.
        '''
        return re.sub('{http://[^}]+}', '', node.tag)

    def is_presentation_mathml(self):
        '''
        True if the expression text contains an <mstyle element.
        '''
        return '<mstyle' in self.expr

    def is_mathml(self):
        '''
        True if the expression text contains a <math element.
        '''
        return '<math ' in self.expr

    def fix_greek_in_mathml(self, xml):
        '''
        Recursively replace single greek unicode letters inside <mi> and <ci>
        elements with their names spelled out in ascii (eg alpha); the name is
        lower-cased when the unicode name contains SMALL.

        The tree is modified in place and also returned.
        '''
        for k in xml:
            tag = self._gettag(k)
            # Elements without text are skipped: unicode(None) would otherwise
            # write the literal string "None" into the tree.
            if tag in ('mi', 'ci') and k.text is not None:
                usym = unicode(k.text)
                try:
                    udata = unicodedata.name(usym)
                except (TypeError, ValueError):
                    # not a single character, or a character without a name
                    udata = None
                if udata and 'GREEK' in udata:  # eg "GREEK SMALL LETTER BETA"
                    usym = udata.split(' ')[-1]
                    if 'SMALL' in udata:
                        usym = usym.lower()
                k.text = usym
            self.fix_greek_in_mathml(k)
        return xml

    def preprocess_pmathml(self, xml):
        r'''
        Pre-process presentation MathML from ASCIIMathML to make it more
        acceptable for SnuggleTeX, and also to accommodate some sympy
        conventions (eg hat(i) for \hat{i}).

        `xml` may be a string or an already parsed element.  The processed tree
        is stored in self.xml and returned.

        This method would be a good spot to look for an integral and convert
        it, if possible...
        '''
        if isinstance(xml, str) or isinstance(xml, unicode):
            xml = etree.fromstring(xml)  # TODO: wrap in try

        xml = self.fix_greek_in_mathml(xml)  # convert greek utf letters to greek spelled out in ascii

        gettag = self._gettag

        # f and g are processed as functions by asciimathml, eg  "f-2" turns into "<mrow><mi>f</mi><mo>-</mo></mrow><mn>2</mn>"
        # this is really terrible for turning into cmathml.
        # undo this here.
        def fix_pmathml(parent):
            # Iterate over a snapshot, because children are inserted and removed below.
            for k in list(parent):
                if (gettag(k) == 'mrow' and len(k) == 2
                        and gettag(k[0]) == 'mi' and k[0].text in ['f', 'g']
                        and gettag(k[1]) == 'mo'):
                    idx = parent.index(k)
                    parent.insert(idx, deepcopy(k[0]))  # drop the <mrow> container
                    parent.insert(idx + 1, deepcopy(k[1]))
                    parent.remove(k)
                fix_pmathml(k)

        fix_pmathml(xml)

        # hat i is turned into <mover><mi>i</mi><mo>^</mo></mover> ; mangle this into <mi>hat(i)</mi>
        # hat i also sometimes turned into <mover><mrow> <mi>j</mi> </mrow><mo>^</mo></mover>
        def fix_hat(parent):
            # Iterate over a snapshot, because children are replaced below.
            for k in list(parent):
                # The accent text is compared directly (no str()), so that a
                # non-ascii accent simply fails to match instead of raising.
                if (gettag(k) == 'mover' and len(k) == 2
                        and gettag(k[1]) == 'mo' and k[1].text == '^'):
                    base = None
                    if gettag(k[0]) == 'mi':
                        base = k[0]
                    elif (gettag(k[0]) == 'mrow' and len(k[0]) > 0
                          and gettag(k[0][0]) == 'mi'):
                        base = k[0][0]
                    if base is not None:
                        newk = etree.Element('mi')
                        newk.text = 'hat(%s)' % base.text
                        parent.replace(k, newk)
                fix_hat(k)

        fix_hat(xml)

        self.xml = xml
        return self.xml

    def get_content_mathml(self):
        '''
        Return the content MathML for this formula, converting on first use.

        The presentation MathML is pre-processed, serialised into
        self.the_pmathml and converted by GetContentMathML(); the result is
        cached in self.the_cmathml.  If pre-processing fails, an "<html>Error!
        ..." string is returned (and nothing is cached) rather than raising.
        '''
        if self.the_cmathml:
            return self.the_cmathml

        # pre-process the presentation mathml before sending it to snuggletex to convert to content mathml
        try:
            xml = self.preprocess_pmathml(self.expr)
        except Exception:
            return "<html>Error! Cannot process pmathml</html>"
        pmathml = etree.tostring(xml, pretty_print=True)
        self.the_pmathml = pmathml

        # convert to cmathml
        self.the_cmathml = self.GetContentMathML(self.asciimath, pmathml)
        return self.the_cmathml

    cmathml = property(get_content_mathml, None, None, 'content MathML representation')

    def make_sympy(self, xml=None):
        '''
        Return sympy expression for the math formula.
        The math formula is converted to Content MathML then that is parsed.

        Called without `xml`, this handles the whole formula: non-MathML input
        goes straight to my_sympify(); MathML input is parsed and the result
        cached in self.the_sympy.

        This is a recursive function, called on every CMML node. Support for
        more functions can be added by modifying opdict, about halfway down.

        Raises Exception for unknown tags or operators, and when an operator
        cannot be applied; in the latter case the offending operator and
        arguments are left in self.op and self.args.

        NOTE(review): <cn> text and hat(...) symbols are handed to sympify(),
        which uses eval(); review before exposing to untrusted MathML.
        '''
        # `is not None`, so that falsy results (eg 0) are served from cache too.
        if self.the_sympy is not None:
            return self.the_sympy

        if xml is None:  # root
            if not self.is_mathml():
                return my_sympify(self.expr)
            if self.is_presentation_mathml():
                cmml = None
                try:
                    cmml = self.cmathml
                    xml = etree.fromstring(str(cmml))
                except Exception as err:
                    # cmml is still None when fetching the content MathML itself failed
                    if cmml is not None and 'conversion from Presentation MathML to Content MathML was not successful' in cmml:
                        msg = "Illegal math expression"
                    else:
                        msg = 'Err %s while converting cmathml to xml; cmml=%s' % (err, cmml)
                    raise Exception(msg)
            else:
                xml = etree.fromstring(self.expr)
            xml = self.fix_greek_in_mathml(xml)
            self.the_sympy = self.make_sympy(xml[0])
            return self.the_sympy

        gettag = self._gettag

        # simple math
        def op_divide(*args):
            if len(args) != 2:
                raise Exception('divide given wrong number of arguments!')
            return args[0] / args[1]

        def op_plus(*args):
            total = args[0]
            for term in args[1:]:
                total = total + term
            return total

        def op_times(*args):
            product = args[0]
            for factor in args[1:]:
                product = product * factor
            return product

        def op_minus(*args):
            if len(args) == 1:
                return -args[0]
            if len(args) != 2:
                raise Exception('minus given wrong number of arguments!')
            return args[0] - args[1]

        opdict = {'plus': op_plus,
                  'divide': op_divide,
                  'times': op_times,
                  'minus': op_minus,
                  'root': sympy.sqrt,
                  'power': sympy.Pow,
                  'sin': sympy.sin,
                  'cos': sympy.cos,
                  'tan': sympy.tan,
                  'cot': sympy.cot,
                  'sinh': sympy.sinh,
                  'cosh': sympy.cosh,
                  'coth': sympy.coth,
                  'tanh': sympy.tanh,
                  'asin': sympy.asin,
                  'acos': sympy.acos,
                  'atan': sympy.atan,
                  'atan2': sympy.atan2,
                  'acot': sympy.acot,
                  'asinh': sympy.asinh,
                  'acosh': sympy.acosh,
                  'atanh': sympy.atanh,
                  'acoth': sympy.acoth,
                  'exp': sympy.exp,
                  'log': sympy.log,
                  'ln': sympy.ln,
                  }

        def parsePresentationMathMLSymbol(node):
            '''
            Parse <msub>, <msup>, <mi>, and <mn>
            '''
            ptag = gettag(node)
            if ptag in ('mn', 'mi'):
                return node.text
            if ptag == 'msub':
                return '_'.join([parsePresentationMathMLSymbol(y) for y in node])
            if ptag == 'msup':
                return '^'.join([parsePresentationMathMLSymbol(y) for y in node])
            raise Exception('[parsePresentationMathMLSymbol] unknown tag %s' % ptag)

        # parser tree for Content MathML
        tag = gettag(xml)

        # first do compound objects

        if tag == 'apply':  # apply operator
            opstr = gettag(xml[0])
            if opstr not in opdict:
                raise Exception('[formula]: unknown operator tag %s' % (opstr))
            op = opdict[opstr]
            args = [self.make_sympy(x) for x in xml[1:]]
            try:
                return op(*args)
            except Exception as err:
                # keep the failing call around for debugging
                self.args = args
                self.op = op
                raise Exception('[formula] error=%s failed to apply %s to args=%s' % (err, opstr, args))

        elif tag == 'list':  # square bracket list
            if len(xml) > 0 and gettag(xml[0]) == 'matrix':
                return self.make_sympy(xml[0])
            return [self.make_sympy(x) for x in xml]

        elif tag == 'matrix':
            return sympy.Matrix([self.make_sympy(x) for x in xml])

        elif tag == 'vector':
            return [self.make_sympy(x) for x in xml]

        # atoms are below

        elif tag == 'cn':  # number
            return sympy.sympify(xml.text)

        elif tag == 'ci':  # variable (symbol)
            if len(xml) > 0 and gettag(xml[0]) in ('msub', 'msup'):  # subscript or superscript
                usym = parsePresentationMathMLSymbol(xml[0])
                return sympy.Symbol(str(usym))
            usym = unicode(xml.text)
            if 'hat' in usym:
                return my_sympify(usym)
            if usym == 'i' and self.options is not None and 'imaginary' in self.options:  # i = sqrt(-1)
                return sympy.I
            return sympy.Symbol(str(usym))

        else:  # unknown tag
            raise Exception('[formula] unknown tag %s' % tag)

    sympy = property(make_sympy, None, None, 'sympy representation')

    def GetContentMathML(self, asciimath, mathml):
        '''
        Convert presentation MathML to content MathML by POSTing `asciimath`
        and `mathml` to the SnuggleTeX up-conversion web service, and
        extracting the content MathML section from the HTML that comes back.

        Returns the extracted (unescaped) markup wrapped in a <math> element.

        NOTE(review): the request is made with verify=False (no TLS certificate
        verification) and without a timeout -- confirm both are intended.
        '''
        # URL = 'http://192.168.1.2:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'
        # URL = 'http://127.0.0.1:8080/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'
        URL = 'https://math-xserver.mitx.mit.edu/snuggletex-webapp-1.2.2/ASCIIMathMLUpConversionDemo'

        payload = {'asciiMathInput': asciimath,
                   'asciiMathML': mathml,
                   }
        headers = {'User-Agent': "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13"}
        r = requests.post(URL, data=payload, headers=headers, verify=False)
        r.encoding = 'utf-8'
        ret = r.text

        # Collect the lines between the "conversion to Content MathML" heading
        # and the "Maxima Input Form" heading.
        capturing = False
        captured = []
        for line in ret.split('\n'):
            if 'conversion to Content MathML' in line:
                capturing = True
                continue
            if capturing:
                if '<h3>Maxima Input Form</h3>' in line:
                    capturing = False
                    continue
                captured.append(line)

        # The first two captured lines are not part of the markup and are dropped.
        body = '\n'.join(captured[2:])
        return '<math xmlns="http://www.w3.org/1998/Math/MathML">\n' + unescape(body) + '\n</math>'

#-----------------------------------------------------------------------------


def test1():
    '''
    Build a formula from Content MathML applying <plus/> to 1 and 2.
    '''
    cmathml_src = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <plus/>
      <cn>1</cn>
      <cn>2</cn>
   </apply>
</math>
    '''
    return formula(cmathml_src)


def test2():
    '''
    Build a formula from Content MathML for 1 + 2 * (greek alpha), given as
    a unicode literal.
    '''
    cmathml_src = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <plus/>
      <cn>1</cn>
      <apply>
         <times/>
         <cn>2</cn>
     <ci>α</ci>
      </apply>
   </apply>
</math>
    '''
    return formula(cmathml_src)


def test3():
    '''
    Build a formula from Content MathML for 1 / (2 + greek gamma), given as
    a plain (non-unicode) string literal.
    '''
    cmathml_src = '''
<math xmlns="http://www.w3.org/1998/Math/MathML">
   <apply>
      <divide/>
      <cn>1</cn>
      <apply>
         <plus/>
         <cn>2</cn>
         <ci>γ</ci>
      </apply>
   </apply>
</math>
    '''
    return formula(cmathml_src)


def test4():
    '''
    Build a formula from Presentation MathML for 1 + 2/(greek alpha).
    '''
    pmathml_src = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mfrac>
      <mn>2</mn>
      <mi>α</mi>
    </mfrac>
  </mstyle>
</math>
'''
    return formula(pmathml_src)


def test5():
    '''
    Sum of two matrices: Presentation MathML for cos(theta) times the 2x2
    table [[1,0],[0,1]], plus the 2x2 table [[0,1],[1,0]].
    '''
    pmathml_src = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mrow>
      <mi>cos</mi>
      <mrow>
        <mo>(</mo>
        <mi>&#x3B8;</mi>
        <mo>)</mo>
      </mrow>
    </mrow>
    <mo>&#x22C5;</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
    <mo>+</mo>
    <mrow>
      <mo>[</mo>
      <mtable>
        <mtr>
          <mtd>
            <mn>0</mn>
          </mtd>
          <mtd>
            <mn>1</mn>
          </mtd>
        </mtr>
        <mtr>
          <mtd>
            <mn>1</mn>
          </mtd>
          <mtd>
            <mn>0</mn>
          </mtd>
        </mtr>
      </mtable>
      <mo>]</mo>
    </mrow>
  </mstyle>
</math>
'''
    return formula(pmathml_src)


def test6():
    '''
    Imaginary numbers: Presentation MathML for 1 + i, built with the
    'imaginaryi' option string.
    '''
    pmathml_src = u'''
<math xmlns="http://www.w3.org/1998/Math/MathML">
  <mstyle displaystyle="true">
    <mn>1</mn>
    <mo>+</mo>
    <mi>i</mi>
  </mstyle>
</math>
'''
    return formula(pmathml_src, options='imaginaryi')