diff --git a/common/lib/capa/capa/capa_problem.py b/common/lib/capa/capa/capa_problem.py
index 9a5a15a696..29e9b7eb97 100644
--- a/common/lib/capa/capa/capa_problem.py
+++ b/common/lib/capa/capa/capa_problem.py
@@ -30,6 +30,8 @@ import sys
from lxml import etree
from xml.sax.saxutils import unescape
+import chem
+import chem.chemcalc
import calc
from correctmap import CorrectMap
import eia
@@ -72,7 +74,8 @@ global_context = {'random': random,
'math': math,
'scipy': scipy,
'calc': calc,
- 'eia': eia}
+ 'eia': eia,
+ 'chemcalc': chem.chemcalc}
# These should be removed from HTML output, including all subelements
html_problem_semantics = ["codeparam", "responseparam", "answer", "script", "hintgroup"]
@@ -436,7 +439,7 @@ class LoncapaProblem(object):
sys.path = original_path + self._extract_system_path(script)
stype = script.get('type')
-
+
if stype:
if 'javascript' in stype:
continue # skip javascript
diff --git a/common/lib/capa/capa/chem/__init__.py b/common/lib/capa/capa/chem/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/common/lib/capa/capa/chem/__init__.py
@@ -0,0 +1 @@
+
diff --git a/common/lib/capa/capa/chem/chemcalc.py b/common/lib/capa/capa/chem/chemcalc.py
new file mode 100644
index 0000000000..65d1887d2e
--- /dev/null
+++ b/common/lib/capa/capa/chem/chemcalc.py
@@ -0,0 +1,540 @@
+from __future__ import division
+import copy
+import logging
+import math
+import operator
+import re
+import unittest
+import numpy
+import numbers
+import scipy.constants
+
+from pyparsing import Literal, Keyword, Word, nums, StringEnd, Optional, Forward, OneOrMore
+from pyparsing import ParseException
+import nltk
+from nltk.tree import Tree
+
+local_debug = None
+
+
+def log(s, output_type=None):
+ if local_debug:
+ print s
+ if output_type == 'html':
+ f.write(s + '\n
\n')
+
+## Defines a simple pyparsing tokenizer for chemical equations
+elements = ['Ac','Ag','Al','Am','Ar','As','At','Au','B','Ba','Be',
+ 'Bh','Bi','Bk','Br','C','Ca','Cd','Ce','Cf','Cl','Cm',
+ 'Cn','Co','Cr','Cs','Cu','Db','Ds','Dy','Er','Es','Eu',
+ 'F','Fe','Fl','Fm','Fr','Ga','Gd','Ge','H','He','Hf',
+ 'Hg','Ho','Hs','I','In','Ir','K','Kr','La','Li','Lr',
+ 'Lu','Lv','Md','Mg','Mn','Mo','Mt','N','Na','Nb','Nd',
+ 'Ne','Ni','No','Np','O','Os','P','Pa','Pb','Pd','Pm',
+ 'Po','Pr','Pt','Pu','Ra','Rb','Re','Rf','Rg','Rh','Rn',
+ 'Ru','S','Sb','Sc','Se','Sg','Si','Sm','Sn','Sr','Ta',
+ 'Tb','Tc','Te','Th','Ti','Tl','Tm','U','Uuo','Uup',
+ 'Uus','Uut','V','W','Xe','Y','Yb','Zn','Zr']
+digits = map(str, range(10))
+symbols = list("[](){}^+-/")
+phases = ["(s)", "(l)", "(g)", "(aq)"]
+tokens = reduce(lambda a, b: a ^ b, map(Literal, elements + digits + symbols + phases))
+tokenizer = OneOrMore(tokens) + StringEnd()
+
+
+def orjoin(l):
+ return "'" + "' | '".join(l) + "'"
+
+## Defines an NLTK parser for tokenized equations
+grammar = """
+ S -> multimolecule | multimolecule '+' S
+ multimolecule -> count molecule | molecule
+ count -> number | number '/' number
+ molecule -> unphased | unphased phase
+ unphased -> group | paren_group_round | paren_group_square
+ element -> """ + orjoin(elements) + """
+ digit -> """ + orjoin(digits) + """
+ phase -> """ + orjoin(phases) + """
+ number -> digit | digit number
+ group -> suffixed | suffixed group
+ paren_group_round -> '(' group ')'
+ paren_group_square -> '[' group ']'
+ plus_minus -> '+' | '-'
+ number_suffix -> number
+ ion_suffix -> '^' number plus_minus | '^' plus_minus
+ suffix -> number_suffix | number_suffix ion_suffix | ion_suffix
+ unsuffixed -> element | paren_group_round | paren_group_square
+
+ suffixed -> unsuffixed | unsuffixed suffix
+"""
+parser = nltk.ChartParser(nltk.parse_cfg(grammar))
+
+
+def clean_parse_tree(tree):
+ ''' The parse tree contains a lot of redundant
+ nodes. E.g. paren_groups have groups as children, etc. This will
+ clean up the tree.
+ '''
+ def unparse_number(n):
+ ''' Go from a number parse tree to a number '''
+ if len(n) == 1:
+ rv = n[0][0]
+ else:
+ rv = n[0][0] + unparse_number(n[1])
+ return rv
+
+ def null_tag(n):
+ ''' Remove a tag '''
+ return n[0]
+
+ def ion_suffix(n):
+ '''1. "if" part handles special case
+ 2. "else" part is general behaviour '''
+
+ if n[1:][0].node == 'number' and n[1:][0][0][0] == '1':
+ # if suffix is explicitly 1, like ^1-
+ # strip 1, leave only sign: ^-
+ return nltk.tree.Tree(n.node, n[2:])
+ else:
+ return nltk.tree.Tree(n.node, n[1:])
+
+ dispatch = {'number': lambda x: nltk.tree.Tree("number", [unparse_number(x)]),
+ 'unphased': null_tag,
+ 'unsuffixed': null_tag,
+ 'number_suffix': lambda x: nltk.tree.Tree('number_suffix', [unparse_number(x[0])]),
+ 'suffixed': lambda x: len(x) > 1 and x or x[0],
+ 'ion_suffix': ion_suffix,
+ 'paren_group_square': lambda x: nltk.tree.Tree(x.node, x[1]),
+ 'paren_group_round': lambda x: nltk.tree.Tree(x.node, x[1])}
+
+ if type(tree) == str:
+ return tree
+
+ old_node = None
+ ## This loop means that if a node is processed, and returns a child,
+ ## the child will be processed.
+ while tree.node in dispatch and tree.node != old_node:
+ old_node = tree.node
+ tree = dispatch[tree.node](tree)
+
+ children = []
+ for child in tree:
+ child = clean_parse_tree(child)
+ children.append(child)
+
+ tree = nltk.tree.Tree(tree.node, children)
+
+ return tree
+
+
+def merge_children(tree, tags):
+ ''' nltk, by documentation, cannot do arbitrary length
+ groups. Instead of:
+ (group 1 2 3 4)
+ It has to handle this recursively:
+ (group 1 (group 2 (group 3 (group 4))))
+ We do the cleanup of converting from the latter to the former (as a
+ '''
+ if type(tree) == str:
+ return tree
+
+ merged_children = []
+ done = False
+ #print '00000', tree
+ ## Merge current tag
+ while not done:
+ done = True
+ for child in tree:
+ if type(child) == nltk.tree.Tree and child.node == tree.node and tree.node in tags:
+ merged_children = merged_children + list(child)
+ done = False
+ else:
+ merged_children = merged_children + [child]
+ tree = nltk.tree.Tree(tree.node, merged_children)
+ merged_children = []
+ #print '======',tree
+
+ # And recurse
+ children = []
+ for child in tree:
+ children.append(merge_children(child, tags))
+
+ #return tree
+ return nltk.tree.Tree(tree.node, children)
+
+
+def render_to_html(tree):
+ ''' Renders a cleaned tree to HTML '''
+
+ def molecule_count(tree, children):
+ # If an integer, return that integer
+ if len(tree) == 1:
+ return tree[0][0]
+ # If a fraction, return the fraction
+ if len(tree) == 3:
+ return " {num}⁄{den} ".format(num=tree[0][0], den=tree[2][0])
+ return "Error"
+
+ def subscript(tree, children):
+ return "{sub}".format(sub=children)
+
+ def superscript(tree, children):
+ return "{sup}".format(sup=children)
+
+ def round_brackets(tree, children):
+ return "({insider})".format(insider=children)
+
+ def square_brackets(tree, children):
+ return "[{insider}]".format(insider=children)
+
+ dispatch = {'count': molecule_count,
+ 'number_suffix': subscript,
+ 'ion_suffix': superscript,
+ 'paren_group_round': round_brackets,
+ 'paren_group_square': square_brackets}
+
+ if type(tree) == str:
+ return tree
+ else:
+ children = "".join(map(render_to_html, tree))
+ if tree.node in dispatch:
+ return dispatch[tree.node](tree, children)
+ else:
+ return children.replace(' ', '')
+
+
+def clean_and_render_to_html(s):
+ ''' render a string to html '''
+ status = render_to_html(get_finale_tree(s))
+ return status
+
+
+def get_finale_tree(s):
+ ''' return final tree after merge and clean '''
+ tokenized = tokenizer.parseString(s)
+ parsed = parser.parse(tokenized)
+ merged = merge_children(parsed, {'S','group'})
+ final = clean_parse_tree(merged)
+ return final
+
+
+def check_equality(tuple1, tuple2):
+ ''' return True if tuples of multimolecules are equal '''
+ list1 = list(tuple1)
+ list2 = list(tuple2)
+
+ # Hypo: trees where are levels count+molecule vs just molecule
+ # cannot be sorted properly (tested on test_complex_additivity)
+ # But without factors and phases sorting seems to work.
+
+ # Also for lists of multimolecules without factors and phases
+ # sorting seems to work fine.
+ list1.sort()
+ list2.sort()
+ return list1 == list2
+
+
+def compare_chemical_expression(s1, s2, ignore_state=False):
+ ''' It does comparison between two equations.
+ It uses divide_chemical_expression and check if division is 1
+ '''
+ return divide_chemical_expression(s1, s2, ignore_state) == 1
+
+
+def divide_chemical_expression(s1, s2, ignore_state=False):
+ ''' Compare chemical equations for difference
+ in factors. Ideas:
+ - extract factors and phases to standalone lists,
+ - compare equations without factors and phases,
+ - divide lists of factors for each other and check
+ for equality of every element in list,
+ - return result of factor division '''
+
+ # parsed final trees
+ treedic = {}
+ treedic['1'] = get_finale_tree(s1)
+ treedic['2'] = get_finale_tree(s2)
+
+ # strip phases and factors
+ # collect factors in list
+ for i in ('1', '2'):
+ treedic[i + ' cleaned_mm_list'] = []
+ treedic[i + ' factors'] = []
+ treedic[i + ' phases'] = []
+ for el in treedic[i].subtrees(filter=lambda t: t.node == 'multimolecule'):
+ count_subtree = [t for t in el.subtrees() if t.node == 'count']
+ group_subtree = [t for t in el.subtrees() if t.node == 'group']
+ phase_subtree = [t for t in el.subtrees() if t.node == 'phase']
+ if count_subtree:
+ if len(count_subtree[0]) > 1:
+ treedic[i + ' factors'].append(
+ int(count_subtree[0][0][0]) /
+ int(count_subtree[0][2][0]))
+ else:
+ treedic[i + ' factors'].append(int(count_subtree[0][0][0]))
+ else:
+ treedic[i + ' factors'].append(1.0)
+ if phase_subtree:
+ treedic[i + ' phases'].append(phase_subtree[0][0])
+ else:
+ treedic[i + ' phases'].append(' ')
+ treedic[i + ' cleaned_mm_list'].append(
+ Tree('multimolecule', [Tree('molecule', group_subtree)]))
+
+ # order of factors and phases must mirror the order of multimolecules,
+ # use 'decorate, sort, undecorate' pattern
+ treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'] = zip(
+ *sorted(zip(treedic['1 cleaned_mm_list'], treedic['1 factors'], treedic['1 phases'])))
+
+ treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'] = zip(
+ *sorted(zip(treedic['2 cleaned_mm_list'], treedic['2 factors'], treedic['2 phases'])))
+
+ # check if equations are correct without factors
+ if not check_equality(treedic['1 cleaned_mm_list'], treedic['2 cleaned_mm_list']):
+ return False
+
+ # phases are ruled by ingore_state flag
+ if not ignore_state: # phases matters
+ if treedic['1 phases'] != treedic['2 phases']:
+ return False
+
+ if any(map(lambda x, y: x / y - treedic['1 factors'][0] / treedic['2 factors'][0],
+ treedic['1 factors'], treedic['2 factors'])):
+ log('factors are not proportional')
+ return False
+ else: # return ratio
+ return int(max(treedic['1 factors'][0] / treedic['2 factors'][0],
+ treedic['2 factors'][0] / treedic['1 factors'][0]))
+
+
+class Test_Compare_Equations(unittest.TestCase):
+
+ def test_compare_incorrect_order_of_atoms_in_molecule(self):
+ self.assertFalse(compare_chemical_expression("H2O + CO2", "O2C + OH2"))
+
+ def test_compare_same_order_no_phases_no_factors_no_ions(self):
+ self.assertTrue(compare_chemical_expression("H2O + CO2", "CO2+H2O"))
+
+ def test_compare_different_order_no_phases_no_factors_no_ions(self):
+ self.assertTrue(compare_chemical_expression("H2O + CO2", "CO2 + H2O"))
+
+ def test_compare_different_order_three_multimolecule(self):
+ self.assertTrue(compare_chemical_expression("H2O + Fe(OH)3 + CO2", "CO2 + H2O + Fe(OH)3"))
+
+ def test_compare_same_factors(self):
+ self.assertTrue(compare_chemical_expression("3H2O + 2CO2", "2CO2 + 3H2O "))
+
+ def test_compare_different_factors(self):
+ self.assertFalse(compare_chemical_expression("2H2O + 3CO2", "2CO2 + 3H2O "))
+
+ def test_compare_correct_ions(self):
+ self.assertTrue(compare_chemical_expression("H^+ + OH^-", " OH^- + H^+ "))
+
+ def test_compare_wrong_ions(self):
+ self.assertFalse(compare_chemical_expression("H^+ + OH^-", " OH^- + H^- "))
+
+ def test_compare_parent_groups_ions(self):
+ self.assertTrue(compare_chemical_expression("Fe(OH)^2- + (OH)^-", " (OH)^- + Fe(OH)^2- "))
+
+ def test_compare_correct_factors_ions_and_one(self):
+ self.assertTrue(compare_chemical_expression("3H^+ + 2OH^-", " 2OH^- + 3H^+ "))
+
+ def test_compare_wrong_factors_ions(self):
+ self.assertFalse(compare_chemical_expression("2H^+ + 3OH^-", " 2OH^- + 3H^+ "))
+
+ def test_compare_float_factors(self):
+ self.assertTrue(compare_chemical_expression("7/2H^+ + 3/5OH^-", " 3/5OH^- + 7/2H^+ "))
+
+ # Phases tests
+ def test_compare_phases_ignored(self):
+ self.assertTrue(compare_chemical_expression(
+ "H2O(s) + CO2", "H2O+CO2", ignore_state=True))
+
+ def test_compare_phases_not_ignored_explicitly(self):
+ self.assertFalse(compare_chemical_expression(
+ "H2O(s) + CO2", "H2O+CO2", ignore_state=False))
+
+ def test_compare_phases_not_ignored(self): # same as previous
+ self.assertFalse(compare_chemical_expression(
+ "H2O(s) + CO2", "H2O+CO2"))
+
+ def test_compare_phases_not_ignored_explicitly(self):
+ self.assertTrue(compare_chemical_expression(
+ "H2O(s) + CO2", "H2O(s)+CO2", ignore_state=False))
+
+ # all in one cases
+ def test_complex_additivity(self):
+ self.assertTrue(compare_chemical_expression(
+ "5(H1H212)^70010- + 2H20 + 7/2HCl + H2O",
+ "7/2HCl + 2H20 + H2O + 5(H1H212)^70010-"))
+
+ def test_complex_additivity_wrong(self):
+ self.assertFalse(compare_chemical_expression(
+ "5(H1H212)^70010- + 2H20 + 7/2HCl + H2O",
+ "2H20 + 7/2HCl + H2O + 5(H1H212)^70011-"))
+
+ def test_complex_all_grammar(self):
+ self.assertTrue(compare_chemical_expression(
+ "5[Ni(NH3)4]^2+ + 5/2SO4^2-",
+ "5/2SO4^2- + 5[Ni(NH3)4]^2+"))
+
+ # special cases
+
+ def test_compare_one_superscript_explicitly_set(self):
+ self.assertTrue(compare_chemical_expression("H^+ + OH^1-", " OH^- + H^+ "))
+
+ def test_compare_equal_factors_differently_set(self):
+ self.assertTrue(compare_chemical_expression("6/2H^+ + OH^-", " OH^- + 3H^+ "))
+
+ def test_compare_one_subscript_explicitly_set(self):
+ self.assertFalse(compare_chemical_expression("H2 + CO2", "H2 + C102"))
+
+
+class Test_Divide_Equations(unittest.TestCase):
+ ''' as compare_ use divide_,
+ tests here must consider different
+ division (not equality) cases '''
+
+ def test_divide_wrong_factors(self):
+ self.assertFalse(divide_chemical_expression(
+ "5(H1H212)^70010- + 10H2O", "5H2O + 10(H1H212)^70010-"))
+
+ def test_divide_right(self):
+ self.assertEqual(divide_chemical_expression(
+ "5(H1H212)^70010- + 10H2O", "10H2O + 5(H1H212)^70010-"), 1)
+
+ def test_divide_wrong_reagents(self):
+ self.assertFalse(divide_chemical_expression(
+ "H2O + CO2", "CO2"))
+
+ def test_divide_right_simple(self):
+ self.assertEqual(divide_chemical_expression(
+ "H2O + CO2", "H2O+CO2"), 1)
+
+ def test_divide_right_phases(self):
+ self.assertEqual(divide_chemical_expression(
+ "H2O(s) + CO2", "2H2O(s)+2CO2"), 2)
+
+ def test_divide_wrong_phases(self):
+ self.assertFalse(divide_chemical_expression(
+ "H2O(s) + CO2", "2H2O+2CO2(s)"))
+
+ def test_divide_wrong_phases_but_phases_ignored(self):
+ self.assertEqual(divide_chemical_expression(
+ "H2O(s) + CO2", "2H2O+2CO2(s)", ignore_state=True), 2)
+
+ def test_divide_order(self):
+ self.assertEqual(divide_chemical_expression(
+ "2CO2 + H2O", "2H2O+4CO2"), 2)
+
+ def test_divide_fract_to_int(self):
+ self.assertEqual(divide_chemical_expression(
+ "3/2CO2 + H2O", "2H2O+3CO2"), 2)
+
+ def test_divide_fract_to_frac(self):
+ self.assertEqual(divide_chemical_expression(
+ "3/4CO2 + H2O", "2H2O+9/6CO2"), 2)
+
+ def test_divide_fract_to_frac_wrog(self):
+ self.assertFalse(divide_chemical_expression(
+ "6/2CO2 + H2O", "2H2O+9/6CO2"), 2)
+
+
+class Test_Render_Equations(unittest.TestCase):
+
+ def test_render1(self):
+ s = "H2O + CO2"
+ out = clean_and_render_to_html(s)
+ correct = "H2O+CO2"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render_uncorrect_reaction(self):
+ s = "O2C + OH2"
+ out = clean_and_render_to_html(s)
+ correct = "O2C+OH2"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render2(self):
+ s = "CO2 + H2O + Fe(OH)3"
+ out = clean_and_render_to_html(s)
+ correct = "CO2+H2O+Fe(OH)3"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render3(self):
+ s = "3H2O + 2CO2"
+ out = clean_and_render_to_html(s)
+ correct = "3H2O+2CO2"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render4(self):
+ s = "H^+ + OH^-"
+ out = clean_and_render_to_html(s)
+ correct = "H++OH-"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render5(self):
+ s = "Fe(OH)^2- + (OH)^-"
+ out = clean_and_render_to_html(s)
+ correct = "Fe(OH)2-+(OH)-"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render6(self):
+ s = "7/2H^+ + 3/5OH^-"
+ out = clean_and_render_to_html(s)
+ correct = "7⁄2H++3⁄5OH-"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render7(self):
+ s = "5(H1H212)^70010- + 2H2O + 7/2HCl + H2O"
+ out = clean_and_render_to_html(s)
+ correct = "5(H1H212)70010-+2H2O+7⁄2HCl+H2O"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render8(self):
+ s = "H2O(s) + CO2"
+ out = clean_and_render_to_html(s)
+ correct = "H2O(s)+CO2"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render9(self):
+ s = "5[Ni(NH3)4]^2+ + 5/2SO4^2-"
+ #import ipdb; ipdb.set_trace()
+ out = clean_and_render_to_html(s)
+ correct = "5[Ni(NH3)4]2++5⁄2SO42-"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+ def test_render_error(self):
+ s = "5.2H20"
+ self.assertRaises(ParseException, clean_and_render_to_html, s)
+
+ def test_render_simple_brackets(self):
+ s = "(Ar)"
+ out = clean_and_render_to_html(s)
+ correct = "(Ar)"
+ log(out + ' ------- ' + correct, 'html')
+ self.assertEqual(out, correct)
+
+
+def suite():
+
+ testcases = [Test_Compare_Equations, Test_Divide_Equations, Test_Render_Equations]
+ suites = []
+ for testcase in testcases:
+ suites.append(unittest.TestLoader().loadTestsFromTestCase(testcase))
+ return unittest.TestSuite(suites)
+
+if __name__ == "__main__":
+ local_debug = True
+ with open('render.html', 'w') as f:
+ unittest.TextTestRunner(verbosity=2).run(suite())
+ # open render.html to look at rendered equations