Merge pull request #17413 from edx/bmedx/upgrade_nltk
Upgrade NLTK to 3.2.5, fixup usage and tests
This commit is contained in:
@@ -9,7 +9,7 @@ from __future__ import division
|
||||
|
||||
import string
|
||||
|
||||
import nltk
|
||||
from nltk.metrics.distance import edit_distance
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
@@ -106,6 +106,6 @@ def validate_password_dictionary(value):
|
||||
|
||||
if password_max_edit_distance and password_dictionary:
|
||||
for word in password_dictionary:
|
||||
distance = nltk.metrics.distance.edit_distance(value, word)
|
||||
distance = edit_distance(value, word)
|
||||
if distance <= password_max_edit_distance:
|
||||
raise ValidationError(_("Too similar to a restricted dictionary word."), code="dictionary_word")
|
||||
|
||||
@@ -51,9 +51,9 @@ def quote_attr(s):
|
||||
|
||||
|
||||
class OptionInputTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Make sure option inputs work
|
||||
'''
|
||||
"""
|
||||
|
||||
def test_rendering(self):
|
||||
xml_str = """<optioninput options="('Up','Down','Don't know')" id="sky_input" correct="Up"/>"""
|
||||
@@ -89,7 +89,9 @@ class OptionInputTest(unittest.TestCase):
|
||||
f = inputtypes.OptionInput.parse_options
|
||||
|
||||
def check(input, options):
|
||||
"""Take list of options, confirm that output is in the silly doubled format"""
|
||||
"""
|
||||
Take list of options, confirm that output is in the silly doubled format
|
||||
"""
|
||||
expected = [(o, o) for o in options]
|
||||
self.assertEqual(f(input), expected)
|
||||
|
||||
@@ -108,9 +110,9 @@ class OptionInputTest(unittest.TestCase):
|
||||
|
||||
|
||||
class ChoiceGroupTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Test choice groups, radio groups, and checkbox groups
|
||||
'''
|
||||
"""
|
||||
|
||||
def check_group(self, tag, expected_input_type, expected_suffix):
|
||||
xml_str = """
|
||||
@@ -248,9 +250,9 @@ class JSInputTest(unittest.TestCase):
|
||||
|
||||
|
||||
class TextLineTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that textline inputs work, with and without math.
|
||||
'''
|
||||
"""
|
||||
|
||||
def test_rendering(self):
|
||||
size = "42"
|
||||
@@ -369,9 +371,9 @@ class TextLineTest(unittest.TestCase):
|
||||
|
||||
|
||||
class FileSubmissionTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that file submission inputs work
|
||||
'''
|
||||
"""
|
||||
|
||||
def test_rendering(self):
|
||||
allowed_files = "runme.py nooooo.rb ohai.java"
|
||||
@@ -413,9 +415,9 @@ class FileSubmissionTest(unittest.TestCase):
|
||||
|
||||
|
||||
class CodeInputTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that codeinput inputs work
|
||||
'''
|
||||
"""
|
||||
|
||||
def test_rendering(self):
|
||||
mode = "parrot"
|
||||
@@ -434,8 +436,6 @@ class CodeInputTest(unittest.TestCase):
|
||||
|
||||
element = etree.fromstring(xml_str)
|
||||
|
||||
escapedict = {'"': '"'}
|
||||
|
||||
state = {
|
||||
'value': 'print "good evening"',
|
||||
'status': 'incomplete',
|
||||
@@ -471,9 +471,9 @@ class CodeInputTest(unittest.TestCase):
|
||||
|
||||
|
||||
class MatlabTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Test Matlab input types
|
||||
'''
|
||||
"""
|
||||
def setUp(self):
|
||||
super(MatlabTest, self).setUp()
|
||||
self.rows = '10'
|
||||
@@ -921,10 +921,9 @@ def html_tree_equal(received, expected):
|
||||
|
||||
|
||||
class SchematicTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that schematic inputs work
|
||||
'''
|
||||
|
||||
"""
|
||||
def test_rendering(self):
|
||||
height = '12'
|
||||
width = '33'
|
||||
@@ -977,10 +976,9 @@ class SchematicTest(unittest.TestCase):
|
||||
|
||||
|
||||
class ImageInputTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that image inputs work
|
||||
'''
|
||||
|
||||
"""
|
||||
def check(self, value, egx, egy):
|
||||
height = '78'
|
||||
width = '427'
|
||||
@@ -1037,10 +1035,9 @@ class ImageInputTest(unittest.TestCase):
|
||||
|
||||
|
||||
class CrystallographyTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that crystallography inputs work
|
||||
'''
|
||||
|
||||
"""
|
||||
def test_rendering(self):
|
||||
height = '12'
|
||||
width = '33'
|
||||
@@ -1079,10 +1076,9 @@ class CrystallographyTest(unittest.TestCase):
|
||||
|
||||
|
||||
class VseprTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that vsepr inputs work
|
||||
'''
|
||||
|
||||
"""
|
||||
def test_rendering(self):
|
||||
height = '12'
|
||||
width = '33'
|
||||
@@ -1127,9 +1123,9 @@ class VseprTest(unittest.TestCase):
|
||||
|
||||
|
||||
class ChemicalEquationTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that chemical equation inputs work.
|
||||
'''
|
||||
"""
|
||||
def setUp(self):
|
||||
super(ChemicalEquationTest, self).setUp()
|
||||
self.size = "42"
|
||||
@@ -1144,7 +1140,9 @@ class ChemicalEquationTest(unittest.TestCase):
|
||||
self.the_input = lookup_tag('chemicalequationinput')(test_capa_system(), element, state)
|
||||
|
||||
def test_rendering(self):
|
||||
''' Verify that the render context matches the expected render context'''
|
||||
"""
|
||||
Verify that the render context matches the expected render context
|
||||
"""
|
||||
context = self.the_input._get_render_context() # pylint: disable=protected-access
|
||||
prob_id = 'prob_1_2'
|
||||
expected = {
|
||||
@@ -1161,7 +1159,9 @@ class ChemicalEquationTest(unittest.TestCase):
|
||||
self.assertEqual(context, expected)
|
||||
|
||||
def test_chemcalc_ajax_sucess(self):
|
||||
''' Verify that using the correct dispatch and valid data produces a valid response'''
|
||||
"""
|
||||
Verify that using the correct dispatch and valid data produces a valid response
|
||||
"""
|
||||
data = {'formula': "H"}
|
||||
response = self.the_input.handle_ajax("preview_chemcalc", data)
|
||||
|
||||
@@ -1366,10 +1366,9 @@ class FormulaEquationTest(unittest.TestCase):
|
||||
|
||||
|
||||
class DragAndDropTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Check that drag and drop inputs work
|
||||
'''
|
||||
|
||||
"""
|
||||
def test_rendering(self):
|
||||
path_to_images = '/dummy-static/images/'
|
||||
|
||||
@@ -1441,9 +1440,9 @@ class DragAndDropTest(unittest.TestCase):
|
||||
|
||||
|
||||
class AnnotationInputTest(unittest.TestCase):
|
||||
'''
|
||||
"""
|
||||
Make sure annotation inputs work
|
||||
'''
|
||||
"""
|
||||
def test_rendering(self):
|
||||
xml_str = '''
|
||||
<annotationinput>
|
||||
|
||||
@@ -8,7 +8,7 @@ from pyparsing import Literal, OneOrMore, ParseException, StringEnd
|
||||
|
||||
ARROWS = ('<->', '->')
|
||||
|
||||
## Defines a simple pyparsing tokenizer for chemical equations
|
||||
# Defines a simple pyparsing tokenizer for chemical equations
|
||||
elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be',
|
||||
'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm',
|
||||
'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu',
|
||||
@@ -30,7 +30,8 @@ tokenizer = OneOrMore(tokens) + StringEnd()
|
||||
def _orjoin(l):
|
||||
return "'" + "' | '".join(l) + "'"
|
||||
|
||||
## Defines an NLTK parser for tokenized expressions
|
||||
|
||||
# Defines an NLTK parser for tokenized expressions
|
||||
grammar = """
|
||||
S -> multimolecule | multimolecule '+' S
|
||||
multimolecule -> count molecule | molecule
|
||||
@@ -52,16 +53,19 @@ grammar = """
|
||||
|
||||
suffixed -> unsuffixed | unsuffixed suffix
|
||||
"""
|
||||
parser = nltk.ChartParser(nltk.parse_cfg(grammar))
|
||||
parser = nltk.ChartParser(nltk.CFG.fromstring(grammar))
|
||||
|
||||
|
||||
def _clean_parse_tree(tree):
|
||||
''' The parse tree contains a lot of redundant
|
||||
"""
|
||||
The parse tree contains a lot of redundant
|
||||
nodes. E.g. paren_groups have groups as children, etc. This will
|
||||
clean up the tree.
|
||||
'''
|
||||
"""
|
||||
def unparse_number(n):
|
||||
''' Go from a number parse tree to a number '''
|
||||
"""
|
||||
Go from a number parse tree to a number
|
||||
"""
|
||||
if len(n) == 1:
|
||||
rv = n[0][0]
|
||||
else:
|
||||
@@ -69,19 +73,22 @@ def _clean_parse_tree(tree):
|
||||
return rv
|
||||
|
||||
def null_tag(n):
|
||||
''' Remove a tag '''
|
||||
"""
|
||||
Remove a tag
|
||||
"""
|
||||
return n[0]
|
||||
|
||||
def ion_suffix(n):
|
||||
'''1. "if" part handles special case
|
||||
2. "else" part is general behaviour '''
|
||||
|
||||
if n[1:][0].node == 'number' and n[1:][0][0][0] == '1':
|
||||
"""
|
||||
1. "if" part handles special case
|
||||
2. "else" part is general behaviour
|
||||
"""
|
||||
if n[1:][0].label() == 'number' and n[1:][0][0][0] == '1':
|
||||
# if suffix is explicitly 1, like ^1-
|
||||
# strip 1, leave only sign: ^-
|
||||
return nltk.tree.Tree(n.node, n[2:])
|
||||
return nltk.tree.Tree(n.label(), n[2:])
|
||||
else:
|
||||
return nltk.tree.Tree(n.node, n[1:])
|
||||
return nltk.tree.Tree(n.label(), n[1:])
|
||||
|
||||
dispatch = {'number': lambda x: nltk.tree.Tree("number", [unparse_number(x)]),
|
||||
'unphased': null_tag,
|
||||
@@ -89,40 +96,38 @@ def _clean_parse_tree(tree):
|
||||
'number_suffix': lambda x: nltk.tree.Tree('number_suffix', [unparse_number(x[0])]),
|
||||
'suffixed': lambda x: len(x) > 1 and x or x[0],
|
||||
'ion_suffix': ion_suffix,
|
||||
'paren_group_square': lambda x: nltk.tree.Tree(x.node, x[1]),
|
||||
'paren_group_round': lambda x: nltk.tree.Tree(x.node, x[1])}
|
||||
'paren_group_square': lambda x: nltk.tree.Tree(x.label(), x[1]),
|
||||
'paren_group_round': lambda x: nltk.tree.Tree(x.label(), x[1])}
|
||||
|
||||
if isinstance(tree, str):
|
||||
return tree
|
||||
|
||||
old_node = None
|
||||
## This loop means that if a node is processed, and returns a child,
|
||||
## the child will be processed.
|
||||
while tree.node in dispatch and tree.node != old_node:
|
||||
old_node = tree.node
|
||||
tree = dispatch[tree.node](tree)
|
||||
# This loop means that if a node is processed, and returns a child,
|
||||
# the child will be processed.
|
||||
while tree.label() in dispatch and tree.label() != old_node:
|
||||
old_node = tree.label()
|
||||
tree = dispatch[tree.label()](tree)
|
||||
|
||||
children = []
|
||||
for child in tree:
|
||||
child = _clean_parse_tree(child)
|
||||
children.append(child)
|
||||
|
||||
tree = nltk.tree.Tree(tree.node, children)
|
||||
tree = nltk.tree.Tree(tree.label(), children)
|
||||
|
||||
return tree
|
||||
|
||||
|
||||
def _merge_children(tree, tags):
|
||||
''' nltk, by documentation, cannot do arbitrary length
|
||||
groups. Instead of:
|
||||
(group 1 2 3 4)
|
||||
It has to handle this recursively:
|
||||
(group 1 (group 2 (group 3 (group 4))))
|
||||
"""
|
||||
nltk, by documentation, cannot do arbitrary length groups.
|
||||
Instead of: (group 1 2 3 4)
|
||||
It has to handle this recursively: (group 1 (group 2 (group 3 (group 4))))
|
||||
We do the cleanup of converting from the latter to the former.
|
||||
'''
|
||||
"""
|
||||
if tree is None:
|
||||
# There was a problem--shouldn't have empty trees (NOTE: see this with input e.g. 'H2O(', or 'Xe+').
|
||||
# Haven't grokked the code to tell if this is indeed the right thing to do.
|
||||
raise ParseException("Shouldn't have empty trees")
|
||||
|
||||
if isinstance(tree, str):
|
||||
@@ -130,32 +135,31 @@ def _merge_children(tree, tags):
|
||||
|
||||
merged_children = []
|
||||
done = False
|
||||
#print '00000', tree
|
||||
## Merge current tag
|
||||
|
||||
# Merge current tag
|
||||
while not done:
|
||||
done = True
|
||||
for child in tree:
|
||||
if isinstance(child, nltk.tree.Tree) and child.node == tree.node and tree.node in tags:
|
||||
if isinstance(child, nltk.tree.Tree) and child.label() == tree.label() and tree.label() in tags:
|
||||
merged_children = merged_children + list(child)
|
||||
done = False
|
||||
else:
|
||||
merged_children = merged_children + [child]
|
||||
tree = nltk.tree.Tree(tree.node, merged_children)
|
||||
tree = nltk.tree.Tree(tree.label(), merged_children)
|
||||
merged_children = []
|
||||
#print '======',tree
|
||||
|
||||
# And recurse
|
||||
children = []
|
||||
for child in tree:
|
||||
children.append(_merge_children(child, tags))
|
||||
|
||||
#return tree
|
||||
return nltk.tree.Tree(tree.node, children)
|
||||
return nltk.tree.Tree(tree.label(), children)
|
||||
|
||||
|
||||
def _render_to_html(tree):
|
||||
''' Renders a cleaned tree to HTML '''
|
||||
|
||||
"""
|
||||
Renders a cleaned tree to HTML
|
||||
"""
|
||||
def molecule_count(tree, children):
|
||||
# If an integer, return that integer
|
||||
if len(tree) == 1:
|
||||
@@ -187,24 +191,28 @@ def _render_to_html(tree):
|
||||
return tree
|
||||
else:
|
||||
children = "".join(map(_render_to_html, tree))
|
||||
if tree.node in dispatch:
|
||||
return dispatch[tree.node](tree, children)
|
||||
if tree.label() in dispatch:
|
||||
return dispatch[tree.label()](tree, children)
|
||||
else:
|
||||
return children.replace(' ', '')
|
||||
|
||||
|
||||
def render_to_html(eq):
|
||||
'''
|
||||
"""
|
||||
Render a chemical equation string to html.
|
||||
|
||||
Renders each molecule separately, and returns invalid input wrapped in a <span>.
|
||||
'''
|
||||
"""
|
||||
def err(s):
|
||||
"Render as an error span"
|
||||
"""
|
||||
Render as an error span
|
||||
"""
|
||||
return '<span class="inline-error inline">{0}</span>'.format(s)
|
||||
|
||||
def render_arrow(arrow):
|
||||
"""Turn text arrows into pretty ones"""
|
||||
"""
|
||||
Turn text arrows into pretty ones
|
||||
"""
|
||||
if arrow == '->':
|
||||
return u'\u2192'
|
||||
if arrow == '<->':
|
||||
@@ -235,20 +243,26 @@ def render_to_html(eq):
|
||||
|
||||
|
||||
def _get_final_tree(s):
|
||||
'''
|
||||
"""
|
||||
Return final tree after merge and clean.
|
||||
|
||||
Raises pyparsing.ParseException if s is invalid.
|
||||
'''
|
||||
tokenized = tokenizer.parseString(s)
|
||||
parsed = parser.parse(tokenized)
|
||||
merged = _merge_children(parsed, {'S', 'group'})
|
||||
final = _clean_parse_tree(merged)
|
||||
return final
|
||||
"""
|
||||
try:
|
||||
tokenized = tokenizer.parseString(s)
|
||||
parsed = parser.parse(tokenized)
|
||||
merged = _merge_children(parsed.next(), {'S', 'group'})
|
||||
final = _clean_parse_tree(merged)
|
||||
return final
|
||||
except StopIteration:
|
||||
# This happens with an empty tree; see this with input such as 'H2O(' or 'Xe+'.
|
||||
raise ParseException("Shouldn't have empty trees")
|
||||
|
||||
|
||||
def _check_equality(tuple1, tuple2):
|
||||
''' return True if tuples of multimolecules are equal '''
|
||||
"""
|
||||
return True if tuples of multimolecules are equal
|
||||
"""
|
||||
list1 = list(tuple1)
|
||||
list2 = list(tuple2)
|
||||
|
||||
@@ -264,14 +278,16 @@ def _check_equality(tuple1, tuple2):
|
||||
|
||||
|
||||
def compare_chemical_expression(s1, s2, ignore_state=False):
|
||||
''' It does comparison between two expressions.
|
||||
It uses divide_chemical_expression and check if division is 1
|
||||
'''
|
||||
"""
|
||||
It does comparison between two expressions.
|
||||
It uses divide_chemical_expression and checks whether the result is 1
|
||||
"""
|
||||
return divide_chemical_expression(s1, s2, ignore_state) == 1
|
||||
|
||||
|
||||
def divide_chemical_expression(s1, s2, ignore_state=False):
|
||||
'''Compare two chemical expressions for equivalence up to a multiplicative factor:
|
||||
"""
|
||||
Compare two chemical expressions for equivalence up to a multiplicative factor:
|
||||
|
||||
- If they are not the same chemicals, returns False.
|
||||
- If they are the same, "divide" s1 by s2 to return a factor x such that s1 / s2 == x as a Fraction object.
|
||||
@@ -290,12 +306,13 @@ def divide_chemical_expression(s1, s2, ignore_state=False):
|
||||
for equality of every element in list,
|
||||
- return result of factor division
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
# parsed final trees
|
||||
treedic = {}
|
||||
treedic['1'] = _get_final_tree(s1)
|
||||
treedic['2'] = _get_final_tree(s2)
|
||||
treedic = {
|
||||
'1': _get_final_tree(s1),
|
||||
'2': _get_final_tree(s2)
|
||||
}
|
||||
|
||||
# strip phases and factors
|
||||
# collect factors in list
|
||||
@@ -303,10 +320,10 @@ def divide_chemical_expression(s1, s2, ignore_state=False):
|
||||
treedic[i + ' cleaned_mm_list'] = []
|
||||
treedic[i + ' factors'] = []
|
||||
treedic[i + ' phases'] = []
|
||||
for el in treedic[i].subtrees(filter=lambda t: t.node == 'multimolecule'):
|
||||
count_subtree = [t for t in el.subtrees() if t.node == 'count']
|
||||
group_subtree = [t for t in el.subtrees() if t.node == 'group']
|
||||
phase_subtree = [t for t in el.subtrees() if t.node == 'phase']
|
||||
for el in treedic[i].subtrees(filter=lambda t: t.label() == 'multimolecule'):
|
||||
count_subtree = [t for t in el.subtrees() if t.label() == 'count']
|
||||
group_subtree = [t for t in el.subtrees() if t.label() == 'group']
|
||||
phase_subtree = [t for t in el.subtrees() if t.label() == 'phase']
|
||||
if count_subtree:
|
||||
if len(count_subtree[0]) > 1:
|
||||
treedic[i + ' factors'].append(
|
||||
|
||||
@@ -8,6 +8,6 @@ setup(
|
||||
"pyparsing==2.0.7",
|
||||
"numpy==1.6.2",
|
||||
"scipy==0.14.0",
|
||||
"nltk==2.0.6",
|
||||
"nltk==3.2.5",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -8,12 +8,9 @@
|
||||
setuptools==37.0.0
|
||||
pip==9.0.1
|
||||
|
||||
|
||||
nltk==3.2.5
|
||||
numpy==1.6.2
|
||||
networkx==1.7
|
||||
sympy==0.7.1
|
||||
pyparsing==2.0.7
|
||||
cryptography==1.9
|
||||
|
||||
# We forked NLTK just to make it work with setuptools instead of distribute
|
||||
git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6
|
||||
|
||||
@@ -79,6 +79,7 @@ Markdown>=2.6,<2.7
|
||||
mongoengine==0.10.0
|
||||
MySQL-python==1.2.5
|
||||
networkx==1.7
|
||||
nltk==3.2.5
|
||||
nose-xunitmp==0.3.2
|
||||
oauthlib==1.0.3
|
||||
path.py==8.2.1
|
||||
|
||||
@@ -53,7 +53,6 @@ git+https://github.com/jazzband/django-pipeline.git@d068a019169c9de5ee20ece041a6
|
||||
-e git+https://github.com/edx/django-wiki.git@v0.0.17#egg=django-wiki
|
||||
git+https://github.com/edx/django-openid-auth.git@0.14#egg=django-openid-auth==0.14
|
||||
git+https://github.com/edx/MongoDBProxy.git@25b99097615bda06bd7cdfe5669ed80dc2a7fed0#egg=MongoDBProxy==0.1.0
|
||||
git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6
|
||||
-e git+https://github.com/dementrock/pystache_custom.git@776973740bdaad83a3b029f96e415a7d1e8bec2f#egg=pystache_custom-dev
|
||||
-e git+https://github.com/appliedsec/pygeoip.git@95e69341cebf5a6a9fbf7c4f5439d458898bdc3b#egg=pygeoip
|
||||
-e git+https://github.com/jazkarta/edx-jsme.git@690dbf75441fa91c7c4899df0b83d77f7deb5458#egg=edx-jsme
|
||||
|
||||
Reference in New Issue
Block a user