diff --git a/common/djangoapps/util/password_policy_validators.py b/common/djangoapps/util/password_policy_validators.py
index a0d7ee0be9..eaa552722c 100644
--- a/common/djangoapps/util/password_policy_validators.py
+++ b/common/djangoapps/util/password_policy_validators.py
@@ -9,7 +9,7 @@ from __future__ import division
import string
-import nltk
+from nltk.metrics.distance import edit_distance
from django.conf import settings
from django.core.exceptions import ValidationError
from django.utils.translation import ugettext_lazy as _
@@ -106,6 +106,6 @@ def validate_password_dictionary(value):
if password_max_edit_distance and password_dictionary:
for word in password_dictionary:
- distance = nltk.metrics.distance.edit_distance(value, word)
+ distance = edit_distance(value, word)
if distance <= password_max_edit_distance:
raise ValidationError(_("Too similar to a restricted dictionary word."), code="dictionary_word")
diff --git a/common/lib/capa/capa/tests/test_inputtypes.py b/common/lib/capa/capa/tests/test_inputtypes.py
index 2e8ae0f32c..f7ef3fbdc6 100644
--- a/common/lib/capa/capa/tests/test_inputtypes.py
+++ b/common/lib/capa/capa/tests/test_inputtypes.py
@@ -51,9 +51,9 @@ def quote_attr(s):
class OptionInputTest(unittest.TestCase):
- '''
+ """
Make sure option inputs work
- '''
+ """
def test_rendering(self):
xml_str = """"""
@@ -89,7 +89,9 @@ class OptionInputTest(unittest.TestCase):
f = inputtypes.OptionInput.parse_options
def check(input, options):
- """Take list of options, confirm that output is in the silly doubled format"""
+ """
+ Take list of options, confirm that output is in the silly doubled format
+ """
expected = [(o, o) for o in options]
self.assertEqual(f(input), expected)
@@ -108,9 +110,9 @@ class OptionInputTest(unittest.TestCase):
class ChoiceGroupTest(unittest.TestCase):
- '''
+ """
Test choice groups, radio groups, and checkbox groups
- '''
+ """
def check_group(self, tag, expected_input_type, expected_suffix):
xml_str = """
@@ -248,9 +250,9 @@ class JSInputTest(unittest.TestCase):
class TextLineTest(unittest.TestCase):
- '''
+ """
Check that textline inputs work, with and without math.
- '''
+ """
def test_rendering(self):
size = "42"
@@ -369,9 +371,9 @@ class TextLineTest(unittest.TestCase):
class FileSubmissionTest(unittest.TestCase):
- '''
+ """
Check that file submission inputs work
- '''
+ """
def test_rendering(self):
allowed_files = "runme.py nooooo.rb ohai.java"
@@ -413,9 +415,9 @@ class FileSubmissionTest(unittest.TestCase):
class CodeInputTest(unittest.TestCase):
- '''
+ """
Check that codeinput inputs work
- '''
+ """
def test_rendering(self):
mode = "parrot"
@@ -434,8 +436,6 @@ class CodeInputTest(unittest.TestCase):
element = etree.fromstring(xml_str)
- escapedict = {'"': '"'}
-
state = {
'value': 'print "good evening"',
'status': 'incomplete',
@@ -471,9 +471,9 @@ class CodeInputTest(unittest.TestCase):
class MatlabTest(unittest.TestCase):
- '''
+ """
Test Matlab input types
- '''
+ """
def setUp(self):
super(MatlabTest, self).setUp()
self.rows = '10'
@@ -921,10 +921,9 @@ def html_tree_equal(received, expected):
class SchematicTest(unittest.TestCase):
- '''
+ """
Check that schematic inputs work
- '''
-
+ """
def test_rendering(self):
height = '12'
width = '33'
@@ -977,10 +976,9 @@ class SchematicTest(unittest.TestCase):
class ImageInputTest(unittest.TestCase):
- '''
+ """
Check that image inputs work
- '''
-
+ """
def check(self, value, egx, egy):
height = '78'
width = '427'
@@ -1037,10 +1035,9 @@ class ImageInputTest(unittest.TestCase):
class CrystallographyTest(unittest.TestCase):
- '''
+ """
Check that crystallography inputs work
- '''
-
+ """
def test_rendering(self):
height = '12'
width = '33'
@@ -1079,10 +1076,9 @@ class CrystallographyTest(unittest.TestCase):
class VseprTest(unittest.TestCase):
- '''
+ """
Check that vsepr inputs work
- '''
-
+ """
def test_rendering(self):
height = '12'
width = '33'
@@ -1127,9 +1123,9 @@ class VseprTest(unittest.TestCase):
class ChemicalEquationTest(unittest.TestCase):
- '''
+ """
Check that chemical equation inputs work.
- '''
+ """
def setUp(self):
super(ChemicalEquationTest, self).setUp()
self.size = "42"
@@ -1144,7 +1140,9 @@ class ChemicalEquationTest(unittest.TestCase):
self.the_input = lookup_tag('chemicalequationinput')(test_capa_system(), element, state)
def test_rendering(self):
- ''' Verify that the render context matches the expected render context'''
+ """
+ Verify that the render context matches the expected render context
+ """
context = self.the_input._get_render_context() # pylint: disable=protected-access
prob_id = 'prob_1_2'
expected = {
@@ -1161,7 +1159,9 @@ class ChemicalEquationTest(unittest.TestCase):
self.assertEqual(context, expected)
def test_chemcalc_ajax_sucess(self):
- ''' Verify that using the correct dispatch and valid data produces a valid response'''
+ """
+ Verify that using the correct dispatch and valid data produces a valid response
+ """
data = {'formula': "H"}
response = self.the_input.handle_ajax("preview_chemcalc", data)
@@ -1366,10 +1366,9 @@ class FormulaEquationTest(unittest.TestCase):
class DragAndDropTest(unittest.TestCase):
- '''
+ """
Check that drag and drop inputs work
- '''
-
+ """
def test_rendering(self):
path_to_images = '/dummy-static/images/'
@@ -1441,9 +1440,9 @@ class DragAndDropTest(unittest.TestCase):
class AnnotationInputTest(unittest.TestCase):
- '''
+ """
Make sure option inputs work
- '''
+ """
def test_rendering(self):
xml_str = '''
diff --git a/common/lib/chem/chem/chemcalc.py b/common/lib/chem/chem/chemcalc.py
index d21d420359..dd4abefed2 100644
--- a/common/lib/chem/chem/chemcalc.py
+++ b/common/lib/chem/chem/chemcalc.py
@@ -8,7 +8,7 @@ from pyparsing import Literal, OneOrMore, ParseException, StringEnd
ARROWS = ('<->', '->')
-## Defines a simple pyparsing tokenizer for chemical equations
+# Defines a simple pyparsing tokenizer for chemical equations
elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be',
'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm',
'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu',
@@ -30,7 +30,8 @@ tokenizer = OneOrMore(tokens) + StringEnd()
def _orjoin(l):
return "'" + "' | '".join(l) + "'"
-## Defines an NLTK parser for tokenized expressions
+
+# Defines an NLTK parser for tokenized expressions
grammar = """
S -> multimolecule | multimolecule '+' S
multimolecule -> count molecule | molecule
@@ -52,16 +53,19 @@ grammar = """
suffixed -> unsuffixed | unsuffixed suffix
"""
-parser = nltk.ChartParser(nltk.parse_cfg(grammar))
+parser = nltk.ChartParser(nltk.CFG.fromstring(grammar))
def _clean_parse_tree(tree):
- ''' The parse tree contains a lot of redundant
+ """
+ The parse tree contains a lot of redundant
nodes. E.g. paren_groups have groups as children, etc. This will
clean up the tree.
- '''
+ """
def unparse_number(n):
- ''' Go from a number parse tree to a number '''
+ """
+ Go from a number parse tree to a number
+ """
if len(n) == 1:
rv = n[0][0]
else:
@@ -69,19 +73,22 @@ def _clean_parse_tree(tree):
return rv
def null_tag(n):
- ''' Remove a tag '''
+ """
+ Remove a tag
+ """
return n[0]
def ion_suffix(n):
- '''1. "if" part handles special case
- 2. "else" part is general behaviour '''
-
- if n[1:][0].node == 'number' and n[1:][0][0][0] == '1':
+ """
+ 1. "if" part handles special case
+ 2. "else" part is general behaviour
+ """
+ if n[1:][0].label() == 'number' and n[1:][0][0][0] == '1':
# if suffix is explicitly 1, like ^1-
# strip 1, leave only sign: ^-
- return nltk.tree.Tree(n.node, n[2:])
+ return nltk.tree.Tree(n.label(), n[2:])
else:
- return nltk.tree.Tree(n.node, n[1:])
+ return nltk.tree.Tree(n.label(), n[1:])
dispatch = {'number': lambda x: nltk.tree.Tree("number", [unparse_number(x)]),
'unphased': null_tag,
@@ -89,40 +96,38 @@ def _clean_parse_tree(tree):
'number_suffix': lambda x: nltk.tree.Tree('number_suffix', [unparse_number(x[0])]),
'suffixed': lambda x: len(x) > 1 and x or x[0],
'ion_suffix': ion_suffix,
- 'paren_group_square': lambda x: nltk.tree.Tree(x.node, x[1]),
- 'paren_group_round': lambda x: nltk.tree.Tree(x.node, x[1])}
+ 'paren_group_square': lambda x: nltk.tree.Tree(x.label(), x[1]),
+ 'paren_group_round': lambda x: nltk.tree.Tree(x.label(), x[1])}
if isinstance(tree, str):
return tree
old_node = None
- ## This loop means that if a node is processed, and returns a child,
- ## the child will be processed.
- while tree.node in dispatch and tree.node != old_node:
- old_node = tree.node
- tree = dispatch[tree.node](tree)
+ # This loop means that if a node is processed, and returns a child,
+ # the child will be processed.
+ while tree.label() in dispatch and tree.label() != old_node:
+ old_node = tree.label()
+ tree = dispatch[tree.label()](tree)
children = []
for child in tree:
child = _clean_parse_tree(child)
children.append(child)
- tree = nltk.tree.Tree(tree.node, children)
+ tree = nltk.tree.Tree(tree.label(), children)
return tree
def _merge_children(tree, tags):
- ''' nltk, by documentation, cannot do arbitrary length
- groups. Instead of:
- (group 1 2 3 4)
- It has to handle this recursively:
- (group 1 (group 2 (group 3 (group 4))))
+ """
+ nltk, by documentation, cannot do arbitrary length groups.
+ Instead of: (group 1 2 3 4)
+ It has to handle this recursively: (group 1 (group 2 (group 3 (group 4))))
We do the cleanup of converting from the latter to the former.
- '''
+ """
if tree is None:
# There was a problem--shouldn't have empty trees (NOTE: see this with input e.g. 'H2O(', or 'Xe+').
- # Haven't grokked the code to tell if this is indeed the right thing to do.
raise ParseException("Shouldn't have empty trees")
if isinstance(tree, str):
@@ -130,32 +135,31 @@ def _merge_children(tree, tags):
merged_children = []
done = False
- #print '00000', tree
- ## Merge current tag
+
+ # Merge current tag
while not done:
done = True
for child in tree:
- if isinstance(child, nltk.tree.Tree) and child.node == tree.node and tree.node in tags:
+ if isinstance(child, nltk.tree.Tree) and child.label() == tree.label() and tree.label() in tags:
merged_children = merged_children + list(child)
done = False
else:
merged_children = merged_children + [child]
- tree = nltk.tree.Tree(tree.node, merged_children)
+ tree = nltk.tree.Tree(tree.label(), merged_children)
merged_children = []
- #print '======',tree
# And recurse
children = []
for child in tree:
children.append(_merge_children(child, tags))
- #return tree
- return nltk.tree.Tree(tree.node, children)
+ return nltk.tree.Tree(tree.label(), children)
def _render_to_html(tree):
- ''' Renders a cleaned tree to HTML '''
-
+ """
+ Renders a cleaned tree to HTML
+ """
def molecule_count(tree, children):
# If an integer, return that integer
if len(tree) == 1:
@@ -187,24 +191,28 @@ def _render_to_html(tree):
return tree
else:
children = "".join(map(_render_to_html, tree))
- if tree.node in dispatch:
- return dispatch[tree.node](tree, children)
+ if tree.label() in dispatch:
+ return dispatch[tree.label()](tree, children)
else:
return children.replace(' ', '')
def render_to_html(eq):
- '''
+ """
Render a chemical equation string to html.
Renders each molecule separately, and returns invalid input wrapped in a .
- '''
+ """
def err(s):
- "Render as an error span"
+ """
+ Render as an error span
+ """
return '{0}'.format(s)
def render_arrow(arrow):
- """Turn text arrows into pretty ones"""
+ """
+ Turn text arrows into pretty ones
+ """
if arrow == '->':
return u'\u2192'
if arrow == '<->':
@@ -235,20 +243,26 @@ def render_to_html(eq):
def _get_final_tree(s):
- '''
+ """
Return final tree after merge and clean.
Raises pyparsing.ParseException if s is invalid.
- '''
- tokenized = tokenizer.parseString(s)
- parsed = parser.parse(tokenized)
- merged = _merge_children(parsed, {'S', 'group'})
- final = _clean_parse_tree(merged)
- return final
+ """
+    tokenized = tokenizer.parseString(s)
+    try:
+        # nltk 3 returns an iterator of parse trees; builtin next() works on Python 2 and 3, unlike .next().
+        parsed = next(parser.parse(tokenized))
+    except StopIteration:
+        # The parser produced no tree at all; seen with input such as 'H2O(' or 'Xe+'.
+        raise ParseException("Shouldn't have empty trees")
+    merged = _merge_children(parsed, {'S', 'group'})
+    return _clean_parse_tree(merged)
def _check_equality(tuple1, tuple2):
- ''' return True if tuples of multimolecules are equal '''
+ """
+    Return True if tuples of multimolecules are equal
+ """
list1 = list(tuple1)
list2 = list(tuple2)
@@ -264,14 +278,16 @@ def _check_equality(tuple1, tuple2):
def compare_chemical_expression(s1, s2, ignore_state=False):
- ''' It does comparison between two expressions.
- It uses divide_chemical_expression and check if division is 1
- '''
+ """
+    Compare two chemical expressions for equality.
+    Uses divide_chemical_expression and checks whether the resulting factor is 1.
+ """
return divide_chemical_expression(s1, s2, ignore_state) == 1
def divide_chemical_expression(s1, s2, ignore_state=False):
- '''Compare two chemical expressions for equivalence up to a multiplicative factor:
+ """
+ Compare two chemical expressions for equivalence up to a multiplicative factor:
- If they are not the same chemicals, returns False.
- If they are the same, "divide" s1 by s2 to returns a factor x such that s1 / s2 == x as a Fraction object.
@@ -290,12 +306,13 @@ def divide_chemical_expression(s1, s2, ignore_state=False):
for equality of every element in list,
- return result of factor division
- '''
+ """
# parsed final trees
- treedic = {}
- treedic['1'] = _get_final_tree(s1)
- treedic['2'] = _get_final_tree(s2)
+ treedic = {
+ '1': _get_final_tree(s1),
+ '2': _get_final_tree(s2)
+ }
# strip phases and factors
# collect factors in list
@@ -303,10 +320,10 @@ def divide_chemical_expression(s1, s2, ignore_state=False):
treedic[i + ' cleaned_mm_list'] = []
treedic[i + ' factors'] = []
treedic[i + ' phases'] = []
- for el in treedic[i].subtrees(filter=lambda t: t.node == 'multimolecule'):
- count_subtree = [t for t in el.subtrees() if t.node == 'count']
- group_subtree = [t for t in el.subtrees() if t.node == 'group']
- phase_subtree = [t for t in el.subtrees() if t.node == 'phase']
+ for el in treedic[i].subtrees(filter=lambda t: t.label() == 'multimolecule'):
+ count_subtree = [t for t in el.subtrees() if t.label() == 'count']
+ group_subtree = [t for t in el.subtrees() if t.label() == 'group']
+ phase_subtree = [t for t in el.subtrees() if t.label() == 'phase']
if count_subtree:
if len(count_subtree[0]) > 1:
treedic[i + ' factors'].append(
diff --git a/common/lib/chem/setup.py b/common/lib/chem/setup.py
index b53a3f7c43..382a2f89e4 100644
--- a/common/lib/chem/setup.py
+++ b/common/lib/chem/setup.py
@@ -8,6 +8,6 @@ setup(
"pyparsing==2.0.7",
"numpy==1.6.2",
"scipy==0.14.0",
- "nltk==2.0.6",
+ "nltk==3.2.5",
],
)
diff --git a/requirements/edx-sandbox/base.txt b/requirements/edx-sandbox/base.txt
index 5d462a9ad8..75796afe1f 100644
--- a/requirements/edx-sandbox/base.txt
+++ b/requirements/edx-sandbox/base.txt
@@ -8,12 +8,9 @@
setuptools==37.0.0
pip==9.0.1
-
+nltk==3.2.5
numpy==1.6.2
networkx==1.7
sympy==0.7.1
pyparsing==2.0.7
cryptography==1.9
-
-# We forked NLTK just to make it work with setuptools instead of distribute
-git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6
diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt
index a476bbedf8..c1eade6ae9 100644
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -79,6 +79,7 @@ Markdown>=2.6,<2.7
mongoengine==0.10.0
MySQL-python==1.2.5
networkx==1.7
+nltk==3.2.5
nose-xunitmp==0.3.2
oauthlib==1.0.3
path.py==8.2.1
diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt
index 2e58a42e0a..539ecd2bb3 100644
--- a/requirements/edx/github.txt
+++ b/requirements/edx/github.txt
@@ -53,7 +53,6 @@ git+https://github.com/jazzband/django-pipeline.git@d068a019169c9de5ee20ece041a6
-e git+https://github.com/edx/django-wiki.git@v0.0.17#egg=django-wiki
git+https://github.com/edx/django-openid-auth.git@0.14#egg=django-openid-auth==0.14
git+https://github.com/edx/MongoDBProxy.git@25b99097615bda06bd7cdfe5669ed80dc2a7fed0#egg=MongoDBProxy==0.1.0
-git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6
-e git+https://github.com/dementrock/pystache_custom.git@776973740bdaad83a3b029f96e415a7d1e8bec2f#egg=pystache_custom-dev
-e git+https://github.com/appliedsec/pygeoip.git@95e69341cebf5a6a9fbf7c4f5439d458898bdc3b#egg=pygeoip
-e git+https://github.com/jazkarta/edx-jsme.git@690dbf75441fa91c7c4899df0b83d77f7deb5458#egg=edx-jsme