diff --git a/common/djangoapps/util/password_policy_validators.py b/common/djangoapps/util/password_policy_validators.py index a0d7ee0be9..eaa552722c 100644 --- a/common/djangoapps/util/password_policy_validators.py +++ b/common/djangoapps/util/password_policy_validators.py @@ -9,7 +9,7 @@ from __future__ import division import string -import nltk +from nltk.metrics.distance import edit_distance from django.conf import settings from django.core.exceptions import ValidationError from django.utils.translation import ugettext_lazy as _ @@ -106,6 +106,6 @@ def validate_password_dictionary(value): if password_max_edit_distance and password_dictionary: for word in password_dictionary: - distance = nltk.metrics.distance.edit_distance(value, word) + distance = edit_distance(value, word) if distance <= password_max_edit_distance: raise ValidationError(_("Too similar to a restricted dictionary word."), code="dictionary_word") diff --git a/common/lib/capa/capa/tests/test_inputtypes.py b/common/lib/capa/capa/tests/test_inputtypes.py index 2e8ae0f32c..f7ef3fbdc6 100644 --- a/common/lib/capa/capa/tests/test_inputtypes.py +++ b/common/lib/capa/capa/tests/test_inputtypes.py @@ -51,9 +51,9 @@ def quote_attr(s): class OptionInputTest(unittest.TestCase): - ''' + """ Make sure option inputs work - ''' + """ def test_rendering(self): xml_str = """""" @@ -89,7 +89,9 @@ class OptionInputTest(unittest.TestCase): f = inputtypes.OptionInput.parse_options def check(input, options): - """Take list of options, confirm that output is in the silly doubled format""" + """ + Take list of options, confirm that output is in the silly doubled format + """ expected = [(o, o) for o in options] self.assertEqual(f(input), expected) @@ -108,9 +110,9 @@ class OptionInputTest(unittest.TestCase): class ChoiceGroupTest(unittest.TestCase): - ''' + """ Test choice groups, radio groups, and checkbox groups - ''' + """ def check_group(self, tag, expected_input_type, expected_suffix): xml_str = """ @@ -248,9 +250,9 @@ class JSInputTest(unittest.TestCase): class TextLineTest(unittest.TestCase): - ''' + """ Check that textline inputs work, with and without math. - ''' + """ def test_rendering(self): size = "42" @@ -369,9 +371,9 @@ class TextLineTest(unittest.TestCase): class FileSubmissionTest(unittest.TestCase): - ''' + """ Check that file submission inputs work - ''' + """ def test_rendering(self): allowed_files = "runme.py nooooo.rb ohai.java" @@ -413,9 +415,9 @@ class FileSubmissionTest(unittest.TestCase): class CodeInputTest(unittest.TestCase): - ''' + """ Check that codeinput inputs work - ''' + """ def test_rendering(self): mode = "parrot" @@ -434,8 +436,6 @@ class CodeInputTest(unittest.TestCase): element = etree.fromstring(xml_str) - escapedict = {'"': '"'} - state = { 'value': 'print "good evening"', 'status': 'incomplete', @@ -471,9 +471,9 @@ class CodeInputTest(unittest.TestCase): class MatlabTest(unittest.TestCase): - ''' + """ Test Matlab input types - ''' + """ def setUp(self): super(MatlabTest, self).setUp() self.rows = '10' @@ -921,10 +921,9 @@ def html_tree_equal(received, expected): class SchematicTest(unittest.TestCase): - ''' + """ Check that schematic inputs work - ''' - + """ def test_rendering(self): height = '12' width = '33' @@ -977,10 +976,9 @@ class SchematicTest(unittest.TestCase): class ImageInputTest(unittest.TestCase): - ''' + """ Check that image inputs work - ''' - + """ def check(self, value, egx, egy): height = '78' width = '427' @@ -1037,10 +1035,9 @@ class ImageInputTest(unittest.TestCase): class CrystallographyTest(unittest.TestCase): - ''' + """ Check that crystallography inputs work - ''' - + """ def test_rendering(self): height = '12' width = '33' @@ -1079,10 +1076,9 @@ class CrystallographyTest(unittest.TestCase): class VseprTest(unittest.TestCase): - ''' + """ Check that vsepr inputs work - ''' - + """ def test_rendering(self): height = '12' width = '33' @@ -1127,9 +1123,9 @@ class VseprTest(unittest.TestCase): class ChemicalEquationTest(unittest.TestCase): - ''' + """ Check that chemical equation inputs work. - ''' + """ def setUp(self): super(ChemicalEquationTest, self).setUp() self.size = "42" @@ -1144,7 +1140,9 @@ class ChemicalEquationTest(unittest.TestCase): self.the_input = lookup_tag('chemicalequationinput')(test_capa_system(), element, state) def test_rendering(self): - ''' Verify that the render context matches the expected render context''' + """ + Verify that the render context matches the expected render context + """ context = self.the_input._get_render_context() # pylint: disable=protected-access prob_id = 'prob_1_2' expected = { @@ -1161,7 +1159,9 @@ class ChemicalEquationTest(unittest.TestCase): self.assertEqual(context, expected) def test_chemcalc_ajax_sucess(self): - ''' Verify that using the correct dispatch and valid data produces a valid response''' + """ + Verify that using the correct dispatch and valid data produces a valid response + """ data = {'formula': "H"} response = self.the_input.handle_ajax("preview_chemcalc", data) @@ -1366,10 +1366,9 @@ class FormulaEquationTest(unittest.TestCase): class DragAndDropTest(unittest.TestCase): - ''' + """ Check that drag and drop inputs work - ''' - + """ def test_rendering(self): path_to_images = '/dummy-static/images/' @@ -1441,9 +1440,9 @@ class DragAndDropTest(unittest.TestCase): class AnnotationInputTest(unittest.TestCase): - ''' + """ Make sure option inputs work - ''' + """ def test_rendering(self): xml_str = ''' diff --git a/common/lib/chem/chem/chemcalc.py b/common/lib/chem/chem/chemcalc.py index d21d420359..dd4abefed2 100644 --- a/common/lib/chem/chem/chemcalc.py +++ b/common/lib/chem/chem/chemcalc.py @@ -8,7 +8,7 @@ from pyparsing import Literal, OneOrMore, ParseException, StringEnd ARROWS = ('<->', '->') -## Defines a simple pyparsing tokenizer for chemical equations +# Defines a simple pyparsing tokenizer for chemical equations elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', @@ -30,7 +30,8 @@ tokenizer = OneOrMore(tokens) + StringEnd() def _orjoin(l): return "'" + "' | '".join(l) + "'" -## Defines an NLTK parser for tokenized expressions + +# Defines an NLTK parser for tokenized expressions grammar = """ S -> multimolecule | multimolecule '+' S multimolecule -> count molecule | molecule @@ -52,16 +53,19 @@ grammar = """ suffixed -> unsuffixed | unsuffixed suffix """ -parser = nltk.ChartParser(nltk.parse_cfg(grammar)) +parser = nltk.ChartParser(nltk.CFG.fromstring(grammar)) def _clean_parse_tree(tree): - ''' The parse tree contains a lot of redundant + """ + The parse tree contains a lot of redundant nodes. E.g. paren_groups have groups as children, etc. This will clean up the tree. - ''' + """ def unparse_number(n): - ''' Go from a number parse tree to a number ''' + """ + Go from a number parse tree to a number + """ if len(n) == 1: rv = n[0][0] else: @@ -69,19 +73,22 @@ def _clean_parse_tree(tree): return rv def null_tag(n): - ''' Remove a tag ''' + """ + Remove a tag + """ return n[0] def ion_suffix(n): - '''1. "if" part handles special case - 2. "else" part is general behaviour ''' - - if n[1:][0].node == 'number' and n[1:][0][0][0] == '1': + """ + 1. "if" part handles special case + 2. "else" part is general behaviour + """ + if n[1:][0].label() == 'number' and n[1:][0][0][0] == '1': # if suffix is explicitly 1, like ^1- # strip 1, leave only sign: ^- - return nltk.tree.Tree(n.node, n[2:]) + return nltk.tree.Tree(n.label(), n[2:]) else: - return nltk.tree.Tree(n.node, n[1:]) + return nltk.tree.Tree(n.label(), n[1:]) dispatch = {'number': lambda x: nltk.tree.Tree("number", [unparse_number(x)]), 'unphased': null_tag, @@ -89,40 +96,38 @@ def _clean_parse_tree(tree): 'number_suffix': lambda x: nltk.tree.Tree('number_suffix', [unparse_number(x[0])]), 'suffixed': lambda x: len(x) > 1 and x or x[0], 'ion_suffix': ion_suffix, - 'paren_group_square': lambda x: nltk.tree.Tree(x.node, x[1]), - 'paren_group_round': lambda x: nltk.tree.Tree(x.node, x[1])} + 'paren_group_square': lambda x: nltk.tree.Tree(x.label(), x[1]), + 'paren_group_round': lambda x: nltk.tree.Tree(x.label(), x[1])} if isinstance(tree, str): return tree old_node = None - ## This loop means that if a node is processed, and returns a child, - ## the child will be processed. - while tree.node in dispatch and tree.node != old_node: - old_node = tree.node - tree = dispatch[tree.node](tree) + # This loop means that if a node is processed, and returns a child, + # the child will be processed. + while tree.label() in dispatch and tree.label() != old_node: + old_node = tree.label() + tree = dispatch[tree.label()](tree) children = [] for child in tree: child = _clean_parse_tree(child) children.append(child) - tree = nltk.tree.Tree(tree.node, children) + tree = nltk.tree.Tree(tree.label(), children) return tree def _merge_children(tree, tags): - ''' nltk, by documentation, cannot do arbitrary length - groups. Instead of: - (group 1 2 3 4) - It has to handle this recursively: - (group 1 (group 2 (group 3 (group 4)))) + """ + nltk, by documentation, cannot do arbitrary length groups. + Instead of: (group 1 2 3 4) + It has to handle this recursively: (group 1 (group 2 (group 3 (group 4)))) We do the cleanup of converting from the latter to the former. - ''' + """ if tree is None: # There was a problem--shouldn't have empty trees (NOTE: see this with input e.g. 'H2O(', or 'Xe+'). - # Haven't grokked the code to tell if this is indeed the right thing to do. raise ParseException("Shouldn't have empty trees") if isinstance(tree, str): @@ -130,32 +135,31 @@ def _merge_children(tree, tags): merged_children = [] done = False - #print '00000', tree - ## Merge current tag + + # Merge current tag while not done: done = True for child in tree: - if isinstance(child, nltk.tree.Tree) and child.node == tree.node and tree.node in tags: + if isinstance(child, nltk.tree.Tree) and child.label() == tree.label() and tree.label() in tags: merged_children = merged_children + list(child) done = False else: merged_children = merged_children + [child] - tree = nltk.tree.Tree(tree.node, merged_children) + tree = nltk.tree.Tree(tree.label(), merged_children) merged_children = [] - #print '======',tree # And recurse children = [] for child in tree: children.append(_merge_children(child, tags)) - #return tree - return nltk.tree.Tree(tree.node, children) + return nltk.tree.Tree(tree.label(), children) def _render_to_html(tree): - ''' Renders a cleaned tree to HTML ''' - + """ + Renders a cleaned tree to HTML + """ def molecule_count(tree, children): # If an integer, return that integer if len(tree) == 1: @@ -187,24 +191,28 @@ def _render_to_html(tree): return tree else: children = "".join(map(_render_to_html, tree)) - if tree.node in dispatch: - return dispatch[tree.node](tree, children) + if tree.label() in dispatch: + return dispatch[tree.label()](tree, children) else: return children.replace(' ', '') def render_to_html(eq): - ''' + """ Render a chemical equation string to html. Renders each molecule separately, and returns invalid input wrapped in a . - ''' + """ def err(s): - "Render as an error span" + """ + Render as an error span + """ return '{0}'.format(s) def render_arrow(arrow): - """Turn text arrows into pretty ones""" + """ + Turn text arrows into pretty ones + """ if arrow == '->': return u'\u2192' if arrow == '<->': @@ -235,20 +243,26 @@ def render_to_html(eq): def _get_final_tree(s): - ''' + """ Return final tree after merge and clean. Raises pyparsing.ParseException if s is invalid. - ''' - tokenized = tokenizer.parseString(s) - parsed = parser.parse(tokenized) - merged = _merge_children(parsed, {'S', 'group'}) - final = _clean_parse_tree(merged) - return final + """ + try: + tokenized = tokenizer.parseString(s) + parsed = parser.parse(tokenized) + merged = _merge_children(parsed.next(), {'S', 'group'}) + final = _clean_parse_tree(merged) + return final + except StopIteration: + # This happens with an empty tree- see this with input e.g. 'H2O(', or 'Xe+'). + raise ParseException("Shouldn't have empty trees") def _check_equality(tuple1, tuple2): - ''' return True if tuples of multimolecules are equal ''' + """ + return True if tuples of multimolecules are equal + """ list1 = list(tuple1) list2 = list(tuple2) @@ -264,14 +278,16 @@ def _check_equality(tuple1, tuple2): def compare_chemical_expression(s1, s2, ignore_state=False): - ''' It does comparison between two expressions. - It uses divide_chemical_expression and check if division is 1 - ''' + """ + It does comparison between two expressions. + It uses divide_chemical_expression and check if division is 1 + """ return divide_chemical_expression(s1, s2, ignore_state) == 1 def divide_chemical_expression(s1, s2, ignore_state=False): - '''Compare two chemical expressions for equivalence up to a multiplicative factor: + """ + Compare two chemical expressions for equivalence up to a multiplicative factor: - If they are not the same chemicals, returns False. - If they are the same, "divide" s1 by s2 to returns a factor x such that s1 / s2 == x as a Fraction object. @@ -290,12 +306,13 @@ def divide_chemical_expression(s1, s2, ignore_state=False): for equality of every element in list, - return result of factor division - ''' + """ # parsed final trees - treedic = {} - treedic['1'] = _get_final_tree(s1) - treedic['2'] = _get_final_tree(s2) + treedic = { + '1': _get_final_tree(s1), + '2': _get_final_tree(s2) + } # strip phases and factors # collect factors in list @@ -303,10 +320,10 @@ def divide_chemical_expression(s1, s2, ignore_state=False): treedic[i + ' cleaned_mm_list'] = [] treedic[i + ' factors'] = [] treedic[i + ' phases'] = [] - for el in treedic[i].subtrees(filter=lambda t: t.node == 'multimolecule'): - count_subtree = [t for t in el.subtrees() if t.node == 'count'] - group_subtree = [t for t in el.subtrees() if t.node == 'group'] - phase_subtree = [t for t in el.subtrees() if t.node == 'phase'] + for el in treedic[i].subtrees(filter=lambda t: t.label() == 'multimolecule'): + count_subtree = [t for t in el.subtrees() if t.label() == 'count'] + group_subtree = [t for t in el.subtrees() if t.label() == 'group'] + phase_subtree = [t for t in el.subtrees() if t.label() == 'phase'] if count_subtree: if len(count_subtree[0]) > 1: treedic[i + ' factors'].append( diff --git a/common/lib/chem/setup.py b/common/lib/chem/setup.py index b53a3f7c43..382a2f89e4 100644 --- a/common/lib/chem/setup.py +++ b/common/lib/chem/setup.py @@ -8,6 +8,6 @@ setup( "pyparsing==2.0.7", "numpy==1.6.2", "scipy==0.14.0", - "nltk==2.0.6", + "nltk==3.2.5", ], ) diff --git a/requirements/edx-sandbox/base.txt b/requirements/edx-sandbox/base.txt index 5d462a9ad8..75796afe1f 100644 --- a/requirements/edx-sandbox/base.txt +++ b/requirements/edx-sandbox/base.txt @@ -8,12 +8,9 @@ setuptools==37.0.0 pip==9.0.1 - +nltk==3.2.5 numpy==1.6.2 networkx==1.7 sympy==0.7.1 pyparsing==2.0.7 cryptography==1.9 - -# We forked NLTK just to make it work with setuptools instead of distribute -git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6 diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index a476bbedf8..c1eade6ae9 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -79,6 +79,7 @@ Markdown>=2.6,<2.7 mongoengine==0.10.0 MySQL-python==1.2.5 networkx==1.7 +nltk==3.2.5 nose-xunitmp==0.3.2 oauthlib==1.0.3 path.py==8.2.1 diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 2e58a42e0a..539ecd2bb3 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -53,7 +53,6 @@ git+https://github.com/jazzband/django-pipeline.git@d068a019169c9de5ee20ece041a6 -e git+https://github.com/edx/django-wiki.git@v0.0.17#egg=django-wiki git+https://github.com/edx/django-openid-auth.git@0.14#egg=django-openid-auth==0.14 git+https://github.com/edx/MongoDBProxy.git@25b99097615bda06bd7cdfe5669ed80dc2a7fed0#egg=MongoDBProxy==0.1.0 -git+https://github.com/edx/nltk.git@2.0.6#egg=nltk==2.0.6 -e git+https://github.com/dementrock/pystache_custom.git@776973740bdaad83a3b029f96e415a7d1e8bec2f#egg=pystache_custom-dev -e git+https://github.com/appliedsec/pygeoip.git@95e69341cebf5a6a9fbf7c4f5439d458898bdc3b#egg=pygeoip -e git+https://github.com/jazkarta/edx-jsme.git@690dbf75441fa91c7c4899df0b83d77f7deb5458#egg=edx-jsme