From fb13dc6499cc8a41fbbd2937b55b3b5ca742bc9c Mon Sep 17 00:00:00 2001
From: Robert Raposa <rraposa@edx.org>
Date: Mon, 2 May 2016 05:04:51 -0400
Subject: [PATCH] Move MakoLinter.

---
 scripts/safe_template_linter.py | 580 ++++++++++++++++----------------
 1 file changed, 290 insertions(+), 290 deletions(-)

diff --git a/scripts/safe_template_linter.py b/scripts/safe_template_linter.py
index 55d9ef2000..460a558a96 100755
--- a/scripts/safe_template_linter.py
+++ b/scripts/safe_template_linter.py
@@ -1407,6 +1407,296 @@ class JavaScriptLinter(BaseLinter):
         return False
 
 
+class PythonLinter(BaseLinter):
+    """
+    The linter for Python files.
+
+    The current implementation of the linter does naive Python parsing. It does
+    not use the parser. One known issue is that parsing errors found inside a
+    docstring need to be disabled, rather than being automatically skipped.
+    Skipping docstrings is an enhancement that could be added.
+    """
+
+    def __init__(self):
+        """
+        Init method.
+        """
+        super(PythonLinter, self).__init__()
+        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')
+
+    def process_file(self, directory, file_name):
+        """
+        Process file to determine if it is a Python file and
+        if it is safe.
+
+        Arguments:
+            directory (string): The directory of the file to be checked
+            file_name (string): A filename for a potential Python file
+
+        Returns:
+            The file results containing any violations.
+
+        """
+        file_full_path = os.path.normpath(directory + '/' + file_name)
+        results = FileResults(file_full_path)
+
+        if not results.is_file:
+            return results
+
+        if file_name.lower().endswith('.py') is False:
+            return results
+
+        # skip this linter code (i.e. safe_template_linter.py)
+        if file_name == os.path.basename(__file__):
+            return results
+
+        if not self._is_valid_directory(self._skip_python_dirs, directory):
+            return results
+
+        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)
+
+    def check_python_file_is_safe(self, file_contents, results):
+        """
+        Checks for violations in a Python file.
+
+        Arguments:
+            file_contents: The contents of the Python file.
+            results: A file results object to which violations will be added.
+
+        """
+        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
+        self._check_deprecated_display_name(file_contents, results)
+        self._check_custom_escape(file_contents, results)
+        self._check_html(file_contents, results)
+        results.prepare_results(file_contents, line_comment_delim='#')
+
+    def _check_deprecated_display_name(self, file_contents, results):
+        """
+        Checks that the deprecated display_name_with_default_escaped is not
+        used. Adds violation to results if there is a problem.
+
+        Arguments:
+            file_contents: The contents of the Python file
+            results: A file results object to which violations will be added.
+
+        """
+        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
+            expression = Expression(match.start(), match.end())
+            results.violations.append(ExpressionRuleViolation(
+                Rules.python_deprecated_display_name, expression
+            ))
+
+    def _check_custom_escape(self, file_contents, results):
+        """
+        Checks for custom escaping calls, rather than using a standard escaping
+        method.
+
+        Arguments:
+            file_contents: The contents of the Python file
+            results: A file results object to which violations will be added.
+
+        """
+        for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
+            expression = Expression(match.start(), match.end())
+            results.violations.append(ExpressionRuleViolation(
+                Rules.python_custom_escape, expression
+            ))
+
+    def _check_html(self, file_contents, results):
+        """
+        Checks many rules related to HTML in a Python file.
+
+        Arguments:
+            file_contents: The contents of the Python file
+            results: A file results object to which violations will be added.
+
+        """
+        # Text() Expressions keyed by its end index
+        text_calls_by_end_index = {}
+        # HTML() Expressions keyed by its end index
+        html_calls_by_end_index = {}
+        start_index = 0
+        while True:
+
+            # check HTML(), Text() and format() calls
+            result = self._check_html_text_format(
+                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
+            )
+            next_start_index = result['next_start_index']
+            interpolate_end_index = result['interpolate_end_index']
+
+            # check for interpolation including HTML outside of function calls
+            self._check_interpolate_with_html(
+                file_contents, start_index, interpolate_end_index, results
+            )
+
+            # advance the search
+            start_index = next_start_index
+
+            # end if there is nothing left to search
+            if interpolate_end_index is None:
+                break
+
+    def _check_html_text_format(
+            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
+    ):
+        """
+        Checks for HTML(), Text() and format() calls, and various rules related
+        to these calls.
+
+        Arguments:
+            file_contents: The contents of the Python file
+            start_index: The index at which to begin searching for a function
+                call.
+            text_calls_by_end_index: Text() Expressions keyed by its end index.
+            html_calls_by_end_index: HTML() Expressions keyed by its end index.
+            results: A file results object to which violations will be added.
+
+        Returns:
+            A dict with the following keys:
+                'next_start_index': The start index of the next search for a
+                    function call.
+                'interpolate_end_index': The end index of the next search
+                    for interpolation with html, or None if the end of file
+                    should be used.
+
+        """
+        # used to find opening of .format(), Text() and HTML() calls
+        regex_function_open = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")
+        interpolate_end_index = None
+        end_index = None
+        strings = None
+        html_calls = []
+        while True:
+            # first search for HTML(), Text(), or .format()
+            if end_index is None:
+                function_match = regex_function_open.search(file_contents, start_index)
+            else:
+                function_match = regex_function_open.search(file_contents, start_index, end_index)
+            if function_match is not None:
+                if interpolate_end_index is None:
+                    interpolate_end_index = function_match.start()
+                function_close_result = self._find_closing_char_index(
+                    None, '(', ')', file_contents, start_index=function_match.end(),
+                )
+                if function_close_result is None:
+                    results.violations.append(ExpressionRuleViolation(
+                        Rules.python_parse_error, Expression(function_match.start())
+                    ))
+                else:
+                    expression = Expression(
+                        function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
+                        start_delim=function_match.group(), end_delim=")"
+                    )
+                    # if this is an outermost Text(), HTML(), or format() call
+                    if end_index is None:
+                        end_index = expression.end_index
+                        interpolate_end_index = expression.start_index
+                        strings = function_close_result['strings']
+                    if function_match.group() == '.format(':
+                        if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
+                            is_wrapped_with_text = str(function_match.start()) in text_calls_by_end_index.keys()
+                            is_wrapped_with_html = str(function_match.start()) in html_calls_by_end_index.keys()
+                            if is_wrapped_with_text is False and is_wrapped_with_html is False:
+                                results.violations.append(ExpressionRuleViolation(
+                                    Rules.python_requires_html_or_text, expression
+                                ))
+                    else:  # expression is 'HTML(' or 'Text('
+                        # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
+                        # Generally, format() would be the issue if there is one.
+                        if regex_function_open.search(expression.expression_inner) is not None:
+                            results.violations.append(ExpressionRuleViolation(
+                                Rules.python_close_before_format, expression
+                            ))
+                        if function_match.group() == 'Text(':
+                            text_calls_by_end_index[str(expression.end_index)] = expression
+                        else:  # function_match.group() == 'HTML(':
+                            html_calls_by_end_index[str(expression.end_index)] = expression
+                            html_calls.append(expression)
+
+                start_index = function_match.end()
+            else:
+                break
+
+        # checks strings in the outermost call to ensure they are properly
+        # wrapped with HTML()
+        self._check_format_html_strings_wrapped(strings, html_calls, results)
+
+        # compute where to continue the search
+        if function_match is None and end_index is None:
+            next_start_index = start_index
+        elif end_index is None:
+            next_start_index = function_match.end()
+        else:
+            next_start_index = end_index
+
+        return {
+            'next_start_index': next_start_index,
+            'interpolate_end_index': interpolate_end_index,
+        }
+
+    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
+        """
+        Checks that any string inside a format call that seems to contain HTML
+        is wrapped with a call to HTML().
+
+        Arguments:
+            strings: A list of ParseStrings for each string inside the format()
+                call.
+            html_calls: A list of Expressions representing all of the HTML()
+                calls inside the format() call.
+            results: A file results object to which violations will be added.
+
+        """
+        html_strings = []
+        html_wrapped_strings = []
+        if strings is not None:
+            # find all strings that contain HTML
+            for string in strings:
+                if '<' in string.string:
+                    html_strings.append(string)
+                    # check if HTML string is appropriately wrapped
+                    for html_call in html_calls:
+                        if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
+                            html_wrapped_strings.append(string)
+                            break
+            # loop through all unwrapped strings
+            for unsafe_string in set(html_strings) - set(html_wrapped_strings):
+                unsafe_string_expression = Expression(unsafe_string.start_index)
+                results.violations.append(ExpressionRuleViolation(
+                    Rules.python_wrap_html, unsafe_string_expression
+                ))
+
+    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
+        """
+        Find interpolations with html that fall outside of any calls to HTML(),
+        Text(), and .format().
+
+        Arguments:
+            file_contents: The contents of the Python file
+            start_index: The index to start the search, or None if nothing to
+                search
+            end_index: The index to end the search, or None if the end of file
+                should be used.
+            results: A file results object to which violations will be added.
+
+        """
+        # used to find interpolation with HTML
+        pattern_interpolate_html_inner = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
+        regex_interpolate_html = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
+            pattern_interpolate_html_inner, pattern_interpolate_html_inner
+        ))
+        if end_index is None:
+            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
+        else:
+            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
+        for match_html_string in interpolate_string_iter:
+            expression = Expression(match_html_string.start(), match_html_string.end())
+            results.violations.append(ExpressionRuleViolation(
+                Rules.python_interpolate_html, expression
+            ))
+
+
 class MakoTemplateLinter(BaseLinter):
     """
     The linter for Mako template files.
@@ -1968,296 +2258,6 @@ class MakoTemplateLinter(BaseLinter):
         return expressions
 
 
-class PythonLinter(BaseLinter):
-    """
-    The linter for Python files.
-
-    The current implementation of the linter does naive Python parsing. It does
-    not use the parser. One known issue is that parsing errors found inside a
-    docstring need to be disabled, rather than being automatically skipped.
-    Skipping docstrings is an enhancement that could be added.
-    """
-
-    def __init__(self):
-        """
-        Init method.
-        """
-        super(PythonLinter, self).__init__()
-        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')
-
-    def process_file(self, directory, file_name):
-        """
-        Process file to determine if it is a Python file and
-        if it is safe.
-
-        Arguments:
-            directory (string): The directory of the file to be checked
-            file_name (string): A filename for a potential Python file
-
-        Returns:
-            The file results containing any violations.
-
-        """
-        file_full_path = os.path.normpath(directory + '/' + file_name)
-        results = FileResults(file_full_path)
-
-        if not results.is_file:
-            return results
-
-        if file_name.lower().endswith('.py') is False:
-            return results
-
-        # skip this linter code (i.e. safe_template_linter.py)
-        if file_name == os.path.basename(__file__):
-            return results
-
-        if not self._is_valid_directory(self._skip_python_dirs, directory):
-            return results
-
-        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)
-
-    def check_python_file_is_safe(self, file_contents, results):
-        """
-        Checks for violations in a Python file.
-
-        Arguments:
-            file_contents: The contents of the Python file.
-            results: A file results objects to which violations will be added.
-
-        """
-        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
-        self._check_deprecated_display_name(file_contents, results)
-        self._check_custom_escape(file_contents, results)
-        self._check_html(file_contents, results)
-        results.prepare_results(file_contents, line_comment_delim='#')
-
-    def _check_deprecated_display_name(self, file_contents, results):
-        """
-        Checks that the deprecated display_name_with_default_escaped is not
-        used. Adds violation to results if there is a problem.
-
-        Arguments:
-            file_contents: The contents of the Python file
-            results: A list of results into which violations will be added.
-
-        """
-        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
-            expression = Expression(match.start(), match.end())
-            results.violations.append(ExpressionRuleViolation(
-                Rules.python_deprecated_display_name, expression
-            ))
-
-    def _check_custom_escape(self, file_contents, results):
-        """
-        Checks for custom escaping calls, rather than using a standard escaping
-        method.
-
-        Arguments:
-            file_contents: The contents of the Python file
-            results: A list of results into which violations will be added.
-
-        """
-        for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
-            expression = Expression(match.start(), match.end())
-            results.violations.append(ExpressionRuleViolation(
-                Rules.python_custom_escape, expression
-            ))
-
-    def _check_html(self, file_contents, results):
-        """
-        Checks many rules related to HTML in a Python file.
-
-        Arguments:
-            file_contents: The contents of the Python file
-            results: A list of results into which violations will be added.
-
-        """
-        # Text() Expressions keyed by its end index
-        text_calls_by_end_index = {}
-        # HTML() Expressions keyed by its end index
-        html_calls_by_end_index = {}
-        start_index = 0
-        while True:
-
-            # check HTML(), Text() and format() calls
-            result = self._check_html_text_format(
-                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
-            )
-            next_start_index = result['next_start_index']
-            interpolate_end_index = result['interpolate_end_index']
-
-            # check for interpolation including HTML outside of function calls
-            self._check_interpolate_with_html(
-                file_contents, start_index, interpolate_end_index, results
-            )
-
-            # advance the search
-            start_index = next_start_index
-
-            # end if there is nothing left to search
-            if interpolate_end_index is None:
-                break
-
-    def _check_html_text_format(
-            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
-    ):
-        """
-        Checks for HTML(), Text() and format() calls, and various rules related
-        to these calls.
-
-        Arguments:
-            file_contents: The contents of the Python file
-            start_index: The index at which to begin searching for a function
-                call.
-            text_calls_by_end_index: Text() Expressions keyed by its end index.
-            html_calls_by_end_index: HTML() Expressions keyed by its end index.
-            results: A list of results into which violations will be added.
-
-        Returns:
-            A dict with the following keys:
-                'next_start_index': The start index of the next search for a
-                    function call.
-                'interpolate_end_index': The end index of the next next search
-                    for interpolation with html, or None if the end of file
-                    should be used.
-
-        """
-        # used to find opening of .format(), Text() and HTML() calls
-        regex_function_open = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")
-        interpolate_end_index = None
-        end_index = None
-        strings = None
-        html_calls = []
-        while True:
-            # first search for HTML(), Text(), or .format()
-            if end_index is None:
-                function_match = regex_function_open.search(file_contents, start_index)
-            else:
-                function_match = regex_function_open.search(file_contents, start_index, end_index)
-            if function_match is not None:
-                if interpolate_end_index is None:
-                    interpolate_end_index = function_match.start()
-                function_close_result = self._find_closing_char_index(
-                    None, '(', ')', file_contents, start_index=function_match.end(),
-                )
-                if function_close_result is None:
-                    results.violations.append(ExpressionRuleViolation(
-                        Rules.python_parse_error, Expression(function_match.start())
-                    ))
-                else:
-                    expression = Expression(
-                        function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
-                        start_delim=function_match.group(), end_delim=")"
-                    )
-                    # if this an outer most Text(), HTML(), or format() call
-                    if end_index is None:
-                        end_index = expression.end_index
-                        interpolate_end_index = expression.start_index
-                        strings = function_close_result['strings']
-                    if function_match.group() == '.format(':
-                        if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
-                            is_wrapped_with_text = str(function_match.start()) in text_calls_by_end_index.keys()
-                            is_wrapped_with_html = str(function_match.start()) in html_calls_by_end_index.keys()
-                            if is_wrapped_with_text is False and is_wrapped_with_html is False:
-                                results.violations.append(ExpressionRuleViolation(
-                                    Rules.python_requires_html_or_text, expression
-                                ))
-                    else:  # expression is 'HTML(' or 'Text('
-                        # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
-                        # Generally, format() would be the issue if there is one.
-                        if regex_function_open.search(expression.expression_inner) is not None:
-                            results.violations.append(ExpressionRuleViolation(
-                                Rules.python_close_before_format, expression
-                            ))
-                        if function_match.group() == 'Text(':
-                            text_calls_by_end_index[str(expression.end_index)] = expression
-                        else:  # function_match.group() == 'HTML(':
-                            html_calls_by_end_index[str(expression.end_index)] = expression
-                            html_calls.append(expression)
-
-                start_index = function_match.end()
-            else:
-                break
-
-        # checks strings in the outer most call to ensure they are properly
-        # wrapped with HTML()
-        self._check_format_html_strings_wrapped(strings, html_calls, results)
-
-        # compute where to continue the search
-        if function_match is None and end_index is None:
-            next_start_index = start_index
-        elif end_index is None:
-            next_start_index = function_match.end()
-        else:
-            next_start_index = end_index
-
-        return {
-            'next_start_index': next_start_index,
-            'interpolate_end_index': interpolate_end_index,
-        }
-
-    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
-        """
-        Checks that any string inside a format call that seems to contain HTML
-        is wrapped with a call to HTML().
-
-        Arguments:
-            strings: A list of ParseStrings for each string inside the format()
-                call.
-            html_calls: A list of Expressions representing all of the HTML()
-                calls inside the format() call.
-            results: A list of results into which violations will be added.
-
-        """
-        html_strings = []
-        html_wrapped_strings = []
-        if strings is not None:
-            # find all strings that contain HTML
-            for string in strings:
-                if '<' in string.string:
-                    html_strings.append(string)
-                    # check if HTML string is appropriately wrapped
-                    for html_call in html_calls:
-                        if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
-                            html_wrapped_strings.append(string)
-                            break
-            # loop through all unwrapped strings
-            for unsafe_string in set(html_strings) - set(html_wrapped_strings):
-                unsafe_string_expression = Expression(unsafe_string.start_index)
-                results.violations.append(ExpressionRuleViolation(
-                    Rules.python_wrap_html, unsafe_string_expression
-                ))
-
-    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
-        """
-        Find interpolations with html that fall outside of any calls to HTML(),
-        Text(), and .format().
-
-        Arguments:
-            file_contents: The contents of the Python file
-            start_index: The index to start the search, or None if nothing to
-                search
-            end_index: The index to end the search, or None if the end of file
-                should be used.
-            results: A list of results into which violations will be added.
-
-        """
-        # used to find interpolation with HTML
-        pattern_interpolate_html_inner = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
-        regex_interpolate_html = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
-            pattern_interpolate_html_inner, pattern_interpolate_html_inner
-        ))
-        if end_index is None:
-            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
-        else:
-            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
-        for match_html_string in interpolate_string_iter:
-            expression = Expression(match_html_string.start(), match_html_string.end())
-            results.violations.append(ExpressionRuleViolation(
-                Rules.python_interpolate_html, expression
-            ))
-
-
 def _process_file(full_path, template_linters, options, out):
     """
     For each linter, lints the provided file.  This means finding and printing