diff --git a/pavelib/quality.py b/pavelib/quality.py index 1fe36f40fa..aaaf7b809a 100644 --- a/pavelib/quality.py +++ b/pavelib/quality.py @@ -409,7 +409,7 @@ def run_stylelint(options): @timed def run_xsslint(options): """ - Runs xss_linter.py on the codebase + Runs xsslint/xss_linter.py on the codebase """ thresholds_option = getattr(options, 'thresholds', '{}') @@ -434,7 +434,7 @@ def run_xsslint(options): _prepare_report_dir(xsslint_report_dir) sh( - "{repo_root}/scripts/{xsslint_script} --rule-totals >> {xsslint_report}".format( + "{repo_root}/scripts/xsslint/{xsslint_script} --rule-totals >> {xsslint_report}".format( repo_root=Env.REPO_ROOT, xsslint_script=xsslint_script, xsslint_report=xsslint_report, diff --git a/scripts/xss-commit-linter.sh b/scripts/xss-commit-linter.sh index 2a1dce4b9a..8941760b57 100755 --- a/scripts/xss-commit-linter.sh +++ b/scripts/xss-commit-linter.sh @@ -5,7 +5,7 @@ set -e # # xss-commit-linter.sh # -# Executes xss_linter.py on the set of files in a particular git commit. +# Executes xsslint/xss_linter.py on the set of files in a particular git commit. # ############################################################################### @@ -82,6 +82,6 @@ else for f in $diff_files; do echo "" echo "Linting $f:" - ./scripts/xss_linter.py $f + ./scripts/xsslint/xss_linter.py $f done fi diff --git a/scripts/tests/__init__.py b/scripts/xsslint/tests/__init__.py similarity index 100% rename from scripts/tests/__init__.py rename to scripts/xsslint/tests/__init__.py diff --git a/scripts/tests/templates/test.coffee b/scripts/xsslint/tests/templates/test.coffee similarity index 100% rename from scripts/tests/templates/test.coffee rename to scripts/xsslint/tests/templates/test.coffee diff --git a/scripts/tests/templates/test.html b/scripts/xsslint/tests/templates/test.html similarity index 100% rename from scripts/tests/templates/test.html rename to scripts/xsslint/tests/templates/test.html diff --git a/scripts/tests/templates/test.js b/scripts/xsslint/tests/templates/test.js similarity index 100% rename from scripts/tests/templates/test.js rename to scripts/xsslint/tests/templates/test.js diff --git a/scripts/tests/templates/test.py b/scripts/xsslint/tests/templates/test.py similarity index 100% rename from scripts/tests/templates/test.py rename to scripts/xsslint/tests/templates/test.py diff --git a/scripts/tests/templates/test.underscore b/scripts/xsslint/tests/templates/test.underscore similarity index 100% rename from scripts/tests/templates/test.underscore rename to scripts/xsslint/tests/templates/test.underscore diff --git a/scripts/tests/test_xss_linter.py b/scripts/xsslint/tests/test_linters.py similarity index 85% rename from scripts/tests/test_xss_linter.py rename to scripts/xsslint/tests/test_linters.py index 9f592652f2..ccc4b81329 100644 --- a/scripts/tests/test_xss_linter.py +++ b/scripts/xsslint/tests/test_linters.py @@ -1,66 +1,18 @@ # -*- coding: utf-8 -*- """ -Tests for xss_linter.py +Tests for linters.py """ -import re +from __future__ import print_function + import textwrap -from StringIO import StringIO from unittest import TestCase -import mock from ddt import data, ddt -from scripts.xss_linter import ( - FileResults, - JavaScriptLinter, - MakoTemplateLinter, - ParseString, - PythonLinter, - Rules, - StringLines, - SummaryResults, - UnderscoreTemplateLinter, - _lint -) - - -@ddt -class TestStringLines(TestCase): - """ - Test StringLines class. - """ - @data( - {'string': 'test', 'index': 0, 'line_start_index': 0, 'line_end_index': 4}, - {'string': 'test', 'index': 2, 'line_start_index': 0, 'line_end_index': 4}, - {'string': 'test', 'index': 3, 'line_start_index': 0, 'line_end_index': 4}, - {'string': '\ntest', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, - {'string': '\ntest', 'index': 2, 'line_start_index': 1, 'line_end_index': 5}, - {'string': '\ntest\n', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, - {'string': '\ntest\n', 'index': 2, 'line_start_index': 1, 'line_end_index': 6}, - {'string': '\ntest\n', 'index': 6, 'line_start_index': 6, 'line_end_index': 6}, - ) - def test_string_lines_start_end_index(self, data): - """ - Test StringLines index_to_line_start_index and index_to_line_end_index. - """ - lines = StringLines(data['string']) - self.assertEqual(lines.index_to_line_start_index(data['index']), data['line_start_index']) - self.assertEqual(lines.index_to_line_end_index(data['index']), data['line_end_index']) - - @data( - {'string': 'test', 'line_number': 1, 'line': 'test'}, - {'string': '\ntest', 'line_number': 1, 'line': ''}, - {'string': '\ntest', 'line_number': 2, 'line': 'test'}, - {'string': '\ntest\n', 'line_number': 1, 'line': ''}, - {'string': '\ntest\n', 'line_number': 2, 'line': 'test'}, - {'string': '\ntest\n', 'line_number': 3, 'line': ''}, - ) - def test_string_lines_start_end_index(self, data): - """ - Test line_number_to_line. - """ - lines = StringLines(data['string']) - self.assertEqual(lines.line_number_to_line(data['line_number']), data['line']) +from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter +from xsslint.reporting import FileResults +from xsslint.rules import Rules +from xsslint.utils import ParseString class TestLinter(TestCase): @@ -93,168 +45,678 @@ class TestLinter(TestCase): self.assertEqual(violation.rule, rule) -class TestXSSLinter(TestCase): +@ddt +class TestUnderscoreTemplateLinter(TestLinter): """ - Test some top-level linter functions + Test UnderscoreTemplateLinter """ - def setUp(self): + def test_check_underscore_file_is_safe(self): """ - Setup patches on linters for testing. + Test check_underscore_file_is_safe with safe template """ - self.patch_is_valid_directory(MakoTemplateLinter) - self.patch_is_valid_directory(JavaScriptLinter) - self.patch_is_valid_directory(UnderscoreTemplateLinter) - self.patch_is_valid_directory(PythonLinter) + linter = UnderscoreTemplateLinter() + results = FileResults('') - patcher = mock.patch('scripts.xss_linter.is_skip_dir', return_value=False) - patcher.start() - self.addCleanup(patcher.stop) + template = textwrap.dedent(""" + <%- gettext('Single Line') %> - def patch_is_valid_directory(self, linter_class): + <%- + gettext('Multiple Lines') + %> + """) + + linter.check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 0) + + def test_check_underscore_file_is_not_safe(self): """ - Creates a mock patch for _is_valid_directory on a Linter to always - return true. This avoids nested patch calls. - - Arguments: - linter_class: The linter class to be patched + Test check_underscore_file_is_safe with unsafe template """ - patcher = mock.patch.object(linter_class, '_is_valid_directory', return_value=True) - patch_start = patcher.start() - self.addCleanup(patcher.stop) - return patch_start + linter = UnderscoreTemplateLinter() + results = FileResults('') - def test_lint_defaults(self): + template = textwrap.dedent(""" + <%= gettext('Single Line') %> + + <%= + gettext('Multiple Lines') + %> + """) + + linter.check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped) + self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped) + + @data( + { + 'template': + '<% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // xss-lint: disable=another-rule,underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // xss-lint: disable=another-rule %>\n' + '<%= message %>', + 'is_disabled': [False], + }, + { + 'template': + '<% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '// This test does not use proper Underscore.js Template syntax\n' + '// But, it is just testing that a maximum of 5 non-whitespace\n' + '// are used to designate start of line for disabling the next line.\n' + ' 1 2 3 4 5 xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + ' 1 2 3 4 5 6 xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %><% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %>\n' + '<% // xss-lint: disable=underscore-not-escaped %>', + 'is_disabled': [False], + }, + ) + def test_check_underscore_file_disable_rule(self, data): """ - Tests the top-level linting with default options. + Test check_underscore_file_is_safe with various disabled pragmas """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': False, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + linter.check_underscore_file_is_safe(data['template'], results) - output = out.getvalue() - # Assert violation details are displayed. - self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output)) - self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output)) - self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) - self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output)) - self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) - lines_with_rule = 0 - lines_without_rule = 0 # Output with verbose setting only. - for underscore_match in re.finditer('test\.underscore:.*\n', output): - if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: - lines_with_rule += 1 - else: - lines_without_rule += 1 - self.assertGreaterEqual(lines_with_rule, 1) - self.assertEquals(lines_without_rule, 0) - self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output)) - self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) - # Assert no rule totals. - self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - # Assert final total - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + violation_count = len(data['is_disabled']) + self.assertEqual(len(results.violations), violation_count) + for index in range(0, violation_count - 1): + self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index]) - def test_lint_with_verbose(self): + def test_check_underscore_file_disables_one_violation(self): """ - Tests the top-level linting with verbose option. + Test check_underscore_file_is_safe with disabled before a line only + disables for the violation following """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': True, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + template = textwrap.dedent(""" + <% // xss-lint: disable=underscore-not-escaped %> + <%= message %> + <%= message %> + """) - output = out.getvalue() - lines_with_rule = 0 - lines_without_rule = 0 # Output with verbose setting only. - for underscore_match in re.finditer('test\.underscore:.*\n', output): - if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: - lines_with_rule += 1 - else: - lines_without_rule += 1 - self.assertGreaterEqual(lines_with_rule, 1) - self.assertGreaterEqual(lines_without_rule, 1) - # Assert no rule totals. - self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - # Assert final total - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + linter.check_underscore_file_is_safe(template, results) - def test_lint_with_rule_totals(self): + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].is_disabled, True) + self.assertEqual(results.violations[1].is_disabled, False) + + @data( + {'template': '<%= HtmlUtils.ensureHtml(message) %>'}, + {'template': '<%= _.escape(message) %>'}, + ) + def test_check_underscore_no_escape_allowed(self, data): """ - Tests the top-level linting with rule totals option. + Test check_underscore_file_is_safe with expressions that are allowed + without escaping because the internal calls properly escape. """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': False, - 'rule_totals': True, - }, - summary_results=summary_results, - out=out, - ) + linter.check_underscore_file_is_safe(data['template'], results) - output = out.getvalue() - self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) + self.assertEqual(len(results.violations), 0) - # Assert totals output. - self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_wrap_html.rule_id, 1), output)) - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) - def test_lint_with_list_files(self): +@ddt +class TestJavaScriptLinter(TestLinter): + """ + Test JavaScriptLinter + """ + @data( + {'template': 'var m = "Plain text " + message + "plain text"', 'rule': None}, + {'template': 'var m = "檌檒濦 " + message + "plain text"', 'rule': None}, + { + 'template': + ("""$email_header.append($('', type: "button", name: "copy-email-body-text",""" + """ value: gettext("Copy Email To Editor"), id: 'copy_email_' + email_id))"""), + 'rule': None + }, + {'template': 'var m = "
" + message + "
"', 'rule': Rules.javascript_concat_html}, + { + 'template': r'var m = "\"escaped quote\"" + message + "\"escaped quote\"
"', + 'rule': Rules.javascript_concat_html + }, + {'template': ' // var m = "" + commentedOutMessage + "
"', 'rule': None}, + {'template': 'var m = "" + message + "
"', 'rule': Rules.javascript_concat_html}, + {'template': 'var m = "" + message + " broken string', 'rule': Rules.javascript_concat_html}, + ) + def test_concat_with_html(self, data): """ - Tests the top-level linting with list files option. + Test check_javascript_file_is_safe with concatenating strings and HTML """ - out = StringIO() - summary_results = SummaryResults() + linter = JavaScriptLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': True, - 'verbose': False, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + linter.check_javascript_file_is_safe(data['template'], results) + self._validate_data_rules(data, results) - output = out.getvalue() - # Assert file with rule is not output. - self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) - # Assert file is output. - self.assertIsNotNone(re.search('test\.py', output)) + @data( + {'template': 'test.append( test.render().el )', 'rule': None}, + {'template': 'test.append(test.render().el)', 'rule': None}, + {'template': 'test.append(test.render().$el)', 'rule': None}, + {'template': 'test.append(testEl)', 'rule': None}, + {'template': 'test.append($test)', 'rule': None}, + # plain text is ok because any & will be escaped, and it stops false + # negatives on some other objects with an append() method + {'template': 'test.append("plain text")', 'rule': None}, + {'template': 'test.append("
")', 'rule': Rules.javascript_jquery_append}, + {'template': 'graph.svg.append("g")', 'rule': None}, + {'template': 'test.append( $( "" + commentedOutMessage + "
"', 'rule': None}, + {'template': 'm = "" + message + "
"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, + {'template': 'm = "" + message + "
"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, + {'template': 'm = " " + message + " broken string', 'rule': Rules.python_parse_error},
+ )
+ def test_concat_with_html(self, data):
+ """
+ Test check_python_file_is_safe with concatenating strings and HTML
+ """
+ linter = PythonLinter()
+ results = FileResults('')
+
+ linter.check_python_file_is_safe(data['template'], results)
+
+ self._validate_data_rules(data, results)
+
+ def test_check_python_expression_display_name(self):
+ """
+ Test _check_python_file_is_safe with display_name_with_default_escaped
+ fails.
+ """
+ linter = PythonLinter()
+ results = FileResults('')
+
+ python_file = textwrap.dedent("""
+ context = {
+ 'display_name': self.display_name_with_default_escaped,
+ }
+ """)
+
+ linter.check_python_file_is_safe(python_file, results)
+
+ self.assertEqual(len(results.violations), 1)
+ self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name)
+
+ def test_check_custom_escaping(self):
+ """
+ Test _check_python_file_is_safe fails when custom escapins is used.
+ """
+ linter = PythonLinter()
+ results = FileResults('')
+
+ python_file = textwrap.dedent("""
+ msg = mmlans.replace('<', '<')
+ """)
+
+ linter.check_python_file_is_safe(python_file, results)
+
+ self.assertEqual(len(results.violations), 1)
+ self.assertEqual(results.violations[0].rule, Rules.python_custom_escape)
+
+ @data(
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = Text("Mixed {span_start}text{span_end}").format(
+ span_start=HTML(""),
+ span_end=HTML(""),
+ )
+ """),
+ 'rule': None
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = "Mixed {span_start}text{span_end}".format(
+ span_start=HTML(""),
+ span_end=HTML(""),
+ )
+ """),
+ 'rule': Rules.python_requires_html_or_text
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = "Mixed {span_start}{text}{span_end}".format(
+ span_start=HTML(""),
+ text=Text("This should still break."),
+ span_end=HTML(""),
+ )
+ """),
+ 'rule': Rules.python_requires_html_or_text
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = Text("Mixed {link_start}text{link_end}".format(
+ link_start=HTML("").format(url),
+ link_end=HTML(""),
+ ))
+ """),
+ 'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text]
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = Text("Mixed {link_start}text{link_end}").format(
+ link_start=HTML("".format(url)),
+ link_end=HTML(""),
+ )
+ """),
+ 'rule': Rules.python_close_before_format
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = Text("Mixed {link_start}text{link_end}".format(
+ link_start=HTML("".format(HTML(''))),
+ link_end=HTML(""),
+ ))
+ """),
+ 'rule':
+ [
+ Rules.python_close_before_format,
+ Rules.python_requires_html_or_text,
+ Rules.python_close_before_format,
+ Rules.python_requires_html_or_text
+ ]
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = "Mixed {span_start}text{span_end}".format(
+ span_start="",
+ span_end="",
+ )
+ """),
+ 'rule': [Rules.python_wrap_html, Rules.python_wrap_html]
+ },
+ {
+ 'python':
+ textwrap.dedent("""
+ msg = Text(_("String with multiple lines "
+ "{link_start}unenroll{link_end} "
+ "and final line")).format(
+ link_start=HTML(
+ ''
+ ).format(
+ # Line comment with ' to throw off parser
+ course_id=course_overview.id
+ ),
+ link_end=HTML(''),
+ )
+ """),
+ 'rule': None
+ },
+ {
+ 'python': "msg = ''",
+ 'rule': None
+ },
+ {
+ 'python': "msg = HTML('')",
+ 'rule': None
+ },
+ {
+ 'python': r"""msg = ''.format(message)""",
+ 'rule': Rules.python_wrap_html
+ },
+ {
+ 'python': r"""r'regex with {} and named group(?P
" + message + "
"', 'rule': Rules.javascript_concat_html}, - { - 'template': r'var m = "\"escaped quote\"" + message + "\"escaped quote\"
"', - 'rule': Rules.javascript_concat_html - }, - {'template': ' // var m = "" + commentedOutMessage + "
"', 'rule': None}, - {'template': 'var m = "" + message + "
"', 'rule': Rules.javascript_concat_html}, - {'template': 'var m = "" + message + " broken string', 'rule': Rules.javascript_concat_html}, - ) - def test_concat_with_html(self, data): - """ - Test check_javascript_file_is_safe with concatenating strings and HTML - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - self._validate_data_rules(data, results) - - @data( - {'template': 'test.append( test.render().el )', 'rule': None}, - {'template': 'test.append(test.render().el)', 'rule': None}, - {'template': 'test.append(test.render().$el)', 'rule': None}, - {'template': 'test.append(testEl)', 'rule': None}, - {'template': 'test.append($test)', 'rule': None}, - # plain text is ok because any & will be escaped, and it stops false - # negatives on some other objects with an append() method - {'template': 'test.append("plain text")', 'rule': None}, - {'template': 'test.append("
")', 'rule': Rules.javascript_jquery_append}, - {'template': 'graph.svg.append("g")', 'rule': None}, - {'template': 'test.append( $( "" + commentedOutMessage + "
"', 'rule': None}, - {'template': 'm = "" + message + "
"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, - {'template': 'm = "" + message + "
"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, - {'template': 'm = " " + message + " broken string', 'rule': Rules.python_parse_error},
- )
- def test_concat_with_html(self, data):
- """
- Test check_python_file_is_safe with concatenating strings and HTML
- """
- linter = PythonLinter()
- results = FileResults('')
-
- linter.check_python_file_is_safe(data['template'], results)
-
- self._validate_data_rules(data, results)
-
- def test_check_python_expression_display_name(self):
- """
- Test _check_python_file_is_safe with display_name_with_default_escaped
- fails.
- """
- linter = PythonLinter()
- results = FileResults('')
-
- python_file = textwrap.dedent("""
- context = {
- 'display_name': self.display_name_with_default_escaped,
- }
- """)
-
- linter.check_python_file_is_safe(python_file, results)
-
- self.assertEqual(len(results.violations), 1)
- self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name)
-
- def test_check_custom_escaping(self):
- """
- Test _check_python_file_is_safe fails when custom escapins is used.
- """
- linter = PythonLinter()
- results = FileResults('')
-
- python_file = textwrap.dedent("""
- msg = mmlans.replace('<', '<')
- """)
-
- linter.check_python_file_is_safe(python_file, results)
-
- self.assertEqual(len(results.violations), 1)
- self.assertEqual(results.violations[0].rule, Rules.python_custom_escape)
-
- @data(
- {
- 'python':
- textwrap.dedent("""
- msg = Text("Mixed {span_start}text{span_end}").format(
- span_start=HTML(""),
- span_end=HTML(""),
- )
- """),
- 'rule': None
- },
- {
- 'python':
- textwrap.dedent("""
- msg = "Mixed {span_start}text{span_end}".format(
- span_start=HTML(""),
- span_end=HTML(""),
- )
- """),
- 'rule': Rules.python_requires_html_or_text
- },
- {
- 'python':
- textwrap.dedent("""
- msg = "Mixed {span_start}{text}{span_end}".format(
- span_start=HTML(""),
- text=Text("This should still break."),
- span_end=HTML(""),
- )
- """),
- 'rule': Rules.python_requires_html_or_text
- },
- {
- 'python':
- textwrap.dedent("""
- msg = Text("Mixed {link_start}text{link_end}".format(
- link_start=HTML("").format(url),
- link_end=HTML(""),
- ))
- """),
- 'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text]
- },
- {
- 'python':
- textwrap.dedent("""
- msg = Text("Mixed {link_start}text{link_end}").format(
- link_start=HTML("".format(url)),
- link_end=HTML(""),
- )
- """),
- 'rule': Rules.python_close_before_format
- },
- {
- 'python':
- textwrap.dedent("""
- msg = Text("Mixed {link_start}text{link_end}".format(
- link_start=HTML("".format(HTML(''))),
- link_end=HTML(""),
- ))
- """),
- 'rule':
- [
- Rules.python_close_before_format,
- Rules.python_requires_html_or_text,
- Rules.python_close_before_format,
- Rules.python_requires_html_or_text
- ]
- },
- {
- 'python':
- textwrap.dedent("""
- msg = "Mixed {span_start}text{span_end}".format(
- span_start="",
- span_end="",
- )
- """),
- 'rule': [Rules.python_wrap_html, Rules.python_wrap_html]
- },
- {
- 'python':
- textwrap.dedent("""
- msg = Text(_("String with multiple lines "
- "{link_start}unenroll{link_end} "
- "and final line")).format(
- link_start=HTML(
- ''
- ).format(
- # Line comment with ' to throw off parser
- course_id=course_overview.id
- ),
- link_end=HTML(''),
- )
- """),
- 'rule': None
- },
- {
- 'python': "msg = ''",
- 'rule': None
- },
- {
- 'python': "msg = HTML('')",
- 'rule': None
- },
- {
- 'python': r"""msg = ''.format(message)""",
- 'rule': Rules.python_wrap_html
- },
- {
- 'python': r"""r'regex with {} and named group(?P
" + message + "
" - - Other details of note: - - Only a start_index is required for a valid expression. - - If end_index is None, it means we couldn't parse the rest of the - expression. - - All other details of the expression are optional, and are only added if - and when supplied and needed for additional checks. They are not necessary - for the final results output. - - """ - - def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): - """ - Init method. - - Arguments: - start_index: the starting index of the expression - end_index: the index immediately following the expression, or None - if the expression was unparseable - template: optional template code in which the expression was found - start_delim: optional starting delimiter of the expression - end_delim: optional ending delimeter of the expression - strings: optional list of ParseStrings - - """ - self.start_index = start_index - self.end_index = end_index - self.start_delim = start_delim - self.end_delim = end_delim - self.strings = strings - if template is not None and self.end_index is not None: - self.expression = template[start_index:end_index] - self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() - else: - self.expression = None - self.expression_inner = None - - -class RuleViolation(object): - """ - Base class representing a rule violation which can be used for reporting. - """ - - def __init__(self, rule): - """ - Init method. - - Arguments: - rule: The Rule which was violated. - - """ - self.rule = rule - self.full_path = '' - self.is_disabled = False - - def _mark_disabled(self, string, scope_start_string=False): - """ - Performs the disable pragma search and marks the rule as disabled if a - matching pragma is found. - - Pragma format:: - - xss-lint: disable=violation-name,other-violation-name - - Arguments: - string: The string of code in which to search for the pragma. - scope_start_string: True if the pragma must be at the start of the - string, False otherwise. The pragma is considered at the start - of the string if it has a maximum of 5 non-whitespace characters - preceding it. - - Side Effect: - Sets self.is_disabled as appropriate based on whether the pragma is - found. - - """ - pragma_match = re.search(r'xss-lint:\s*disable=([a-zA-Z,\- ]+)', string) - if pragma_match is None: - return - if scope_start_string: - spaces_count = string.count(' ', 0, pragma_match.start()) - non_space_count = pragma_match.start() - spaces_count - if non_space_count > 5: - return - - for disabled_rule in pragma_match.group(1).split(','): - if disabled_rule.strip() == self.rule.rule_id: - self.is_disabled = True - return - - def sort_key(self): - """ - Returns a key that can be sorted on - """ - return (0, 0, self.rule.rule_id) - - def first_line(self): - """ - Since a file level rule has no first line, returns empty string. - """ - return '' - - def prepare_results(self, full_path, string_lines): - """ - Preps this instance for results reporting. - - Arguments: - full_path: Path of the file in violation. - string_lines: A StringLines containing the contents of the file in - violation. - - """ - self.full_path = full_path - self._mark_disabled(string_lines.get_string()) - - def print_results(self, _options, out): - """ - Prints the results represented by this rule violation. - - Arguments: - _options: ignored - out: output file - """ - print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) - - -class ExpressionRuleViolation(RuleViolation): - """ - A class representing a particular rule violation for expressions which - contain more specific details of the location of the violation for reporting - purposes. - - """ - - def __init__(self, rule, expression): - """ - Init method. - - Arguments: - rule: The Rule which was violated. - expression: The Expression that was in violation. - - """ - super(ExpressionRuleViolation, self).__init__(rule) - self.expression = expression - self.start_line = 0 - self.start_column = 0 - self.end_line = 0 - self.end_column = 0 - self.lines = [] - self.is_disabled = False - - def _mark_expression_disabled(self, string_lines): - """ - Marks the expression violation as disabled if it finds the disable - pragma anywhere on the first line of the violation, or at the start of - the line preceding the violation. - - Pragma format:: - - xss-lint: disable=violation-name,other-violation-name - - Examples:: - - <% // xss-lint: disable=underscore-not-escaped %> - <%= gettext('Single Line') %> - - <%= gettext('Single Line') %><% // xss-lint: disable=underscore-not-escaped %> - - Arguments: - string_lines: A StringLines containing the contents of the file in - violation. - - Side Effect: - Sets self.is_disabled as appropriate based on whether the pragma is - found. - - """ - # disable pragma can be at the start of the preceding line - has_previous_line = self.start_line > 1 - if has_previous_line: - line_to_check = string_lines.line_number_to_line(self.start_line - 1) - self._mark_disabled(line_to_check, scope_start_string=True) - if self.is_disabled: - return - - # TODO: this should work at end of any line of the violation - # disable pragma can be anywhere on the first line of the violation - line_to_check = string_lines.line_number_to_line(self.start_line) - self._mark_disabled(line_to_check, scope_start_string=False) - - def sort_key(self): - """ - Returns a key that can be sorted on - """ - return (self.start_line, self.start_column, self.rule.rule_id) - - def first_line(self): - """ - Returns the initial line of code of the violation. - """ - return self.lines[0] - - def prepare_results(self, full_path, string_lines): - """ - Preps this instance for results reporting. - - Arguments: - full_path: Path of the file in violation. - string_lines: A StringLines containing the contents of the file in - violation. - - """ - self.full_path = full_path - start_index = self.expression.start_index - self.start_line = string_lines.index_to_line_number(start_index) - self.start_column = string_lines.index_to_column_number(start_index) - end_index = self.expression.end_index - if end_index is not None: - self.end_line = string_lines.index_to_line_number(end_index) - self.end_column = string_lines.index_to_column_number(end_index) - else: - self.end_line = self.start_line - self.end_column = '?' - for line_number in range(self.start_line, self.end_line + 1): - self.lines.append(string_lines.line_number_to_line(line_number)) - self._mark_expression_disabled(string_lines) - - def print_results(self, options, out): - """ - Prints the results represented by this rule violation. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - verbose: True for multiple lines of context, False single line. - out: output file - - """ - if options['verbose']: - end_line = self.end_line + 1 - else: - end_line = self.start_line + 1 - for line_number in range(self.start_line, end_line): - if line_number == self.start_line: - column = self.start_column - rule_id = self.rule.rule_id + ":" - else: - column = 1 - rule_id = " " * (len(self.rule.rule_id) + 1) - line = self.lines[line_number - self.start_line].encode(encoding='utf-8') - print("{}: {}:{}: {} {}".format( - self.full_path, - line_number, - column, - rule_id, - line - ), file=out) - - -class SummaryResults(object): - """ - Contains the summary results for all violations. - """ - - def __init__(self): - """ - Init method. - """ - self.total_violations = 0 - self.totals_by_rule = dict.fromkeys( - [rule.rule_id for rule in Rules.__members__.values()], 0 - ) - - def add_violation(self, violation): - """ - Adds a violation to the summary details. - - Arguments: - violation: The violation to add to the summary. - - """ - self.total_violations += 1 - self.totals_by_rule[violation.rule.rule_id] += 1 - - def print_results(self, options, out): - """ - Prints the results (i.e. violations) in this file. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - rule_totals: If True include totals by rule. - out: output file - - """ - if options['list_files'] is False: - if options['rule_totals']: - max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule) - print("", file=out) - for rule_id in sorted(self.totals_by_rule.keys()): - padding = " " * (max_rule_id_len - len(rule_id)) - print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out) - print("", file=out) - - # matches output of eslint for simplicity - print("", file=out) - print("{} violations total".format(self.total_violations), file=out) - - -class FileResults(object): - """ - Contains the results, or violations, for a file. - """ - - def __init__(self, full_path): - """ - Init method. - - Arguments: - full_path: The full path for this file. - - """ - self.full_path = full_path - self.directory = os.path.dirname(full_path) - self.is_file = os.path.isfile(full_path) - self.violations = [] - - def prepare_results(self, file_string, line_comment_delim=None): - """ - Prepares the results for output for this file. - - Arguments: - file_string: The string of content for this file. - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - """ - string_lines = StringLines(file_string) - for violation in self.violations: - violation.prepare_results(self.full_path, string_lines) - if line_comment_delim is not None: - self._filter_commented_code(line_comment_delim) - - def print_results(self, options, summary_results, out): - """ - Prints the results (i.e. violations) in this file. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - summary_results: A SummaryResults with a summary of the violations. - verbose: True for multiple lines of context, False single line. - out: output file - - Side effect: - Updates the passed SummaryResults. - - """ - if options['list_files']: - if self.violations is not None and 0 < len(self.violations): - print(self.full_path, file=out) - else: - self.violations.sort(key=lambda violation: violation.sort_key()) - for violation in self.violations: - if not violation.is_disabled: - violation.print_results(options, out) - summary_results.add_violation(violation) - - def _filter_commented_code(self, line_comment_delim): - """ - Remove any violations that were found in commented out code. - - Arguments: - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - """ - self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)] - - def _is_commented(self, violation, line_comment_delim): - """ - Checks if violation line is commented out. - - Arguments: - violation: The violation to check - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - Returns: - True if the first line of the violation is actually commented out, - False otherwise. - """ - if 'parse' in violation.rule.rule_id: - # For parse rules, don't filter them because the comment could be a - # part of the parse issue to begin with. - return False - else: - return violation.first_line().lstrip().startswith(line_comment_delim) - - -class ParseString(object): - """ - ParseString is the result of parsing a string out of a template. - - A ParseString has the following attributes: - start_index: The index of the first quote, or None if none found - end_index: The index following the closing quote, or None if - unparseable - quote_length: The length of the quote. Could be 3 for a Python - triple quote. Or None if none found. - string: the text of the parsed string, or None if none found. - string_inner: the text inside the quotes of the parsed string, or None - if none found. - - """ - - def __init__(self, template, start_index, end_index): - """ - Init method. - - Arguments: - template: The template to be searched. - start_index: The start index to search. - end_index: The end index to search before. - - """ - self.end_index = None - self.quote_length = None - self.string = None - self.string_inner = None - self.start_index = self._find_string_start(template, start_index, end_index) - if self.start_index is not None: - result = self._parse_string(template, self.start_index) - if result is not None: - self.end_index = result['end_index'] - self.quote_length = result['quote_length'] - self.string = result['string'] - self.string_inner = result['string_inner'] - - def _find_string_start(self, template, start_index, end_index): - """ - Finds the index of the end of start of a string. In other words, the - first single or double quote. - - Arguments: - template: The template to be searched. - start_index: The start index to search. - end_index: The end index to search before. - - Returns: - The start index of the first single or double quote, or None if no - quote was found. - """ - quote_regex = re.compile(r"""['"]""") - start_match = quote_regex.search(template, start_index, end_index) - if start_match is None: - return None - else: - return start_match.start() - - def _parse_string(self, template, start_index): - """ - Finds the indices of a string inside a template. - - Arguments: - template: The template to be searched. - start_index: The start index of the open quote. - - Returns: - A dict containing the following, or None if not parseable: - end_index: The index following the closing quote - quote_length: The length of the quote. Could be 3 for a Python - triple quote. - string: the text of the parsed string - string_inner: the text inside the quotes of the parsed string - - """ - quote = template[start_index] - if quote not in ["'", '"']: - raise ValueError("start_index must refer to a single or double quote.") - triple_quote = quote * 3 - if template.startswith(triple_quote, start_index): - quote = triple_quote - - next_start_index = start_index + len(quote) - while True: - quote_end_index = template.find(quote, next_start_index) - backslash_index = template.find("\\", next_start_index) - if quote_end_index < 0: - return None - if 0 <= backslash_index < quote_end_index: - next_start_index = backslash_index + 2 - else: - end_index = quote_end_index + len(quote) - quote_length = len(quote) - string = template[start_index:end_index] - return { - 'end_index': end_index, - 'quote_length': quote_length, - 'string': string, - 'string_inner': string[quote_length:-quote_length], - } +from xsslint.reporting import ExpressionRuleViolation, FileResults, RuleViolation +from xsslint.rules import Rules +from xsslint.utils import SKIP_DIRS, Expression, ParseString, StringLines, is_skip_dir +from xsslint.visitors import AllNodeVisitor, HtmlStringVisitor, OuterFormatVisitor class BaseLinter(object): @@ -1419,328 +685,6 @@ class JavaScriptLinter(BaseLinter): )) -class BaseVisitor(ast.NodeVisitor): - """ - Base class for AST NodeVisitor used for Python xss linting. - - Important: This base visitor skips all __repr__ function definitions. - """ - def __init__(self, file_contents, results): - """ - Init method. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - - """ - super(BaseVisitor, self).__init__() - self.file_contents = file_contents - self.lines = StringLines(self.file_contents) - self.results = results - - def node_to_expression(self, node): - """ - Takes a node and translates it to an expression to be used with - violations. - - Arguments: - node: An AST node. - - """ - line_start_index = self.lines.line_number_to_start_index(node.lineno) - start_index = line_start_index + node.col_offset - if isinstance(node, ast.Str): - # Triple quotes give col_offset of -1 on the last line of the string. - if node.col_offset == -1: - triple_quote_regex = re.compile("""['"]{3}""") - end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index) - open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start()) - if open_quote_index > 0: - start_index = open_quote_index - else: - # If we can't find a starting quote, let's assume that what - # we considered the end quote is really the start quote. - start_index = end_triple_quote_match.start() - string = ParseString(self.file_contents, start_index, len(self.file_contents)) - return Expression(string.start_index, string.end_index) - else: - return Expression(start_index) - - def visit_FunctionDef(self, node): - """ - Skips processing of __repr__ functions, since these sometimes use '<' - for non-HTML purposes. - - Arguments: - node: An AST node. - """ - if node.name != '__repr__': - self.generic_visit(node) - - -class HtmlStringVisitor(BaseVisitor): - """ - Checks for strings that contain HTML. Assumes any string with < or > is - considered potential HTML. - - To be used only with strings in context of format or concat. - - """ - def __init__(self, file_contents, results, skip_wrapped_html=False): - """ - Init function. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - skip_wrapped_html: True if visitor should skip strings wrapped with - HTML() or Text(), and False otherwise. - """ - super(HtmlStringVisitor, self).__init__(file_contents, results) - self.skip_wrapped_html = skip_wrapped_html - self.unsafe_html_string_nodes = [] - self.over_escaped_entity_string_nodes = [] - self.has_text_or_html_call = False - - def visit_Str(self, node): - """ - When strings are visited, checks if it contains HTML. - - Arguments: - node: An AST node. - """ - # Skips '<' (and '>') in regex named groups. For example, "(?P" + message + "
" + + Other details of note: + - Only a start_index is required for a valid expression. + - If end_index is None, it means we couldn't parse the rest of the + expression. + - All other details of the expression are optional, and are only added if + and when supplied and needed for additional checks. They are not necessary + for the final results output. + + """ + + def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): + """ + Init method. + + Arguments: + start_index: the starting index of the expression + end_index: the index immediately following the expression, or None + if the expression was unparseable + template: optional template code in which the expression was found + start_delim: optional starting delimiter of the expression + end_delim: optional ending delimeter of the expression + strings: optional list of ParseStrings + + """ + self.start_index = start_index + self.end_index = end_index + self.start_delim = start_delim + self.end_delim = end_delim + self.strings = strings + if template is not None and self.end_index is not None: + self.expression = template[start_index:end_index] + self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() + else: + self.expression = None + self.expression_inner = None diff --git a/scripts/xsslint/xsslint/visitors.py b/scripts/xsslint/xsslint/visitors.py new file mode 100644 index 0000000000..581a563cea --- /dev/null +++ b/scripts/xsslint/xsslint/visitors.py @@ -0,0 +1,331 @@ +""" +Custom AST NodeVisitor classes uses for Python xss linting. +""" +import ast +import re + +from xsslint.reporting import ExpressionRuleViolation +from xsslint.rules import Rules +from xsslint.utils import Expression, ParseString, StringLines + + +class BaseVisitor(ast.NodeVisitor): + """ + Base class for AST NodeVisitor used for Python xss linting. + + Important: This base visitor skips all __repr__ function definitions. + """ + def __init__(self, file_contents, results): + """ + Init method. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + + """ + super(BaseVisitor, self).__init__() + self.file_contents = file_contents + self.lines = StringLines(self.file_contents) + self.results = results + + def node_to_expression(self, node): + """ + Takes a node and translates it to an expression to be used with + violations. + + Arguments: + node: An AST node. + + """ + line_start_index = self.lines.line_number_to_start_index(node.lineno) + start_index = line_start_index + node.col_offset + if isinstance(node, ast.Str): + # Triple quotes give col_offset of -1 on the last line of the string. + if node.col_offset == -1: + triple_quote_regex = re.compile("""['"]{3}""") + end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index) + open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start()) + if open_quote_index > 0: + start_index = open_quote_index + else: + # If we can't find a starting quote, let's assume that what + # we considered the end quote is really the start quote. + start_index = end_triple_quote_match.start() + string = ParseString(self.file_contents, start_index, len(self.file_contents)) + return Expression(string.start_index, string.end_index) + else: + return Expression(start_index) + + def visit_FunctionDef(self, node): + """ + Skips processing of __repr__ functions, since these sometimes use '<' + for non-HTML purposes. + + Arguments: + node: An AST node. + """ + if node.name != '__repr__': + self.generic_visit(node) + + +class HtmlStringVisitor(BaseVisitor): + """ + Checks for strings that contain HTML. Assumes any string with < or > is + considered potential HTML. + + To be used only with strings in context of format or concat. + + """ + def __init__(self, file_contents, results, skip_wrapped_html=False): + """ + Init function. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + skip_wrapped_html: True if visitor should skip strings wrapped with + HTML() or Text(), and False otherwise. + """ + super(HtmlStringVisitor, self).__init__(file_contents, results) + self.skip_wrapped_html = skip_wrapped_html + self.unsafe_html_string_nodes = [] + self.over_escaped_entity_string_nodes = [] + self.has_text_or_html_call = False + + def visit_Str(self, node): + """ + When strings are visited, checks if it contains HTML. + + Arguments: + node: An AST node. + """ + # Skips '<' (and '>') in regex named groups. For example, "(?P