From 07ca4402268741fe9ca195c16bc2affc59c275b1 Mon Sep 17 00:00:00 2001 From: Anthony Mangano Date: Thu, 8 Feb 2018 17:33:18 -0500 Subject: [PATCH] Refactor XSS linter into smaller files --- pavelib/quality.py | 4 +- scripts/xss-commit-linter.sh | 4 +- scripts/{ => xsslint}/tests/__init__.py | 0 .../{ => xsslint}/tests/templates/test.coffee | 0 .../{ => xsslint}/tests/templates/test.html | 0 scripts/{ => xsslint}/tests/templates/test.js | 0 scripts/{ => xsslint}/tests/templates/test.py | 0 .../tests/templates/test.underscore | 0 .../tests/test_linters.py} | 1508 +++++++---------- scripts/xsslint/tests/test_main.py | 179 ++ scripts/xsslint/tests/test_utils.py | 44 + scripts/xsslint/xss_linter.py | 9 + scripts/xsslint/xsslint/__init__.py | 0 .../xsslint/linters.py} | 1235 +------------- scripts/xsslint/xsslint/main.py | 140 ++ scripts/xsslint/xsslint/reporting.py | 376 ++++ scripts/xsslint/xsslint/rules.py | 39 + scripts/xsslint/xsslint/utils.py | 366 ++++ scripts/xsslint/xsslint/visitors.py | 331 ++++ 19 files changed, 2141 insertions(+), 2094 deletions(-) rename scripts/{ => xsslint}/tests/__init__.py (100%) rename scripts/{ => xsslint}/tests/templates/test.coffee (100%) rename scripts/{ => xsslint}/tests/templates/test.html (100%) rename scripts/{ => xsslint}/tests/templates/test.js (100%) rename scripts/{ => xsslint}/tests/templates/test.py (100%) rename scripts/{ => xsslint}/tests/templates/test.underscore (100%) rename scripts/{tests/test_xss_linter.py => xsslint/tests/test_linters.py} (85%) create mode 100644 scripts/xsslint/tests/test_main.py create mode 100644 scripts/xsslint/tests/test_utils.py create mode 100755 scripts/xsslint/xss_linter.py create mode 100644 scripts/xsslint/xsslint/__init__.py rename scripts/{xss_linter.py => xsslint/xsslint/linters.py} (57%) mode change 100755 => 100644 create mode 100644 scripts/xsslint/xsslint/main.py create mode 100644 scripts/xsslint/xsslint/reporting.py create mode 100644 scripts/xsslint/xsslint/rules.py create mode 100644 scripts/xsslint/xsslint/utils.py create mode 100644 scripts/xsslint/xsslint/visitors.py diff --git a/pavelib/quality.py b/pavelib/quality.py index 1fe36f40fa..aaaf7b809a 100644 --- a/pavelib/quality.py +++ b/pavelib/quality.py @@ -409,7 +409,7 @@ def run_stylelint(options): @timed def run_xsslint(options): """ - Runs xss_linter.py on the codebase + Runs xsslint/xss_linter.py on the codebase """ thresholds_option = getattr(options, 'thresholds', '{}') @@ -434,7 +434,7 @@ def run_xsslint(options): _prepare_report_dir(xsslint_report_dir) sh( - "{repo_root}/scripts/{xsslint_script} --rule-totals >> {xsslint_report}".format( + "{repo_root}/scripts/xsslint/{xsslint_script} --rule-totals >> {xsslint_report}".format( repo_root=Env.REPO_ROOT, xsslint_script=xsslint_script, xsslint_report=xsslint_report, diff --git a/scripts/xss-commit-linter.sh b/scripts/xss-commit-linter.sh index 2a1dce4b9a..8941760b57 100755 --- a/scripts/xss-commit-linter.sh +++ b/scripts/xss-commit-linter.sh @@ -5,7 +5,7 @@ set -e # # xss-commit-linter.sh # -# Executes xss_linter.py on the set of files in a particular git commit. +# Executes xsslint/xss_linter.py on the set of files in a particular git commit. # ############################################################################### @@ -82,6 +82,6 @@ else for f in $diff_files; do echo "" echo "Linting $f:" - ./scripts/xss_linter.py $f + ./scripts/xsslint/xss_linter.py $f done fi diff --git a/scripts/tests/__init__.py b/scripts/xsslint/tests/__init__.py similarity index 100% rename from scripts/tests/__init__.py rename to scripts/xsslint/tests/__init__.py diff --git a/scripts/tests/templates/test.coffee b/scripts/xsslint/tests/templates/test.coffee similarity index 100% rename from scripts/tests/templates/test.coffee rename to scripts/xsslint/tests/templates/test.coffee diff --git a/scripts/tests/templates/test.html b/scripts/xsslint/tests/templates/test.html similarity index 100% rename from scripts/tests/templates/test.html rename to scripts/xsslint/tests/templates/test.html diff --git a/scripts/tests/templates/test.js b/scripts/xsslint/tests/templates/test.js similarity index 100% rename from scripts/tests/templates/test.js rename to scripts/xsslint/tests/templates/test.js diff --git a/scripts/tests/templates/test.py b/scripts/xsslint/tests/templates/test.py similarity index 100% rename from scripts/tests/templates/test.py rename to scripts/xsslint/tests/templates/test.py diff --git a/scripts/tests/templates/test.underscore b/scripts/xsslint/tests/templates/test.underscore similarity index 100% rename from scripts/tests/templates/test.underscore rename to scripts/xsslint/tests/templates/test.underscore diff --git a/scripts/tests/test_xss_linter.py b/scripts/xsslint/tests/test_linters.py similarity index 85% rename from scripts/tests/test_xss_linter.py rename to scripts/xsslint/tests/test_linters.py index 9f592652f2..ccc4b81329 100644 --- a/scripts/tests/test_xss_linter.py +++ b/scripts/xsslint/tests/test_linters.py @@ -1,66 +1,18 @@ # -*- coding: utf-8 -*- """ -Tests for xss_linter.py +Tests for linters.py """ -import re +from __future__ import print_function + import textwrap -from StringIO import StringIO from unittest import TestCase -import mock from ddt import data, ddt -from scripts.xss_linter import ( - FileResults, - JavaScriptLinter, - MakoTemplateLinter, - ParseString, - PythonLinter, - Rules, - StringLines, - SummaryResults, - UnderscoreTemplateLinter, - _lint -) - - -@ddt -class TestStringLines(TestCase): - """ - Test StringLines class. - """ - @data( - {'string': 'test', 'index': 0, 'line_start_index': 0, 'line_end_index': 4}, - {'string': 'test', 'index': 2, 'line_start_index': 0, 'line_end_index': 4}, - {'string': 'test', 'index': 3, 'line_start_index': 0, 'line_end_index': 4}, - {'string': '\ntest', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, - {'string': '\ntest', 'index': 2, 'line_start_index': 1, 'line_end_index': 5}, - {'string': '\ntest\n', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, - {'string': '\ntest\n', 'index': 2, 'line_start_index': 1, 'line_end_index': 6}, - {'string': '\ntest\n', 'index': 6, 'line_start_index': 6, 'line_end_index': 6}, - ) - def test_string_lines_start_end_index(self, data): - """ - Test StringLines index_to_line_start_index and index_to_line_end_index. - """ - lines = StringLines(data['string']) - self.assertEqual(lines.index_to_line_start_index(data['index']), data['line_start_index']) - self.assertEqual(lines.index_to_line_end_index(data['index']), data['line_end_index']) - - @data( - {'string': 'test', 'line_number': 1, 'line': 'test'}, - {'string': '\ntest', 'line_number': 1, 'line': ''}, - {'string': '\ntest', 'line_number': 2, 'line': 'test'}, - {'string': '\ntest\n', 'line_number': 1, 'line': ''}, - {'string': '\ntest\n', 'line_number': 2, 'line': 'test'}, - {'string': '\ntest\n', 'line_number': 3, 'line': ''}, - ) - def test_string_lines_start_end_index(self, data): - """ - Test line_number_to_line. - """ - lines = StringLines(data['string']) - self.assertEqual(lines.line_number_to_line(data['line_number']), data['line']) +from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter +from xsslint.reporting import FileResults +from xsslint.rules import Rules +from xsslint.utils import ParseString class TestLinter(TestCase): @@ -93,168 +45,678 @@ class TestLinter(TestCase): self.assertEqual(violation.rule, rule) -class TestXSSLinter(TestCase): +@ddt +class TestUnderscoreTemplateLinter(TestLinter): """ - Test some top-level linter functions + Test UnderscoreTemplateLinter """ - def setUp(self): + def test_check_underscore_file_is_safe(self): """ - Setup patches on linters for testing. + Test check_underscore_file_is_safe with safe template """ - self.patch_is_valid_directory(MakoTemplateLinter) - self.patch_is_valid_directory(JavaScriptLinter) - self.patch_is_valid_directory(UnderscoreTemplateLinter) - self.patch_is_valid_directory(PythonLinter) + linter = UnderscoreTemplateLinter() + results = FileResults('') - patcher = mock.patch('scripts.xss_linter.is_skip_dir', return_value=False) - patcher.start() - self.addCleanup(patcher.stop) + template = textwrap.dedent(""" + <%- gettext('Single Line') %> - def patch_is_valid_directory(self, linter_class): + <%- + gettext('Multiple Lines') + %> + """) + + linter.check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 0) + + def test_check_underscore_file_is_not_safe(self): """ - Creates a mock patch for _is_valid_directory on a Linter to always - return true. This avoids nested patch calls. - - Arguments: - linter_class: The linter class to be patched + Test check_underscore_file_is_safe with unsafe template """ - patcher = mock.patch.object(linter_class, '_is_valid_directory', return_value=True) - patch_start = patcher.start() - self.addCleanup(patcher.stop) - return patch_start + linter = UnderscoreTemplateLinter() + results = FileResults('') - def test_lint_defaults(self): + template = textwrap.dedent(""" + <%= gettext('Single Line') %> + + <%= + gettext('Multiple Lines') + %> + """) + + linter.check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped) + self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped) + + @data( + { + 'template': + '<% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // xss-lint: disable=another-rule,underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // xss-lint: disable=another-rule %>\n' + '<%= message %>', + 'is_disabled': [False], + }, + { + 'template': + '<% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '// This test does not use proper Underscore.js Template syntax\n' + '// But, it is just testing that a maximum of 5 non-whitespace\n' + '// are used to designate start of line for disabling the next line.\n' + ' 1 2 3 4 5 xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + ' 1 2 3 4 5 6 xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %><% // xss-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %>\n' + '<% // xss-lint: disable=underscore-not-escaped %>', + 'is_disabled': [False], + }, + ) + def test_check_underscore_file_disable_rule(self, data): """ - Tests the top-level linting with default options. + Test check_underscore_file_is_safe with various disabled pragmas """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': False, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + linter.check_underscore_file_is_safe(data['template'], results) - output = out.getvalue() - # Assert violation details are displayed. - self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output)) - self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output)) - self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) - self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output)) - self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) - lines_with_rule = 0 - lines_without_rule = 0 # Output with verbose setting only. - for underscore_match in re.finditer('test\.underscore:.*\n', output): - if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: - lines_with_rule += 1 - else: - lines_without_rule += 1 - self.assertGreaterEqual(lines_with_rule, 1) - self.assertEquals(lines_without_rule, 0) - self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output)) - self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) - # Assert no rule totals. - self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - # Assert final total - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + violation_count = len(data['is_disabled']) + self.assertEqual(len(results.violations), violation_count) + for index in range(0, violation_count - 1): + self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index]) - def test_lint_with_verbose(self): + def test_check_underscore_file_disables_one_violation(self): """ - Tests the top-level linting with verbose option. + Test check_underscore_file_is_safe with disabled before a line only + disables for the violation following """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': True, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + template = textwrap.dedent(""" + <% // xss-lint: disable=underscore-not-escaped %> + <%= message %> + <%= message %> + """) - output = out.getvalue() - lines_with_rule = 0 - lines_without_rule = 0 # Output with verbose setting only. - for underscore_match in re.finditer('test\.underscore:.*\n', output): - if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: - lines_with_rule += 1 - else: - lines_without_rule += 1 - self.assertGreaterEqual(lines_with_rule, 1) - self.assertGreaterEqual(lines_without_rule, 1) - # Assert no rule totals. - self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - # Assert final total - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + linter.check_underscore_file_is_safe(template, results) - def test_lint_with_rule_totals(self): + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].is_disabled, True) + self.assertEqual(results.violations[1].is_disabled, False) + + @data( + {'template': '<%= HtmlUtils.ensureHtml(message) %>'}, + {'template': '<%= _.escape(message) %>'}, + ) + def test_check_underscore_no_escape_allowed(self, data): """ - Tests the top-level linting with rule totals option. + Test check_underscore_file_is_safe with expressions that are allowed + without escaping because the internal calls properly escape. """ - out = StringIO() - summary_results = SummaryResults() + linter = UnderscoreTemplateLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': False, - 'verbose': False, - 'rule_totals': True, - }, - summary_results=summary_results, - out=out, - ) + linter.check_underscore_file_is_safe(data['template'], results) - output = out.getvalue() - self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) + self.assertEqual(len(results.violations), 0) - # Assert totals output. - self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_wrap_html.rule_id, 1), output)) - self.assertIsNotNone(re.search('{} violations total'.format(7), output)) - def test_lint_with_list_files(self): +@ddt +class TestJavaScriptLinter(TestLinter): + """ + Test JavaScriptLinter + """ + @data( + {'template': 'var m = "Plain text " + message + "plain text"', 'rule': None}, + {'template': 'var m = "檌檒濦 " + message + "plain text"', 'rule': None}, + { + 'template': + ("""$email_header.append($('', type: "button", name: "copy-email-body-text",""" + """ value: gettext("Copy Email To Editor"), id: 'copy_email_' + email_id))"""), + 'rule': None + }, + {'template': 'var m = "

" + message + "

"', 'rule': Rules.javascript_concat_html}, + { + 'template': r'var m = "

\"escaped quote\"" + message + "\"escaped quote\"

"', + 'rule': Rules.javascript_concat_html + }, + {'template': ' // var m = "

" + commentedOutMessage + "

"', 'rule': None}, + {'template': 'var m = "

" + message + "

"', 'rule': Rules.javascript_concat_html}, + {'template': 'var m = "

" + message + " broken string', 'rule': Rules.javascript_concat_html}, + ) + def test_concat_with_html(self, data): """ - Tests the top-level linting with list files option. + Test check_javascript_file_is_safe with concatenating strings and HTML """ - out = StringIO() - summary_results = SummaryResults() + linter = JavaScriptLinter() + results = FileResults('') - _lint( - 'scripts/tests/templates', - template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], - options={ - 'list_files': True, - 'verbose': False, - 'rule_totals': False, - }, - summary_results=summary_results, - out=out, - ) + linter.check_javascript_file_is_safe(data['template'], results) + self._validate_data_rules(data, results) - output = out.getvalue() - # Assert file with rule is not output. - self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) - # Assert file is output. - self.assertIsNotNone(re.search('test\.py', output)) + @data( + {'template': 'test.append( test.render().el )', 'rule': None}, + {'template': 'test.append(test.render().el)', 'rule': None}, + {'template': 'test.append(test.render().$el)', 'rule': None}, + {'template': 'test.append(testEl)', 'rule': None}, + {'template': 'test.append($test)', 'rule': None}, + # plain text is ok because any & will be escaped, and it stops false + # negatives on some other objects with an append() method + {'template': 'test.append("plain text")', 'rule': None}, + {'template': 'test.append("

")', 'rule': Rules.javascript_jquery_append}, + {'template': 'graph.svg.append("g")', 'rule': None}, + {'template': 'test.append( $( "
" ) )', 'rule': None}, + {'template': 'test.append($("
"))', 'rule': None}, + {'template': 'test.append($("
"))', 'rule': None}, + {'template': 'test.append(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'HtmlUtils.append($el, someHtml)', 'rule': None}, + {'template': 'test.append("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_append}, + {'template': 'test.append("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_append}, + {'template': 'test.append(message)', 'rule': Rules.javascript_jquery_append}, + ) + def test_jquery_append(self, data): + """ + Test check_javascript_file_is_safe with JQuery append() + """ + linter = JavaScriptLinter() + results = FileResults('') - # Assert no totals. - self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) - self.assertIsNone(re.search('{} violations total'.format(7), output)) + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + @data( + {'template': 'test.prepend( test.render().el )', 'rule': None}, + {'template': 'test.prepend(test.render().el)', 'rule': None}, + {'template': 'test.prepend(test.render().$el)', 'rule': None}, + {'template': 'test.prepend(testEl)', 'rule': None}, + {'template': 'test.prepend($test)', 'rule': None}, + {'template': 'test.prepend("text")', 'rule': None}, + {'template': 'test.prepend( $( "
" ) )', 'rule': None}, + {'template': 'test.prepend($("
"))', 'rule': None}, + {'template': 'test.prepend($("
"))', 'rule': None}, + {'template': 'test.prepend(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'HtmlUtils.prepend($el, someHtml)', 'rule': None}, + {'template': 'test.prepend("broken string)', 'rule': Rules.javascript_jquery_prepend}, + {'template': 'test.prepend("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_prepend}, + {'template': 'test.prepend("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_prepend}, + {'template': 'test.prepend(message)', 'rule': Rules.javascript_jquery_prepend}, + ) + def test_jquery_prepend(self, data): + """ + Test check_javascript_file_is_safe with JQuery prepend() + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + @data( + {'template': 'test.unwrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.wrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.wrapAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.wrapInner(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.after(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.before(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.replaceAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.replaceWith(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'test.replaceWith(edx.HtmlUtils.HTML(htmlString).toString())', 'rule': None}, + {'template': 'test.unwrap(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.wrap(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.wrapAll(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.wrapInner(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.after(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.before(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.replaceAll(anything)', 'rule': Rules.javascript_jquery_insertion}, + {'template': 'test.replaceWith(anything)', 'rule': Rules.javascript_jquery_insertion}, + ) + def test_jquery_insertion(self, data): + """ + Test check_javascript_file_is_safe with JQuery insertion functions + other than append(), prepend() and html() that take content as an + argument (e.g. before(), after()). + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + @data( + {'template': ' element.parentNode.appendTo(target);', 'rule': None}, + {'template': ' test.render().el.appendTo(target);', 'rule': None}, + {'template': ' test.render().$el.appendTo(target);', 'rule': None}, + {'template': ' test.$element.appendTo(target);', 'rule': None}, + {'template': ' test.testEl.appendTo(target);', 'rule': None}, + {'template': '$element.appendTo(target);', 'rule': None}, + {'template': 'el.appendTo(target);', 'rule': None}, + {'template': 'testEl.appendTo(target);', 'rule': None}, + {'template': 'testEl.prependTo(target);', 'rule': None}, + {'template': 'testEl.insertAfter(target);', 'rule': None}, + {'template': 'testEl.insertBefore(target);', 'rule': None}, + {'template': 'anycall().appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, + {'template': 'anything.appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, + {'template': 'anything.prependTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, + {'template': 'anything.insertAfter(target)', 'rule': Rules.javascript_jquery_insert_into_target}, + {'template': 'anything.insertBefore(target)', 'rule': Rules.javascript_jquery_insert_into_target}, + ) + def test_jquery_insert_to_target(self, data): + """ + Test check_javascript_file_is_safe with JQuery insert to target + functions that take a target as an argument, like appendTo() and + prependTo(). + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + @data( + {'template': 'test.html()', 'rule': None}, + {'template': 'test.html( )', 'rule': None}, + {'template': "test.html( '' )", 'rule': None}, + {'template': "test.html('')", 'rule': None}, + {'template': 'test.html("")', 'rule': None}, + {'template': 'test.html(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, + {'template': 'HtmlUtils.setHtml($el, someHtml)', 'rule': None}, + {'template': 'test.html("any string")', 'rule': Rules.javascript_jquery_html}, + {'template': 'test.html("broken string)', 'rule': Rules.javascript_jquery_html}, + {'template': 'test.html("檌檒濦")', 'rule': Rules.javascript_jquery_html}, + {'template': 'test.html(anything)', 'rule': Rules.javascript_jquery_html}, + ) + def test_jquery_html(self, data): + """ + Test check_javascript_file_is_safe with JQuery html() + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + self._validate_data_rules(data, results) + + @data( + {'template': 'StringUtils.interpolate()', 'rule': None}, + {'template': 'HtmlUtils.interpolateHtml()', 'rule': None}, + {'template': 'interpolate(anything)', 'rule': Rules.javascript_interpolate}, + ) + def test_javascript_interpolate(self, data): + """ + Test check_javascript_file_is_safe with interpolate() + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + @data( + {'template': '_.escape(message)', 'rule': None}, + {'template': 'anything.escape(message)', 'rule': Rules.javascript_escape}, + ) + def test_javascript_interpolate(self, data): + """ + Test check_javascript_file_is_safe with interpolate() + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + +@ddt +class TestPythonLinter(TestLinter): + """ + Test PythonLinter + """ + @data( + {'template': 'm = "Plain text " + message + "plain text"', 'rule': None}, + {'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None}, + {'template': ' # m = "

" + commentedOutMessage + "

"', 'rule': None}, + {'template': 'm = "

" + message + "

"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, + {'template': 'm = "

" + message + "

"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, + {'template': 'm = "

" + message + " broken string', 'rule': Rules.python_parse_error}, + ) + def test_concat_with_html(self, data): + """ + Test check_python_file_is_safe with concatenating strings and HTML + """ + linter = PythonLinter() + results = FileResults('') + + linter.check_python_file_is_safe(data['template'], results) + + self._validate_data_rules(data, results) + + def test_check_python_expression_display_name(self): + """ + Test _check_python_file_is_safe with display_name_with_default_escaped + fails. + """ + linter = PythonLinter() + results = FileResults('') + + python_file = textwrap.dedent(""" + context = { + 'display_name': self.display_name_with_default_escaped, + } + """) + + linter.check_python_file_is_safe(python_file, results) + + self.assertEqual(len(results.violations), 1) + self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name) + + def test_check_custom_escaping(self): + """ + Test _check_python_file_is_safe fails when custom escapins is used. + """ + linter = PythonLinter() + results = FileResults('') + + python_file = textwrap.dedent(""" + msg = mmlans.replace('<', '<') + """) + + linter.check_python_file_is_safe(python_file, results) + + self.assertEqual(len(results.violations), 1) + self.assertEqual(results.violations[0].rule, Rules.python_custom_escape) + + @data( + { + 'python': + textwrap.dedent(""" + msg = Text("Mixed {span_start}text{span_end}").format( + span_start=HTML(""), + span_end=HTML(""), + ) + """), + 'rule': None + }, + { + 'python': + textwrap.dedent(""" + msg = "Mixed {span_start}text{span_end}".format( + span_start=HTML(""), + span_end=HTML(""), + ) + """), + 'rule': Rules.python_requires_html_or_text + }, + { + 'python': + textwrap.dedent(""" + msg = "Mixed {span_start}{text}{span_end}".format( + span_start=HTML(""), + text=Text("This should still break."), + span_end=HTML(""), + ) + """), + 'rule': Rules.python_requires_html_or_text + }, + { + 'python': + textwrap.dedent(""" + msg = Text("Mixed {link_start}text{link_end}".format( + link_start=HTML("").format(url), + link_end=HTML(""), + )) + """), + 'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text] + }, + { + 'python': + textwrap.dedent(""" + msg = Text("Mixed {link_start}text{link_end}").format( + link_start=HTML("".format(url)), + link_end=HTML(""), + ) + """), + 'rule': Rules.python_close_before_format + }, + { + 'python': + textwrap.dedent(""" + msg = Text("Mixed {link_start}text{link_end}".format( + link_start=HTML("".format(HTML(''))), + link_end=HTML(""), + )) + """), + 'rule': + [ + Rules.python_close_before_format, + Rules.python_requires_html_or_text, + Rules.python_close_before_format, + Rules.python_requires_html_or_text + ] + }, + { + 'python': + textwrap.dedent(""" + msg = "Mixed {span_start}text{span_end}".format( + span_start="", + span_end="", + ) + """), + 'rule': [Rules.python_wrap_html, Rules.python_wrap_html] + }, + { + 'python': + textwrap.dedent(""" + msg = Text(_("String with multiple lines " + "{link_start}unenroll{link_end} " + "and final line")).format( + link_start=HTML( + '' + ).format( + # Line comment with ' to throw off parser + course_id=course_overview.id + ), + link_end=HTML(''), + ) + """), + 'rule': None + }, + { + 'python': "msg = ''", + 'rule': None + }, + { + 'python': "msg = HTML('')", + 'rule': None + }, + { + 'python': r"""msg = '\d+)?$'.format(test)""", + 'rule': None + }, + { + 'python': r"""msg = ''.format(self.title) + """), + 'rule': None + }, + { + 'python': r"""msg = '%s

' % message""", + 'rule': Rules.python_interpolate_html + }, + { + 'python': "msg = HTML(''", + 'rule': Rules.python_parse_error + }, + ) + def test_check_python_with_text_and_html(self, data): + """ + Test _check_python_file_is_safe tests for proper use of Text() and + Html(). + + """ + linter = PythonLinter() + results = FileResults('') + + file_content = textwrap.dedent(data['python']) + + linter.check_python_file_is_safe(file_content, results) + + self._validate_data_rules(data, results) + + def test_check_python_with_text_and_html_mixed(self): + """ + Test _check_python_file_is_safe tests for proper use of Text() and + Html() for a Python file with a mix of rules. + + """ + linter = PythonLinter() + results = FileResults('') + + file_content = textwrap.dedent(""" + msg1 = '".format(url)), + link_end="", + ) + msg3 = ' +

{response}

+
+ ''').format(response=response_text) + """, + 'rule': Rules.python_wrap_html, + 'start_line': 2, + }, + { + 'python': + """ + def function(self): + ''' + Function comment. + ''' + response_str = textwrap.dedent(''' +
+

{response}

+
+ ''').format(response=response_text) + """, + 'rule': Rules.python_wrap_html, + 'start_line': 6, + }, + { + 'python': + """ + def function(self): + ''' + Function comment. + ''' + response_str = '''

{response}

'''.format(response=response_text) + """, + 'rule': Rules.python_wrap_html, + 'start_line': 6, + }, + { + 'python': + """ + def function(self): + ''' + Function comment. + ''' + response_str = textwrap.dedent(''' +
+ \"\"\" Do we care about a nested triple quote? \"\"\" +

{response}

+
+ ''').format(response=response_text) + """, + 'rule': Rules.python_wrap_html, + 'start_line': 6, + }, + ) + def test_check_python_with_triple_quotes(self, data): + """ + Test _check_python_file_is_safe with triple quotes. + + """ + linter = PythonLinter() + results = FileResults('') + + file_content = textwrap.dedent(data['python']) + + linter.check_python_file_is_safe(file_content, results) + + self._validate_data_rules(data, results) + self.assertEqual(results.violations[0].start_line, data['start_line']) @ddt @@ -954,677 +1416,3 @@ class TestMakoTemplateLinter(TestLinter): start_inner_index = parse_string.start_index + parse_string.quote_length end_inner_index = parse_string.end_index - parse_string.quote_length self.assertEqual(data['template'][start_inner_index:end_inner_index], parse_string.string_inner) - - -@ddt -class TestUnderscoreTemplateLinter(TestLinter): - """ - Test UnderscoreTemplateLinter - """ - - def test_check_underscore_file_is_safe(self): - """ - Test check_underscore_file_is_safe with safe template - """ - linter = UnderscoreTemplateLinter() - results = FileResults('') - - template = textwrap.dedent(""" - <%- gettext('Single Line') %> - - <%- - gettext('Multiple Lines') - %> - """) - - linter.check_underscore_file_is_safe(template, results) - - self.assertEqual(len(results.violations), 0) - - def test_check_underscore_file_is_not_safe(self): - """ - Test check_underscore_file_is_safe with unsafe template - """ - linter = UnderscoreTemplateLinter() - results = FileResults('') - - template = textwrap.dedent(""" - <%= gettext('Single Line') %> - - <%= - gettext('Multiple Lines') - %> - """) - - linter.check_underscore_file_is_safe(template, results) - - self.assertEqual(len(results.violations), 2) - self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped) - self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped) - - @data( - { - 'template': - '<% // xss-lint: disable=underscore-not-escaped %>\n' - '<%= message %>', - 'is_disabled': [True], - }, - { - 'template': - '<% // xss-lint: disable=another-rule,underscore-not-escaped %>\n' - '<%= message %>', - 'is_disabled': [True], - }, - { - 'template': - '<% // xss-lint: disable=another-rule %>\n' - '<%= message %>', - 'is_disabled': [False], - }, - { - 'template': - '<% // xss-lint: disable=underscore-not-escaped %>\n' - '<%= message %>\n' - '<%= message %>', - 'is_disabled': [True, False], - }, - { - 'template': - '// This test does not use proper Underscore.js Template syntax\n' - '// But, it is just testing that a maximum of 5 non-whitespace\n' - '// are used to designate start of line for disabling the next line.\n' - ' 1 2 3 4 5 xss-lint: disable=underscore-not-escaped %>\n' - '<%= message %>\n' - ' 1 2 3 4 5 6 xss-lint: disable=underscore-not-escaped %>\n' - '<%= message %>', - 'is_disabled': [True, False], - }, - { - 'template': - '<%= message %><% // xss-lint: disable=underscore-not-escaped %>\n' - '<%= message %>', - 'is_disabled': [True, False], - }, - { - 'template': - '<%= message %>\n' - '<% // xss-lint: disable=underscore-not-escaped %>', - 'is_disabled': [False], - }, - ) - def test_check_underscore_file_disable_rule(self, data): - """ - Test check_underscore_file_is_safe with various disabled pragmas - """ - linter = UnderscoreTemplateLinter() - results = FileResults('') - - linter.check_underscore_file_is_safe(data['template'], results) - - violation_count = len(data['is_disabled']) - self.assertEqual(len(results.violations), violation_count) - for index in range(0, violation_count - 1): - self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index]) - - def test_check_underscore_file_disables_one_violation(self): - """ - Test check_underscore_file_is_safe with disabled before a line only - disables for the violation following - """ - linter = UnderscoreTemplateLinter() - results = FileResults('') - - template = textwrap.dedent(""" - <% // xss-lint: disable=underscore-not-escaped %> - <%= message %> - <%= message %> - """) - - linter.check_underscore_file_is_safe(template, results) - - self.assertEqual(len(results.violations), 2) - self.assertEqual(results.violations[0].is_disabled, True) - self.assertEqual(results.violations[1].is_disabled, False) - - @data( - {'template': '<%= HtmlUtils.ensureHtml(message) %>'}, - {'template': '<%= _.escape(message) %>'}, - ) - def test_check_underscore_no_escape_allowed(self, data): - """ - Test check_underscore_file_is_safe with expressions that are allowed - without escaping because the internal calls properly escape. - """ - linter = UnderscoreTemplateLinter() - results = FileResults('') - - linter.check_underscore_file_is_safe(data['template'], results) - - self.assertEqual(len(results.violations), 0) - - -@ddt -class TestJavaScriptLinter(TestLinter): - """ - Test JavaScriptLinter - """ - @data( - {'template': 'var m = "Plain text " + message + "plain text"', 'rule': None}, - {'template': 'var m = "檌檒濦 " + message + "plain text"', 'rule': None}, - { - 'template': - ("""$email_header.append($('', type: "button", name: "copy-email-body-text",""" - """ value: gettext("Copy Email To Editor"), id: 'copy_email_' + email_id))"""), - 'rule': None - }, - {'template': 'var m = "

" + message + "

"', 'rule': Rules.javascript_concat_html}, - { - 'template': r'var m = "

\"escaped quote\"" + message + "\"escaped quote\"

"', - 'rule': Rules.javascript_concat_html - }, - {'template': ' // var m = "

" + commentedOutMessage + "

"', 'rule': None}, - {'template': 'var m = "

" + message + "

"', 'rule': Rules.javascript_concat_html}, - {'template': 'var m = "

" + message + " broken string', 'rule': Rules.javascript_concat_html}, - ) - def test_concat_with_html(self, data): - """ - Test check_javascript_file_is_safe with concatenating strings and HTML - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - self._validate_data_rules(data, results) - - @data( - {'template': 'test.append( test.render().el )', 'rule': None}, - {'template': 'test.append(test.render().el)', 'rule': None}, - {'template': 'test.append(test.render().$el)', 'rule': None}, - {'template': 'test.append(testEl)', 'rule': None}, - {'template': 'test.append($test)', 'rule': None}, - # plain text is ok because any & will be escaped, and it stops false - # negatives on some other objects with an append() method - {'template': 'test.append("plain text")', 'rule': None}, - {'template': 'test.append("

")', 'rule': Rules.javascript_jquery_append}, - {'template': 'graph.svg.append("g")', 'rule': None}, - {'template': 'test.append( $( "
" ) )', 'rule': None}, - {'template': 'test.append($("
"))', 'rule': None}, - {'template': 'test.append($("
"))', 'rule': None}, - {'template': 'test.append(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'HtmlUtils.append($el, someHtml)', 'rule': None}, - {'template': 'test.append("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_append}, - {'template': 'test.append("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_append}, - {'template': 'test.append(message)', 'rule': Rules.javascript_jquery_append}, - ) - def test_jquery_append(self, data): - """ - Test check_javascript_file_is_safe with JQuery append() - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - @data( - {'template': 'test.prepend( test.render().el )', 'rule': None}, - {'template': 'test.prepend(test.render().el)', 'rule': None}, - {'template': 'test.prepend(test.render().$el)', 'rule': None}, - {'template': 'test.prepend(testEl)', 'rule': None}, - {'template': 'test.prepend($test)', 'rule': None}, - {'template': 'test.prepend("text")', 'rule': None}, - {'template': 'test.prepend( $( "
" ) )', 'rule': None}, - {'template': 'test.prepend($("
"))', 'rule': None}, - {'template': 'test.prepend($("
"))', 'rule': None}, - {'template': 'test.prepend(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'HtmlUtils.prepend($el, someHtml)', 'rule': None}, - {'template': 'test.prepend("broken string)', 'rule': Rules.javascript_jquery_prepend}, - {'template': 'test.prepend("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_prepend}, - {'template': 'test.prepend("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_prepend}, - {'template': 'test.prepend(message)', 'rule': Rules.javascript_jquery_prepend}, - ) - def test_jquery_prepend(self, data): - """ - Test check_javascript_file_is_safe with JQuery prepend() - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - @data( - {'template': 'test.unwrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.wrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.wrapAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.wrapInner(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.after(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.before(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.replaceAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.replaceWith(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'test.replaceWith(edx.HtmlUtils.HTML(htmlString).toString())', 'rule': None}, - {'template': 'test.unwrap(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.wrap(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.wrapAll(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.wrapInner(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.after(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.before(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.replaceAll(anything)', 'rule': Rules.javascript_jquery_insertion}, - {'template': 'test.replaceWith(anything)', 'rule': Rules.javascript_jquery_insertion}, - ) - def test_jquery_insertion(self, data): - """ - Test check_javascript_file_is_safe with JQuery insertion functions - other than append(), prepend() and html() that take content as an - argument (e.g. before(), after()). - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - @data( - {'template': ' element.parentNode.appendTo(target);', 'rule': None}, - {'template': ' test.render().el.appendTo(target);', 'rule': None}, - {'template': ' test.render().$el.appendTo(target);', 'rule': None}, - {'template': ' test.$element.appendTo(target);', 'rule': None}, - {'template': ' test.testEl.appendTo(target);', 'rule': None}, - {'template': '$element.appendTo(target);', 'rule': None}, - {'template': 'el.appendTo(target);', 'rule': None}, - {'template': 'testEl.appendTo(target);', 'rule': None}, - {'template': 'testEl.prependTo(target);', 'rule': None}, - {'template': 'testEl.insertAfter(target);', 'rule': None}, - {'template': 'testEl.insertBefore(target);', 'rule': None}, - {'template': 'anycall().appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, - {'template': 'anything.appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, - {'template': 'anything.prependTo(target)', 'rule': Rules.javascript_jquery_insert_into_target}, - {'template': 'anything.insertAfter(target)', 'rule': Rules.javascript_jquery_insert_into_target}, - {'template': 'anything.insertBefore(target)', 'rule': Rules.javascript_jquery_insert_into_target}, - ) - def test_jquery_insert_to_target(self, data): - """ - Test check_javascript_file_is_safe with JQuery insert to target - functions that take a target as an argument, like appendTo() and - prependTo(). - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - @data( - {'template': 'test.html()', 'rule': None}, - {'template': 'test.html( )', 'rule': None}, - {'template': "test.html( '' )", 'rule': None}, - {'template': "test.html('')", 'rule': None}, - {'template': 'test.html("")', 'rule': None}, - {'template': 'test.html(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None}, - {'template': 'HtmlUtils.setHtml($el, someHtml)', 'rule': None}, - {'template': 'test.html("any string")', 'rule': Rules.javascript_jquery_html}, - {'template': 'test.html("broken string)', 'rule': Rules.javascript_jquery_html}, - {'template': 'test.html("檌檒濦")', 'rule': Rules.javascript_jquery_html}, - {'template': 'test.html(anything)', 'rule': Rules.javascript_jquery_html}, - ) - def test_jquery_html(self, data): - """ - Test check_javascript_file_is_safe with JQuery html() - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - self._validate_data_rules(data, results) - - @data( - {'template': 'StringUtils.interpolate()', 'rule': None}, - {'template': 'HtmlUtils.interpolateHtml()', 'rule': None}, - {'template': 'interpolate(anything)', 'rule': Rules.javascript_interpolate}, - ) - def test_javascript_interpolate(self, data): - """ - Test check_javascript_file_is_safe with interpolate() - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - @data( - {'template': '_.escape(message)', 'rule': None}, - {'template': 'anything.escape(message)', 'rule': Rules.javascript_escape}, - ) - def test_javascript_interpolate(self, data): - """ - Test check_javascript_file_is_safe with interpolate() - """ - linter = JavaScriptLinter() - results = FileResults('') - - linter.check_javascript_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - -@ddt -class TestPythonLinter(TestLinter): - """ - Test PythonLinter - """ - @data( - {'template': 'm = "Plain text " + message + "plain text"', 'rule': None}, - {'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None}, - {'template': ' # m = "

" + commentedOutMessage + "

"', 'rule': None}, - {'template': 'm = "

" + message + "

"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, - {'template': 'm = "

" + message + "

"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]}, - {'template': 'm = "

" + message + " broken string', 'rule': Rules.python_parse_error}, - ) - def test_concat_with_html(self, data): - """ - Test check_python_file_is_safe with concatenating strings and HTML - """ - linter = PythonLinter() - results = FileResults('') - - linter.check_python_file_is_safe(data['template'], results) - - self._validate_data_rules(data, results) - - def test_check_python_expression_display_name(self): - """ - Test _check_python_file_is_safe with display_name_with_default_escaped - fails. - """ - linter = PythonLinter() - results = FileResults('') - - python_file = textwrap.dedent(""" - context = { - 'display_name': self.display_name_with_default_escaped, - } - """) - - linter.check_python_file_is_safe(python_file, results) - - self.assertEqual(len(results.violations), 1) - self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name) - - def test_check_custom_escaping(self): - """ - Test _check_python_file_is_safe fails when custom escapins is used. - """ - linter = PythonLinter() - results = FileResults('') - - python_file = textwrap.dedent(""" - msg = mmlans.replace('<', '<') - """) - - linter.check_python_file_is_safe(python_file, results) - - self.assertEqual(len(results.violations), 1) - self.assertEqual(results.violations[0].rule, Rules.python_custom_escape) - - @data( - { - 'python': - textwrap.dedent(""" - msg = Text("Mixed {span_start}text{span_end}").format( - span_start=HTML(""), - span_end=HTML(""), - ) - """), - 'rule': None - }, - { - 'python': - textwrap.dedent(""" - msg = "Mixed {span_start}text{span_end}".format( - span_start=HTML(""), - span_end=HTML(""), - ) - """), - 'rule': Rules.python_requires_html_or_text - }, - { - 'python': - textwrap.dedent(""" - msg = "Mixed {span_start}{text}{span_end}".format( - span_start=HTML(""), - text=Text("This should still break."), - span_end=HTML(""), - ) - """), - 'rule': Rules.python_requires_html_or_text - }, - { - 'python': - textwrap.dedent(""" - msg = Text("Mixed {link_start}text{link_end}".format( - link_start=HTML("").format(url), - link_end=HTML(""), - )) - """), - 'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text] - }, - { - 'python': - textwrap.dedent(""" - msg = Text("Mixed {link_start}text{link_end}").format( - link_start=HTML("".format(url)), - link_end=HTML(""), - ) - """), - 'rule': Rules.python_close_before_format - }, - { - 'python': - textwrap.dedent(""" - msg = Text("Mixed {link_start}text{link_end}".format( - link_start=HTML("".format(HTML(''))), - link_end=HTML(""), - )) - """), - 'rule': - [ - Rules.python_close_before_format, - Rules.python_requires_html_or_text, - Rules.python_close_before_format, - Rules.python_requires_html_or_text - ] - }, - { - 'python': - textwrap.dedent(""" - msg = "Mixed {span_start}text{span_end}".format( - span_start="", - span_end="", - ) - """), - 'rule': [Rules.python_wrap_html, Rules.python_wrap_html] - }, - { - 'python': - textwrap.dedent(""" - msg = Text(_("String with multiple lines " - "{link_start}unenroll{link_end} " - "and final line")).format( - link_start=HTML( - '' - ).format( - # Line comment with ' to throw off parser - course_id=course_overview.id - ), - link_end=HTML(''), - ) - """), - 'rule': None - }, - { - 'python': "msg = ''", - 'rule': None - }, - { - 'python': "msg = HTML('')", - 'rule': None - }, - { - 'python': r"""msg = '\d+)?$'.format(test)""", - 'rule': None - }, - { - 'python': r"""msg = ''.format(self.title) - """), - 'rule': None - }, - { - 'python': r"""msg = '%s

' % message""", - 'rule': Rules.python_interpolate_html - }, - { - 'python': "msg = HTML(''", - 'rule': Rules.python_parse_error - }, - ) - def test_check_python_with_text_and_html(self, data): - """ - Test _check_python_file_is_safe tests for proper use of Text() and - Html(). - - """ - linter = PythonLinter() - results = FileResults('') - - file_content = textwrap.dedent(data['python']) - - linter.check_python_file_is_safe(file_content, results) - - self._validate_data_rules(data, results) - - def test_check_python_with_text_and_html_mixed(self): - """ - Test _check_python_file_is_safe tests for proper use of Text() and - Html() for a Python file with a mix of rules. - - """ - linter = PythonLinter() - results = FileResults('') - - file_content = textwrap.dedent(""" - msg1 = '".format(url)), - link_end="", - ) - msg3 = ' -

{response}

-
- ''').format(response=response_text) - """, - 'rule': Rules.python_wrap_html, - 'start_line': 2, - }, - { - 'python': - """ - def function(self): - ''' - Function comment. - ''' - response_str = textwrap.dedent(''' -
-

{response}

-
- ''').format(response=response_text) - """, - 'rule': Rules.python_wrap_html, - 'start_line': 6, - }, - { - 'python': - """ - def function(self): - ''' - Function comment. - ''' - response_str = '''

{response}

'''.format(response=response_text) - """, - 'rule': Rules.python_wrap_html, - 'start_line': 6, - }, - { - 'python': - """ - def function(self): - ''' - Function comment. - ''' - response_str = textwrap.dedent(''' -
- \"\"\" Do we care about a nested triple quote? \"\"\" -

{response}

-
- ''').format(response=response_text) - """, - 'rule': Rules.python_wrap_html, - 'start_line': 6, - }, - ) - def test_check_python_with_triple_quotes(self, data): - """ - Test _check_python_file_is_safe with triple quotes. - - """ - linter = PythonLinter() - results = FileResults('') - - file_content = textwrap.dedent(data['python']) - - linter.check_python_file_is_safe(file_content, results) - - self._validate_data_rules(data, results) - self.assertEqual(results.violations[0].start_line, data['start_line']) diff --git a/scripts/xsslint/tests/test_main.py b/scripts/xsslint/tests/test_main.py new file mode 100644 index 0000000000..dc99ddb7ca --- /dev/null +++ b/scripts/xsslint/tests/test_main.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +""" +Tests for main.py +""" +import re +import textwrap +from StringIO import StringIO +from unittest import TestCase + +import mock + +from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter +from xsslint.main import _lint +from xsslint.reporting import SummaryResults +from xsslint.rules import Rules + + +class TestXSSLinter(TestCase): + """ + Test some top-level linter functions + """ + + def setUp(self): + """ + Setup patches on linters for testing. + """ + self.patch_is_valid_directory(MakoTemplateLinter) + self.patch_is_valid_directory(JavaScriptLinter) + self.patch_is_valid_directory(UnderscoreTemplateLinter) + self.patch_is_valid_directory(PythonLinter) + + patcher = mock.patch('xsslint.main.is_skip_dir', return_value=False) + patcher.start() + self.addCleanup(patcher.stop) + + def patch_is_valid_directory(self, linter_class): + """ + Creates a mock patch for _is_valid_directory on a Linter to always + return true. This avoids nested patch calls. + + Arguments: + linter_class: The linter class to be patched + """ + patcher = mock.patch.object(linter_class, '_is_valid_directory', return_value=True) + patch_start = patcher.start() + self.addCleanup(patcher.stop) + return patch_start + + def test_lint_defaults(self): + """ + Tests the top-level linting with default options. + """ + out = StringIO() + summary_results = SummaryResults() + + _lint( + 'scripts/xsslint/tests/templates', + template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], + options={ + 'list_files': False, + 'verbose': False, + 'rule_totals': False, + }, + summary_results=summary_results, + out=out, + ) + + output = out.getvalue() + # Assert violation details are displayed. + self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output)) + self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output)) + self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) + self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output)) + self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output)) + lines_with_rule = 0 + lines_without_rule = 0 # Output with verbose setting only. + for underscore_match in re.finditer('test\.underscore:.*\n', output): + if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: + lines_with_rule += 1 + else: + lines_without_rule += 1 + self.assertGreaterEqual(lines_with_rule, 1) + self.assertEquals(lines_without_rule, 0) + self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output)) + self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) + # Assert no rule totals. + self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) + # Assert final total + self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + + def test_lint_with_verbose(self): + """ + Tests the top-level linting with verbose option. + """ + out = StringIO() + summary_results = SummaryResults() + + _lint( + 'scripts/xsslint/tests/templates', + template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], + options={ + 'list_files': False, + 'verbose': True, + 'rule_totals': False, + }, + summary_results=summary_results, + out=out, + ) + + output = out.getvalue() + lines_with_rule = 0 + lines_without_rule = 0 # Output with verbose setting only. + for underscore_match in re.finditer('test\.underscore:.*\n', output): + if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None: + lines_with_rule += 1 + else: + lines_without_rule += 1 + self.assertGreaterEqual(lines_with_rule, 1) + self.assertGreaterEqual(lines_without_rule, 1) + # Assert no rule totals. + self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) + # Assert final total + self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + + def test_lint_with_rule_totals(self): + """ + Tests the top-level linting with rule totals option. + """ + out = StringIO() + summary_results = SummaryResults() + + _lint( + 'scripts/xsslint/tests/templates', + template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], + options={ + 'list_files': False, + 'verbose': False, + 'rule_totals': True, + }, + summary_results=summary_results, + out=out, + ) + + output = out.getvalue() + self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) + + # Assert totals output. + self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) + self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_wrap_html.rule_id, 1), output)) + self.assertIsNotNone(re.search('{} violations total'.format(7), output)) + + def test_lint_with_list_files(self): + """ + Tests the top-level linting with list files option. + """ + out = StringIO() + summary_results = SummaryResults() + + _lint( + 'scripts/xsslint/tests/templates', + template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()], + options={ + 'list_files': True, + 'verbose': False, + 'rule_totals': False, + }, + summary_results=summary_results, + out=out, + ) + + output = out.getvalue() + # Assert file with rule is not output. + self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output)) + # Assert file is output. + self.assertIsNotNone(re.search('test\.py', output)) + + # Assert no totals. + self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output)) + self.assertIsNone(re.search('{} violations total'.format(7), output)) diff --git a/scripts/xsslint/tests/test_utils.py b/scripts/xsslint/tests/test_utils.py new file mode 100644 index 0000000000..9521590296 --- /dev/null +++ b/scripts/xsslint/tests/test_utils.py @@ -0,0 +1,44 @@ +from unittest import TestCase + +from ddt import data, ddt + +from xsslint.utils import StringLines + + +@ddt +class TestStringLines(TestCase): + """ + Test StringLines class. + """ + @data( + {'string': 'test', 'index': 0, 'line_start_index': 0, 'line_end_index': 4}, + {'string': 'test', 'index': 2, 'line_start_index': 0, 'line_end_index': 4}, + {'string': 'test', 'index': 3, 'line_start_index': 0, 'line_end_index': 4}, + {'string': '\ntest', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, + {'string': '\ntest', 'index': 2, 'line_start_index': 1, 'line_end_index': 5}, + {'string': '\ntest\n', 'index': 0, 'line_start_index': 0, 'line_end_index': 1}, + {'string': '\ntest\n', 'index': 2, 'line_start_index': 1, 'line_end_index': 6}, + {'string': '\ntest\n', 'index': 6, 'line_start_index': 6, 'line_end_index': 6}, + ) + def test_string_lines_start_end_index(self, data): + """ + Test StringLines index_to_line_start_index and index_to_line_end_index. + """ + lines = StringLines(data['string']) + self.assertEqual(lines.index_to_line_start_index(data['index']), data['line_start_index']) + self.assertEqual(lines.index_to_line_end_index(data['index']), data['line_end_index']) + + @data( + {'string': 'test', 'line_number': 1, 'line': 'test'}, + {'string': '\ntest', 'line_number': 1, 'line': ''}, + {'string': '\ntest', 'line_number': 2, 'line': 'test'}, + {'string': '\ntest\n', 'line_number': 1, 'line': ''}, + {'string': '\ntest\n', 'line_number': 2, 'line': 'test'}, + {'string': '\ntest\n', 'line_number': 3, 'line': ''}, + ) + def test_string_lines_start_end_index(self, data): + """ + Test line_number_to_line. + """ + lines = StringLines(data['string']) + self.assertEqual(lines.line_number_to_line(data['line_number']), data['line']) diff --git a/scripts/xsslint/xss_linter.py b/scripts/xsslint/xss_linter.py new file mode 100755 index 0000000000..a35038c3de --- /dev/null +++ b/scripts/xsslint/xss_linter.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +""" +A linting tool to check for xss vulnerabilities. +""" + + +if __name__ == "__main__": + from xsslint.main import main + main() diff --git a/scripts/xsslint/xsslint/__init__.py b/scripts/xsslint/xsslint/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/xss_linter.py b/scripts/xsslint/xsslint/linters.py old mode 100755 new mode 100644 similarity index 57% rename from scripts/xss_linter.py rename to scripts/xsslint/xsslint/linters.py index 985bc44761..6d32cdb24f --- a/scripts/xss_linter.py +++ b/scripts/xsslint/xsslint/linters.py @@ -1,749 +1,15 @@ -#!/usr/bin/env python """ -A linting tool to check for xss vulnerabilities. +Linter classes containing logic for checking various filetypes. """ -from __future__ import print_function - -import argparse import ast import os import re -import sys import textwrap -from enum import Enum - - -class StringLines(object): - """ - StringLines provides utility methods to work with a string in terms of - lines. As an example, it can convert an index into a line number or column - number (i.e. index into the line). - """ - - def __init__(self, string): - """ - Init method. - - Arguments: - string: The string to work with. - - """ - self._string = string - self._line_start_indexes = self._process_line_breaks(string) - # this is an exclusive index used in the case that the template doesn't - # end with a new line - self.eof_index = len(string) - - def _process_line_breaks(self, string): - """ - Creates a list, where each entry represents the index into the string - where the next line break was found. - - Arguments: - string: The string in which to find line breaks. - - Returns: - A list of indices into the string at which each line begins. - - """ - line_start_indexes = [0] - index = 0 - while True: - index = string.find('\n', index) - if index < 0: - break - index += 1 - line_start_indexes.append(index) - return line_start_indexes - - def get_string(self): - """ - Get the original string. - """ - return self._string - - def index_to_line_number(self, index): - """ - Given an index, determines the line of the index. - - Arguments: - index: The index into the original string for which we want to know - the line number - - Returns: - The line number of the provided index. - - """ - current_line_number = 0 - for line_break_index in self._line_start_indexes: - if line_break_index <= index: - current_line_number += 1 - else: - break - return current_line_number - - def index_to_column_number(self, index): - """ - Gets the column (i.e. index into the line) for the given index into the - original string. - - Arguments: - index: The index into the original string. - - Returns: - The column (i.e. index into the line) for the given index into the - original string. - - """ - start_index = self.index_to_line_start_index(index) - column = index - start_index + 1 - return column - - def index_to_line_start_index(self, index): - """ - Gets the index of the start of the line of the given index. - - Arguments: - index: The index into the original string. - - Returns: - The index of the start of the line of the given index. - - """ - line_number = self.index_to_line_number(index) - return self.line_number_to_start_index(line_number) - - def index_to_line_end_index(self, index): - """ - Gets the index of the end of the line of the given index. - - Arguments: - index: The index into the original string. - - Returns: - The index of the end of the line of the given index. - - """ - line_number = self.index_to_line_number(index) - return self.line_number_to_end_index(line_number) - - def line_number_to_start_index(self, line_number): - """ - Gets the starting index for the provided line number. - - Arguments: - line_number: The line number of the line for which we want to find - the start index. - - Returns: - The starting index for the provided line number. - - """ - return self._line_start_indexes[line_number - 1] - - def line_number_to_end_index(self, line_number): - """ - Gets the ending index for the provided line number. - - Arguments: - line_number: The line number of the line for which we want to find - the end index. - - Returns: - The ending index for the provided line number. - - """ - if line_number < len(self._line_start_indexes): - return self._line_start_indexes[line_number] - else: - # an exclusive index in the case that the file didn't end with a - # newline. - return self.eof_index - - def line_number_to_line(self, line_number): - """ - Gets the line of text designated by the provided line number. - - Arguments: - line_number: The line number of the line we want to find. - - Returns: - The line of text designated by the provided line number. - - """ - start_index = self._line_start_indexes[line_number - 1] - if len(self._line_start_indexes) == line_number: - line = self._string[start_index:] - else: - end_index = self._line_start_indexes[line_number] - line = self._string[start_index:end_index - 1] - return line - - def line_count(self): - """ - Gets the number of lines in the string. - """ - return len(self._line_start_indexes) - - -class Rules(Enum): - """ - An Enum of each rule which the linter will check. - """ - # IMPORTANT: Do not edit without also updating the docs: - # - http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter - mako_missing_default = 'mako-missing-default' - mako_multiple_page_tags = 'mako-multiple-page-tags' - mako_unparseable_expression = 'mako-unparseable-expression' - mako_unwanted_html_filter = 'mako-unwanted-html-filter' - mako_invalid_html_filter = 'mako-invalid-html-filter' - mako_invalid_js_filter = 'mako-invalid-js-filter' - mako_js_missing_quotes = 'mako-js-missing-quotes' - mako_js_html_string = 'mako-js-html-string' - mako_html_entities = 'mako-html-entities' - mako_unknown_context = 'mako-unknown-context' - underscore_not_escaped = 'underscore-not-escaped' - javascript_jquery_append = 'javascript-jquery-append' - javascript_jquery_prepend = 'javascript-jquery-prepend' - javascript_jquery_insertion = 'javascript-jquery-insertion' - javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target' - javascript_jquery_html = 'javascript-jquery-html' - javascript_concat_html = 'javascript-concat-html' - javascript_escape = 'javascript-escape' - javascript_interpolate = 'javascript-interpolate' - python_concat_html = 'python-concat-html' - python_custom_escape = 'python-custom-escape' - python_deprecated_display_name = 'python-deprecated-display-name' - python_requires_html_or_text = 'python-requires-html-or-text' - python_close_before_format = 'python-close-before-format' - python_wrap_html = 'python-wrap-html' - python_interpolate_html = 'python-interpolate-html' - python_parse_error = 'python-parse-error' - - def __init__(self, rule_id): - self.rule_id = rule_id - - -class Expression(object): - """ - Represents an arbitrary expression. - - An expression can be any type of code snippet. It will sometimes have a - starting and ending delimiter, but not always. - - Here are some example expressions:: - - ${x | n, decode.utf8} - <%= x %> - function(x) - "

" + message + "

" - - Other details of note: - - Only a start_index is required for a valid expression. - - If end_index is None, it means we couldn't parse the rest of the - expression. - - All other details of the expression are optional, and are only added if - and when supplied and needed for additional checks. They are not necessary - for the final results output. - - """ - - def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): - """ - Init method. - - Arguments: - start_index: the starting index of the expression - end_index: the index immediately following the expression, or None - if the expression was unparseable - template: optional template code in which the expression was found - start_delim: optional starting delimiter of the expression - end_delim: optional ending delimeter of the expression - strings: optional list of ParseStrings - - """ - self.start_index = start_index - self.end_index = end_index - self.start_delim = start_delim - self.end_delim = end_delim - self.strings = strings - if template is not None and self.end_index is not None: - self.expression = template[start_index:end_index] - self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() - else: - self.expression = None - self.expression_inner = None - - -class RuleViolation(object): - """ - Base class representing a rule violation which can be used for reporting. - """ - - def __init__(self, rule): - """ - Init method. - - Arguments: - rule: The Rule which was violated. - - """ - self.rule = rule - self.full_path = '' - self.is_disabled = False - - def _mark_disabled(self, string, scope_start_string=False): - """ - Performs the disable pragma search and marks the rule as disabled if a - matching pragma is found. - - Pragma format:: - - xss-lint: disable=violation-name,other-violation-name - - Arguments: - string: The string of code in which to search for the pragma. - scope_start_string: True if the pragma must be at the start of the - string, False otherwise. The pragma is considered at the start - of the string if it has a maximum of 5 non-whitespace characters - preceding it. - - Side Effect: - Sets self.is_disabled as appropriate based on whether the pragma is - found. - - """ - pragma_match = re.search(r'xss-lint:\s*disable=([a-zA-Z,\- ]+)', string) - if pragma_match is None: - return - if scope_start_string: - spaces_count = string.count(' ', 0, pragma_match.start()) - non_space_count = pragma_match.start() - spaces_count - if non_space_count > 5: - return - - for disabled_rule in pragma_match.group(1).split(','): - if disabled_rule.strip() == self.rule.rule_id: - self.is_disabled = True - return - - def sort_key(self): - """ - Returns a key that can be sorted on - """ - return (0, 0, self.rule.rule_id) - - def first_line(self): - """ - Since a file level rule has no first line, returns empty string. - """ - return '' - - def prepare_results(self, full_path, string_lines): - """ - Preps this instance for results reporting. - - Arguments: - full_path: Path of the file in violation. - string_lines: A StringLines containing the contents of the file in - violation. - - """ - self.full_path = full_path - self._mark_disabled(string_lines.get_string()) - - def print_results(self, _options, out): - """ - Prints the results represented by this rule violation. - - Arguments: - _options: ignored - out: output file - """ - print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) - - -class ExpressionRuleViolation(RuleViolation): - """ - A class representing a particular rule violation for expressions which - contain more specific details of the location of the violation for reporting - purposes. - - """ - - def __init__(self, rule, expression): - """ - Init method. - - Arguments: - rule: The Rule which was violated. - expression: The Expression that was in violation. - - """ - super(ExpressionRuleViolation, self).__init__(rule) - self.expression = expression - self.start_line = 0 - self.start_column = 0 - self.end_line = 0 - self.end_column = 0 - self.lines = [] - self.is_disabled = False - - def _mark_expression_disabled(self, string_lines): - """ - Marks the expression violation as disabled if it finds the disable - pragma anywhere on the first line of the violation, or at the start of - the line preceding the violation. - - Pragma format:: - - xss-lint: disable=violation-name,other-violation-name - - Examples:: - - <% // xss-lint: disable=underscore-not-escaped %> - <%= gettext('Single Line') %> - - <%= gettext('Single Line') %><% // xss-lint: disable=underscore-not-escaped %> - - Arguments: - string_lines: A StringLines containing the contents of the file in - violation. - - Side Effect: - Sets self.is_disabled as appropriate based on whether the pragma is - found. - - """ - # disable pragma can be at the start of the preceding line - has_previous_line = self.start_line > 1 - if has_previous_line: - line_to_check = string_lines.line_number_to_line(self.start_line - 1) - self._mark_disabled(line_to_check, scope_start_string=True) - if self.is_disabled: - return - - # TODO: this should work at end of any line of the violation - # disable pragma can be anywhere on the first line of the violation - line_to_check = string_lines.line_number_to_line(self.start_line) - self._mark_disabled(line_to_check, scope_start_string=False) - - def sort_key(self): - """ - Returns a key that can be sorted on - """ - return (self.start_line, self.start_column, self.rule.rule_id) - - def first_line(self): - """ - Returns the initial line of code of the violation. - """ - return self.lines[0] - - def prepare_results(self, full_path, string_lines): - """ - Preps this instance for results reporting. - - Arguments: - full_path: Path of the file in violation. - string_lines: A StringLines containing the contents of the file in - violation. - - """ - self.full_path = full_path - start_index = self.expression.start_index - self.start_line = string_lines.index_to_line_number(start_index) - self.start_column = string_lines.index_to_column_number(start_index) - end_index = self.expression.end_index - if end_index is not None: - self.end_line = string_lines.index_to_line_number(end_index) - self.end_column = string_lines.index_to_column_number(end_index) - else: - self.end_line = self.start_line - self.end_column = '?' - for line_number in range(self.start_line, self.end_line + 1): - self.lines.append(string_lines.line_number_to_line(line_number)) - self._mark_expression_disabled(string_lines) - - def print_results(self, options, out): - """ - Prints the results represented by this rule violation. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - verbose: True for multiple lines of context, False single line. - out: output file - - """ - if options['verbose']: - end_line = self.end_line + 1 - else: - end_line = self.start_line + 1 - for line_number in range(self.start_line, end_line): - if line_number == self.start_line: - column = self.start_column - rule_id = self.rule.rule_id + ":" - else: - column = 1 - rule_id = " " * (len(self.rule.rule_id) + 1) - line = self.lines[line_number - self.start_line].encode(encoding='utf-8') - print("{}: {}:{}: {} {}".format( - self.full_path, - line_number, - column, - rule_id, - line - ), file=out) - - -class SummaryResults(object): - """ - Contains the summary results for all violations. - """ - - def __init__(self): - """ - Init method. - """ - self.total_violations = 0 - self.totals_by_rule = dict.fromkeys( - [rule.rule_id for rule in Rules.__members__.values()], 0 - ) - - def add_violation(self, violation): - """ - Adds a violation to the summary details. - - Arguments: - violation: The violation to add to the summary. - - """ - self.total_violations += 1 - self.totals_by_rule[violation.rule.rule_id] += 1 - - def print_results(self, options, out): - """ - Prints the results (i.e. violations) in this file. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - rule_totals: If True include totals by rule. - out: output file - - """ - if options['list_files'] is False: - if options['rule_totals']: - max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule) - print("", file=out) - for rule_id in sorted(self.totals_by_rule.keys()): - padding = " " * (max_rule_id_len - len(rule_id)) - print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out) - print("", file=out) - - # matches output of eslint for simplicity - print("", file=out) - print("{} violations total".format(self.total_violations), file=out) - - -class FileResults(object): - """ - Contains the results, or violations, for a file. - """ - - def __init__(self, full_path): - """ - Init method. - - Arguments: - full_path: The full path for this file. - - """ - self.full_path = full_path - self.directory = os.path.dirname(full_path) - self.is_file = os.path.isfile(full_path) - self.violations = [] - - def prepare_results(self, file_string, line_comment_delim=None): - """ - Prepares the results for output for this file. - - Arguments: - file_string: The string of content for this file. - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - """ - string_lines = StringLines(file_string) - for violation in self.violations: - violation.prepare_results(self.full_path, string_lines) - if line_comment_delim is not None: - self._filter_commented_code(line_comment_delim) - - def print_results(self, options, summary_results, out): - """ - Prints the results (i.e. violations) in this file. - - Arguments: - options: A list of the following options: - list_files: True to print only file names, and False to print - all violations. - summary_results: A SummaryResults with a summary of the violations. - verbose: True for multiple lines of context, False single line. - out: output file - - Side effect: - Updates the passed SummaryResults. - - """ - if options['list_files']: - if self.violations is not None and 0 < len(self.violations): - print(self.full_path, file=out) - else: - self.violations.sort(key=lambda violation: violation.sort_key()) - for violation in self.violations: - if not violation.is_disabled: - violation.print_results(options, out) - summary_results.add_violation(violation) - - def _filter_commented_code(self, line_comment_delim): - """ - Remove any violations that were found in commented out code. - - Arguments: - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - """ - self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)] - - def _is_commented(self, violation, line_comment_delim): - """ - Checks if violation line is commented out. - - Arguments: - violation: The violation to check - line_comment_delim: A string representing the start of a line - comment. For example "##" for Mako and "//" for JavaScript. - - Returns: - True if the first line of the violation is actually commented out, - False otherwise. - """ - if 'parse' in violation.rule.rule_id: - # For parse rules, don't filter them because the comment could be a - # part of the parse issue to begin with. - return False - else: - return violation.first_line().lstrip().startswith(line_comment_delim) - - -class ParseString(object): - """ - ParseString is the result of parsing a string out of a template. - - A ParseString has the following attributes: - start_index: The index of the first quote, or None if none found - end_index: The index following the closing quote, or None if - unparseable - quote_length: The length of the quote. Could be 3 for a Python - triple quote. Or None if none found. - string: the text of the parsed string, or None if none found. - string_inner: the text inside the quotes of the parsed string, or None - if none found. - - """ - - def __init__(self, template, start_index, end_index): - """ - Init method. - - Arguments: - template: The template to be searched. - start_index: The start index to search. - end_index: The end index to search before. - - """ - self.end_index = None - self.quote_length = None - self.string = None - self.string_inner = None - self.start_index = self._find_string_start(template, start_index, end_index) - if self.start_index is not None: - result = self._parse_string(template, self.start_index) - if result is not None: - self.end_index = result['end_index'] - self.quote_length = result['quote_length'] - self.string = result['string'] - self.string_inner = result['string_inner'] - - def _find_string_start(self, template, start_index, end_index): - """ - Finds the index of the end of start of a string. In other words, the - first single or double quote. - - Arguments: - template: The template to be searched. - start_index: The start index to search. - end_index: The end index to search before. - - Returns: - The start index of the first single or double quote, or None if no - quote was found. - """ - quote_regex = re.compile(r"""['"]""") - start_match = quote_regex.search(template, start_index, end_index) - if start_match is None: - return None - else: - return start_match.start() - - def _parse_string(self, template, start_index): - """ - Finds the indices of a string inside a template. - - Arguments: - template: The template to be searched. - start_index: The start index of the open quote. - - Returns: - A dict containing the following, or None if not parseable: - end_index: The index following the closing quote - quote_length: The length of the quote. Could be 3 for a Python - triple quote. - string: the text of the parsed string - string_inner: the text inside the quotes of the parsed string - - """ - quote = template[start_index] - if quote not in ["'", '"']: - raise ValueError("start_index must refer to a single or double quote.") - triple_quote = quote * 3 - if template.startswith(triple_quote, start_index): - quote = triple_quote - - next_start_index = start_index + len(quote) - while True: - quote_end_index = template.find(quote, next_start_index) - backslash_index = template.find("\\", next_start_index) - if quote_end_index < 0: - return None - if 0 <= backslash_index < quote_end_index: - next_start_index = backslash_index + 2 - else: - end_index = quote_end_index + len(quote) - quote_length = len(quote) - string = template[start_index:end_index] - return { - 'end_index': end_index, - 'quote_length': quote_length, - 'string': string, - 'string_inner': string[quote_length:-quote_length], - } +from xsslint.reporting import ExpressionRuleViolation, FileResults, RuleViolation +from xsslint.rules import Rules +from xsslint.utils import SKIP_DIRS, Expression, ParseString, StringLines, is_skip_dir +from xsslint.visitors import AllNodeVisitor, HtmlStringVisitor, OuterFormatVisitor class BaseLinter(object): @@ -1419,328 +685,6 @@ class JavaScriptLinter(BaseLinter): )) -class BaseVisitor(ast.NodeVisitor): - """ - Base class for AST NodeVisitor used for Python xss linting. - - Important: This base visitor skips all __repr__ function definitions. - """ - def __init__(self, file_contents, results): - """ - Init method. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - - """ - super(BaseVisitor, self).__init__() - self.file_contents = file_contents - self.lines = StringLines(self.file_contents) - self.results = results - - def node_to_expression(self, node): - """ - Takes a node and translates it to an expression to be used with - violations. - - Arguments: - node: An AST node. - - """ - line_start_index = self.lines.line_number_to_start_index(node.lineno) - start_index = line_start_index + node.col_offset - if isinstance(node, ast.Str): - # Triple quotes give col_offset of -1 on the last line of the string. - if node.col_offset == -1: - triple_quote_regex = re.compile("""['"]{3}""") - end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index) - open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start()) - if open_quote_index > 0: - start_index = open_quote_index - else: - # If we can't find a starting quote, let's assume that what - # we considered the end quote is really the start quote. - start_index = end_triple_quote_match.start() - string = ParseString(self.file_contents, start_index, len(self.file_contents)) - return Expression(string.start_index, string.end_index) - else: - return Expression(start_index) - - def visit_FunctionDef(self, node): - """ - Skips processing of __repr__ functions, since these sometimes use '<' - for non-HTML purposes. - - Arguments: - node: An AST node. - """ - if node.name != '__repr__': - self.generic_visit(node) - - -class HtmlStringVisitor(BaseVisitor): - """ - Checks for strings that contain HTML. Assumes any string with < or > is - considered potential HTML. - - To be used only with strings in context of format or concat. - - """ - def __init__(self, file_contents, results, skip_wrapped_html=False): - """ - Init function. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - skip_wrapped_html: True if visitor should skip strings wrapped with - HTML() or Text(), and False otherwise. - """ - super(HtmlStringVisitor, self).__init__(file_contents, results) - self.skip_wrapped_html = skip_wrapped_html - self.unsafe_html_string_nodes = [] - self.over_escaped_entity_string_nodes = [] - self.has_text_or_html_call = False - - def visit_Str(self, node): - """ - When strings are visited, checks if it contains HTML. - - Arguments: - node: An AST node. - """ - # Skips '<' (and '>') in regex named groups. For example, "(?P)". - if re.search('[(][?]P<', node.s) is None and re.search('<', node.s) is not None: - self.unsafe_html_string_nodes.append(node) - if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s): - self.over_escaped_entity_string_nodes.append(node) - - def visit_Call(self, node): - """ - Skips processing of string contained inside HTML() and Text() calls when - skip_wrapped_html is True. - - Arguments: - node: An AST node. - - """ - is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text'] - if self.skip_wrapped_html and is_html_or_text_call: - self.has_text_or_html_call = True - else: - self.generic_visit(node) - - -class ContainsFormatVisitor(BaseVisitor): - """ - Checks if there are any nested format() calls. - - This visitor is meant to be called on HTML() and Text() ast.Call nodes to - search for any illegal nested format() calls. - - """ - def __init__(self, file_contents, results): - """ - Init function. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - - """ - super(ContainsFormatVisitor, self).__init__(file_contents, results) - self.contains_format_call = False - - def visit_Attribute(self, node): - """ - Simple check for format calls (attribute). - - Arguments: - node: An AST node. - - """ - # Attribute(expr value, identifier attr, expr_context ctx) - if node.attr == 'format': - self.contains_format_call = True - else: - self.generic_visit(node) - - -class FormatInterpolateVisitor(BaseVisitor): - """ - Checks if format() interpolates any HTML() or Text() calls. In other words, - are Text() or HTML() calls nested inside the call to format(). - - This visitor is meant to be called on a format() attribute node. - - """ - def __init__(self, file_contents, results): - """ - Init function. - - Arguments: - file_contents: The contents of the Python file. - results: A file results objects to which violations will be added. - - """ - super(FormatInterpolateVisitor, self).__init__(file_contents, results) - self.interpolates_text_or_html = False - self.format_caller_node = None - - def visit_Call(self, node): - """ - Checks all calls. Remembers the caller of the initial format() call, or - in other words, the left-hand side of the call. Also tracks if HTML() - or Text() calls were seen. - - Arguments: - node: The AST root node. - - """ - if isinstance(node.func, ast.Attribute) and node.func.attr is 'format': - if self.format_caller_node is None: - # Store the caller, or left-hand-side node of the initial - # format() call. - self.format_caller_node = node.func.value - elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']: - # found Text() or HTML() call in arguments passed to format() - self.interpolates_text_or_html = True - self.generic_visit(node) - - def generic_visit(self, node): - """ - Determines whether or not to continue to visit nodes according to the - following rules: - - Once a Text() or HTML() call has been found, stop visiting more nodes. - - Skip the caller of the outer-most format() call, or in other words, - the left-hand side of the call. - - Arguments: - node: The AST root node. - - """ - if self.interpolates_text_or_html is False: - if self.format_caller_node is not node: - super(FormatInterpolateVisitor, self).generic_visit(node) - - -class OuterFormatVisitor(BaseVisitor): - """ - Only visits outer most Python format() calls. These checks are not repeated - for any nested format() calls. - - This visitor is meant to be used once from the root. - - """ - def visit_Call(self, node): - """ - Checks that format() calls which contain HTML() or Text() use HTML() or - Text() as the caller. In other words, Text() or HTML() must be used - before format() for any arguments to format() that contain HTML() or - Text(). - - Arguments: - node: An AST node. - """ - if isinstance(node.func, ast.Attribute) and node.func.attr == 'format': - visitor = HtmlStringVisitor(self.file_contents, self.results, True) - visitor.visit(node) - for unsafe_html_string_node in visitor.unsafe_html_string_nodes: - self.results.violations.append(ExpressionRuleViolation( - Rules.python_wrap_html, self.node_to_expression(unsafe_html_string_node) - )) - # Do not continue processing child nodes of this format() node. - else: - self.generic_visit(node) - - -class AllNodeVisitor(BaseVisitor): - """ - Visits all nodes and does not interfere with calls to generic_visit(). This - is used in conjunction with other visitors to check for a variety of - violations. - - This visitor is meant to be used once from the root. - - """ - - def visit_Attribute(self, node): - """ - Checks for uses of deprecated `display_name_with_default_escaped`. - - Arguments: - node: An AST node. - """ - if node.attr == 'display_name_with_default_escaped': - self.results.violations.append(ExpressionRuleViolation( - Rules.python_deprecated_display_name, self.node_to_expression(node) - )) - self.generic_visit(node) - - def visit_Call(self, node): - """ - Checks for a variety of violations: - - Checks that format() calls with nested HTML() or Text() calls use - HTML() or Text() on the left-hand side. - - For each HTML() and Text() call, calls into separate visitor to check - for inner format() calls. - - Arguments: - node: An AST node. - - """ - if isinstance(node.func, ast.Attribute) and node.func.attr == 'format': - visitor = FormatInterpolateVisitor(self.file_contents, self.results) - visitor.visit(node) - if visitor.interpolates_text_or_html: - format_caller = node.func.value - is_caller_html_or_text = isinstance(format_caller, ast.Call) and \ - isinstance(format_caller.func, ast.Name) and \ - format_caller.func.id in ['Text', 'HTML'] - # If format call has nested Text() or HTML(), then the caller, - # or left-hand-side of the format() call, must be a call to - # Text() or HTML(). - if is_caller_html_or_text is False: - self.results.violations.append(ExpressionRuleViolation( - Rules.python_requires_html_or_text, self.node_to_expression(node.func) - )) - elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']: - visitor = ContainsFormatVisitor(self.file_contents, self.results) - visitor.visit(node) - if visitor.contains_format_call: - self.results.violations.append(ExpressionRuleViolation( - Rules.python_close_before_format, self.node_to_expression(node.func) - )) - - self.generic_visit(node) - - def visit_BinOp(self, node): - """ - Checks for concat using '+' and interpolation using '%' with strings - containing HTML. - - """ - rule = None - if isinstance(node.op, ast.Mod): - rule = Rules.python_interpolate_html - elif isinstance(node.op, ast.Add): - rule = Rules.python_concat_html - if rule is not None: - visitor = HtmlStringVisitor(self.file_contents, self.results) - visitor.visit(node.left) - has_illegal_html_string = len(visitor.unsafe_html_string_nodes) > 0 - # Create new visitor to clear state. - visitor = HtmlStringVisitor(self.file_contents, self.results) - visitor.visit(node.right) - has_illegal_html_string = has_illegal_html_string or len(visitor.unsafe_html_string_nodes) > 0 - if has_illegal_html_string: - self.results.violations.append(ExpressionRuleViolation( - rule, self.node_to_expression(node) - )) - self.generic_visit(node) - - class PythonLinter(BaseLinter): """ The linter for Python files. @@ -2494,172 +1438,3 @@ class MakoTemplateLinter(BaseLinter): start_index = expression.end_index expressions.append(expression) return expressions - - -SKIP_DIRS = ( - '.git', - '.pycharm_helpers', - 'common/static/xmodule/modules', - 'common/static/bundles', - 'perf_tests', - 'node_modules', - 'reports/diff_quality', - 'scripts/tests/templates', - 'spec', - 'test_root', - 'vendor', -) - - -def is_skip_dir(skip_dirs, directory): - """ - Determines whether a directory should be skipped or linted. - - Arguments: - skip_dirs: The configured directories to be skipped. - directory: The current directory to be tested. - - Returns: - True if the directory should be skipped, and False otherwise. - - """ - for skip_dir in skip_dirs: - skip_dir_regex = re.compile( - "(.*/)*{}(/.*)*".format(re.escape(skip_dir))) - if skip_dir_regex.match(directory) is not None: - return True - return False - - -def _process_file(full_path, template_linters, options, summary_results, out): - """ - For each linter, lints the provided file. This means finding and printing - violations. - - Arguments: - full_path: The full path of the file to lint. - template_linters: A list of linting objects. - options: A list of the options. - summary_results: A SummaryResults with a summary of the violations. - out: output file - - """ - num_violations = 0 - directory = os.path.dirname(full_path) - file_name = os.path.basename(full_path) - for template_linter in template_linters: - results = template_linter.process_file(directory, file_name) - results.print_results(options, summary_results, out) - - -def _process_os_dir(directory, files, template_linters, options, summary_results, out): - """ - Calls out to lint each file in the passed list of files. - - Arguments: - directory: Directory being linted. - files: All files in the directory to be linted. - template_linters: A list of linting objects. - options: A list of the options. - summary_results: A SummaryResults with a summary of the violations. - out: output file - - """ - for current_file in sorted(files, key=lambda s: s.lower()): - full_path = os.path.join(directory, current_file) - _process_file(full_path, template_linters, options, summary_results, out) - - -def _process_os_dirs(starting_dir, template_linters, options, summary_results, out): - """ - For each linter, lints all the directories in the starting directory. - - Arguments: - starting_dir: The initial directory to begin the walk. - template_linters: A list of linting objects. - options: A list of the options. - summary_results: A SummaryResults with a summary of the violations. - out: output file - - """ - for root, dirs, files in os.walk(starting_dir): - if is_skip_dir(SKIP_DIRS, root): - del dirs - continue - dirs.sort(key=lambda s: s.lower()) - _process_os_dir(root, files, template_linters, options, summary_results, out) - - -def _lint(file_or_dir, template_linters, options, summary_results, out): - """ - For each linter, lints the provided file or directory. - - Arguments: - file_or_dir: The file or initial directory to lint. - template_linters: A list of linting objects. - options: A list of the options. - summary_results: A SummaryResults with a summary of the violations. - out: output file - - """ - - if file_or_dir is not None and os.path.isfile(file_or_dir): - _process_file(file_or_dir, template_linters, options, summary_results, out) - else: - directory = "." - if file_or_dir is not None: - if os.path.exists(file_or_dir): - directory = file_or_dir - else: - raise ValueError("Path [{}] is not a valid file or directory.".format(file_or_dir)) - _process_os_dirs(directory, template_linters, options, summary_results, out) - - summary_results.print_results(options, out) - - -def main(): - """ - Used to execute the linter. Use --help option for help. - - Prints all violations. - """ - epilog = "For more help using the xss linter, including details on how to\n" - epilog += "understand and fix any violations, read the docs here:\n" - epilog += "\n" - # pylint: disable=line-too-long - epilog += " http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter\n" - - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description='Checks that templates are safe.', - epilog=epilog, - ) - parser.add_argument( - '--list-files', dest='list_files', action='store_true', - help='Only display the filenames that contain violations.' - ) - parser.add_argument( - '--rule-totals', dest='rule_totals', action='store_true', - help='Display the totals for each rule.' - ) - parser.add_argument( - '--verbose', dest='verbose', action='store_true', - help='Print multiple lines where possible for additional context of violations.' - ) - parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.') - - args = parser.parse_args() - - options = { - 'list_files': args.list_files, - 'rule_totals': args.rule_totals, - 'verbose': args.verbose, - } - template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()] - summary_results = SummaryResults() - - _lint(args.path, template_linters, options, summary_results, out=sys.stdout) - - -if __name__ == "__main__": - main() diff --git a/scripts/xsslint/xsslint/main.py b/scripts/xsslint/xsslint/main.py new file mode 100644 index 0000000000..fd133759fc --- /dev/null +++ b/scripts/xsslint/xsslint/main.py @@ -0,0 +1,140 @@ +""" +The main function for the XSS linter. +""" +import argparse +import os +import sys + +from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter +from xsslint.reporting import SummaryResults +from xsslint.utils import SKIP_DIRS, is_skip_dir + + +def _process_file(full_path, template_linters, options, summary_results, out): + """ + For each linter, lints the provided file. This means finding and printing + violations. + + Arguments: + full_path: The full path of the file to lint. + template_linters: A list of linting objects. + options: A list of the options. + summary_results: A SummaryResults with a summary of the violations. + out: output file + + """ + num_violations = 0 + directory = os.path.dirname(full_path) + file_name = os.path.basename(full_path) + for template_linter in template_linters: + results = template_linter.process_file(directory, file_name) + results.print_results(options, summary_results, out) + + +def _process_os_dir(directory, files, template_linters, options, summary_results, out): + """ + Calls out to lint each file in the passed list of files. + + Arguments: + directory: Directory being linted. + files: All files in the directory to be linted. + template_linters: A list of linting objects. + options: A list of the options. + summary_results: A SummaryResults with a summary of the violations. + out: output file + + """ + for current_file in sorted(files, key=lambda s: s.lower()): + full_path = os.path.join(directory, current_file) + _process_file(full_path, template_linters, options, summary_results, out) + + +def _process_os_dirs(starting_dir, template_linters, options, summary_results, out): + """ + For each linter, lints all the directories in the starting directory. + + Arguments: + starting_dir: The initial directory to begin the walk. + template_linters: A list of linting objects. + options: A list of the options. + summary_results: A SummaryResults with a summary of the violations. + out: output file + + """ + for root, dirs, files in os.walk(starting_dir): + if is_skip_dir(SKIP_DIRS, root): + del dirs + continue + dirs.sort(key=lambda s: s.lower()) + _process_os_dir(root, files, template_linters, options, summary_results, out) + + +def _lint(file_or_dir, template_linters, options, summary_results, out): + """ + For each linter, lints the provided file or directory. + + Arguments: + file_or_dir: The file or initial directory to lint. + template_linters: A list of linting objects. + options: A list of the options. + summary_results: A SummaryResults with a summary of the violations. + out: output file + + """ + + if file_or_dir is not None and os.path.isfile(file_or_dir): + _process_file(file_or_dir, template_linters, options, summary_results, out) + else: + directory = "." + if file_or_dir is not None: + if os.path.exists(file_or_dir): + directory = file_or_dir + else: + raise ValueError("Path [{}] is not a valid file or directory.".format(file_or_dir)) + _process_os_dirs(directory, template_linters, options, summary_results, out) + + summary_results.print_results(options, out) + + +def main(): + """ + Used to execute the linter. Use --help option for help. + + Prints all violations. + """ + epilog = "For more help using the xss linter, including details on how to\n" + epilog += "understand and fix any violations, read the docs here:\n" + epilog += "\n" + # pylint: disable=line-too-long + epilog += " http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter\n" + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='Checks that templates are safe.', + epilog=epilog, + ) + parser.add_argument( + '--list-files', dest='list_files', action='store_true', + help='Only display the filenames that contain violations.' + ) + parser.add_argument( + '--rule-totals', dest='rule_totals', action='store_true', + help='Display the totals for each rule.' + ) + parser.add_argument( + '--verbose', dest='verbose', action='store_true', + help='Print multiple lines where possible for additional context of violations.' + ) + parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.') + + args = parser.parse_args() + + options = { + 'list_files': args.list_files, + 'rule_totals': args.rule_totals, + 'verbose': args.verbose, + } + template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()] + summary_results = SummaryResults() + + _lint(args.path, template_linters, options, summary_results, out=sys.stdout) diff --git a/scripts/xsslint/xsslint/reporting.py b/scripts/xsslint/xsslint/reporting.py new file mode 100644 index 0000000000..1db02d29b4 --- /dev/null +++ b/scripts/xsslint/xsslint/reporting.py @@ -0,0 +1,376 @@ +""" +Utility classes for reporting linter results. +""" +from __future__ import print_function + +import os +import re + +from xsslint.rules import Rules +from xsslint.utils import StringLines + + +class RuleViolation(object): + """ + Base class representing a rule violation which can be used for reporting. + """ + + def __init__(self, rule): + """ + Init method. + + Arguments: + rule: The Rule which was violated. + + """ + self.rule = rule + self.full_path = '' + self.is_disabled = False + + def _mark_disabled(self, string, scope_start_string=False): + """ + Performs the disable pragma search and marks the rule as disabled if a + matching pragma is found. + + Pragma format:: + + xss-lint: disable=violation-name,other-violation-name + + Arguments: + string: The string of code in which to search for the pragma. + scope_start_string: True if the pragma must be at the start of the + string, False otherwise. The pragma is considered at the start + of the string if it has a maximum of 5 non-whitespace characters + preceding it. + + Side Effect: + Sets self.is_disabled as appropriate based on whether the pragma is + found. + + """ + pragma_match = re.search(r'xss-lint:\s*disable=([a-zA-Z,\- ]+)', string) + if pragma_match is None: + return + if scope_start_string: + spaces_count = string.count(' ', 0, pragma_match.start()) + non_space_count = pragma_match.start() - spaces_count + if non_space_count > 5: + return + + for disabled_rule in pragma_match.group(1).split(','): + if disabled_rule.strip() == self.rule.rule_id: + self.is_disabled = True + return + + def sort_key(self): + """ + Returns a key that can be sorted on + """ + return (0, 0, self.rule.rule_id) + + def first_line(self): + """ + Since a file level rule has no first line, returns empty string. + """ + return '' + + def prepare_results(self, full_path, string_lines): + """ + Preps this instance for results reporting. + + Arguments: + full_path: Path of the file in violation. + string_lines: A StringLines containing the contents of the file in + violation. + + """ + self.full_path = full_path + self._mark_disabled(string_lines.get_string()) + + def print_results(self, _options, out): + """ + Prints the results represented by this rule violation. + + Arguments: + _options: ignored + out: output file + """ + print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) + + +class ExpressionRuleViolation(RuleViolation): + """ + A class representing a particular rule violation for expressions which + contain more specific details of the location of the violation for reporting + purposes. + + """ + + def __init__(self, rule, expression): + """ + Init method. + + Arguments: + rule: The Rule which was violated. + expression: The Expression that was in violation. + + """ + super(ExpressionRuleViolation, self).__init__(rule) + self.expression = expression + self.start_line = 0 + self.start_column = 0 + self.end_line = 0 + self.end_column = 0 + self.lines = [] + self.is_disabled = False + + def _mark_expression_disabled(self, string_lines): + """ + Marks the expression violation as disabled if it finds the disable + pragma anywhere on the first line of the violation, or at the start of + the line preceding the violation. + + Pragma format:: + + xss-lint: disable=violation-name,other-violation-name + + Examples:: + + <% // xss-lint: disable=underscore-not-escaped %> + <%= gettext('Single Line') %> + + <%= gettext('Single Line') %><% // xss-lint: disable=underscore-not-escaped %> + + Arguments: + string_lines: A StringLines containing the contents of the file in + violation. + + Side Effect: + Sets self.is_disabled as appropriate based on whether the pragma is + found. + + """ + # disable pragma can be at the start of the preceding line + has_previous_line = self.start_line > 1 + if has_previous_line: + line_to_check = string_lines.line_number_to_line(self.start_line - 1) + self._mark_disabled(line_to_check, scope_start_string=True) + if self.is_disabled: + return + + # TODO: this should work at end of any line of the violation + # disable pragma can be anywhere on the first line of the violation + line_to_check = string_lines.line_number_to_line(self.start_line) + self._mark_disabled(line_to_check, scope_start_string=False) + + def sort_key(self): + """ + Returns a key that can be sorted on + """ + return (self.start_line, self.start_column, self.rule.rule_id) + + def first_line(self): + """ + Returns the initial line of code of the violation. + """ + return self.lines[0] + + def prepare_results(self, full_path, string_lines): + """ + Preps this instance for results reporting. + + Arguments: + full_path: Path of the file in violation. + string_lines: A StringLines containing the contents of the file in + violation. + + """ + self.full_path = full_path + start_index = self.expression.start_index + self.start_line = string_lines.index_to_line_number(start_index) + self.start_column = string_lines.index_to_column_number(start_index) + end_index = self.expression.end_index + if end_index is not None: + self.end_line = string_lines.index_to_line_number(end_index) + self.end_column = string_lines.index_to_column_number(end_index) + else: + self.end_line = self.start_line + self.end_column = '?' + for line_number in range(self.start_line, self.end_line + 1): + self.lines.append(string_lines.line_number_to_line(line_number)) + self._mark_expression_disabled(string_lines) + + def print_results(self, options, out): + """ + Prints the results represented by this rule violation. + + Arguments: + options: A list of the following options: + list_files: True to print only file names, and False to print + all violations. + verbose: True for multiple lines of context, False single line. + out: output file + + """ + if options['verbose']: + end_line = self.end_line + 1 + else: + end_line = self.start_line + 1 + for line_number in range(self.start_line, end_line): + if line_number == self.start_line: + column = self.start_column + rule_id = self.rule.rule_id + ":" + else: + column = 1 + rule_id = " " * (len(self.rule.rule_id) + 1) + line = self.lines[line_number - self.start_line].encode(encoding='utf-8') + print("{}: {}:{}: {} {}".format( + self.full_path, + line_number, + column, + rule_id, + line + ), file=out) + + +class SummaryResults(object): + """ + Contains the summary results for all violations. + """ + + def __init__(self): + """ + Init method. + """ + self.total_violations = 0 + self.totals_by_rule = dict.fromkeys( + [rule.rule_id for rule in Rules.__members__.values()], 0 + ) + + def add_violation(self, violation): + """ + Adds a violation to the summary details. + + Arguments: + violation: The violation to add to the summary. + + """ + self.total_violations += 1 + self.totals_by_rule[violation.rule.rule_id] += 1 + + def print_results(self, options, out): + """ + Prints the results (i.e. violations) in this file. + + Arguments: + options: A list of the following options: + list_files: True to print only file names, and False to print + all violations. + rule_totals: If True include totals by rule. + out: output file + + """ + if options['list_files'] is False: + if options['rule_totals']: + max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule) + print("", file=out) + for rule_id in sorted(self.totals_by_rule.keys()): + padding = " " * (max_rule_id_len - len(rule_id)) + print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out) + print("", file=out) + + # matches output of eslint for simplicity + print("", file=out) + print("{} violations total".format(self.total_violations), file=out) + + +class FileResults(object): + """ + Contains the results, or violations, for a file. + """ + + def __init__(self, full_path): + """ + Init method. + + Arguments: + full_path: The full path for this file. + + """ + self.full_path = full_path + self.directory = os.path.dirname(full_path) + self.is_file = os.path.isfile(full_path) + self.violations = [] + + def prepare_results(self, file_string, line_comment_delim=None): + """ + Prepares the results for output for this file. + + Arguments: + file_string: The string of content for this file. + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. + + """ + string_lines = StringLines(file_string) + for violation in self.violations: + violation.prepare_results(self.full_path, string_lines) + if line_comment_delim is not None: + self._filter_commented_code(line_comment_delim) + + def print_results(self, options, summary_results, out): + """ + Prints the results (i.e. violations) in this file. + + Arguments: + options: A list of the following options: + list_files: True to print only file names, and False to print + all violations. + summary_results: A SummaryResults with a summary of the violations. + verbose: True for multiple lines of context, False single line. + out: output file + + Side effect: + Updates the passed SummaryResults. + + """ + if options['list_files']: + if self.violations is not None and 0 < len(self.violations): + print(self.full_path, file=out) + else: + self.violations.sort(key=lambda violation: violation.sort_key()) + for violation in self.violations: + if not violation.is_disabled: + violation.print_results(options, out) + summary_results.add_violation(violation) + + def _filter_commented_code(self, line_comment_delim): + """ + Remove any violations that were found in commented out code. + + Arguments: + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. + + """ + self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)] + + def _is_commented(self, violation, line_comment_delim): + """ + Checks if violation line is commented out. + + Arguments: + violation: The violation to check + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. + + Returns: + True if the first line of the violation is actually commented out, + False otherwise. + """ + if 'parse' in violation.rule.rule_id: + # For parse rules, don't filter them because the comment could be a + # part of the parse issue to begin with. + return False + else: + return violation.first_line().lstrip().startswith(line_comment_delim) diff --git a/scripts/xsslint/xsslint/rules.py b/scripts/xsslint/xsslint/rules.py new file mode 100644 index 0000000000..699d92e872 --- /dev/null +++ b/scripts/xsslint/xsslint/rules.py @@ -0,0 +1,39 @@ +from enum import Enum + + +class Rules(Enum): + """ + An Enum of each rule which the linter will check. + """ + # IMPORTANT: Do not edit without also updating the docs: + # - http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter + mako_missing_default = 'mako-missing-default' + mako_multiple_page_tags = 'mako-multiple-page-tags' + mako_unparseable_expression = 'mako-unparseable-expression' + mako_unwanted_html_filter = 'mako-unwanted-html-filter' + mako_invalid_html_filter = 'mako-invalid-html-filter' + mako_invalid_js_filter = 'mako-invalid-js-filter' + mako_js_missing_quotes = 'mako-js-missing-quotes' + mako_js_html_string = 'mako-js-html-string' + mako_html_entities = 'mako-html-entities' + mako_unknown_context = 'mako-unknown-context' + underscore_not_escaped = 'underscore-not-escaped' + javascript_jquery_append = 'javascript-jquery-append' + javascript_jquery_prepend = 'javascript-jquery-prepend' + javascript_jquery_insertion = 'javascript-jquery-insertion' + javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target' + javascript_jquery_html = 'javascript-jquery-html' + javascript_concat_html = 'javascript-concat-html' + javascript_escape = 'javascript-escape' + javascript_interpolate = 'javascript-interpolate' + python_concat_html = 'python-concat-html' + python_custom_escape = 'python-custom-escape' + python_deprecated_display_name = 'python-deprecated-display-name' + python_requires_html_or_text = 'python-requires-html-or-text' + python_close_before_format = 'python-close-before-format' + python_wrap_html = 'python-wrap-html' + python_interpolate_html = 'python-interpolate-html' + python_parse_error = 'python-parse-error' + + def __init__(self, rule_id): + self.rule_id = rule_id diff --git a/scripts/xsslint/xsslint/utils.py b/scripts/xsslint/xsslint/utils.py new file mode 100644 index 0000000000..f01f7f4de9 --- /dev/null +++ b/scripts/xsslint/xsslint/utils.py @@ -0,0 +1,366 @@ +""" +Utility classes/functions for the XSS Linter. +""" +import re + +SKIP_DIRS = ( + '.git', + '.pycharm_helpers', + 'common/static/xmodule/modules', + 'common/static/bundles', + 'perf_tests', + 'node_modules', + 'reports/diff_quality', + 'scripts/xsslint', + 'spec', + 'test_root', + 'vendor', +) + + +def is_skip_dir(skip_dirs, directory): + """ + Determines whether a directory should be skipped or linted. + + Arguments: + skip_dirs: The configured directories to be skipped. + directory: The current directory to be tested. + + Returns: + True if the directory should be skipped, and False otherwise. + + """ + for skip_dir in skip_dirs: + skip_dir_regex = re.compile( + "(.*/)*{}(/.*)*".format(re.escape(skip_dir))) + if skip_dir_regex.match(directory) is not None: + return True + return False + + +class StringLines(object): + """ + StringLines provides utility methods to work with a string in terms of + lines. As an example, it can convert an index into a line number or column + number (i.e. index into the line). + """ + + def __init__(self, string): + """ + Init method. + + Arguments: + string: The string to work with. + + """ + self._string = string + self._line_start_indexes = self._process_line_breaks(string) + # this is an exclusive index used in the case that the template doesn't + # end with a new line + self.eof_index = len(string) + + def _process_line_breaks(self, string): + """ + Creates a list, where each entry represents the index into the string + where the next line break was found. + + Arguments: + string: The string in which to find line breaks. + + Returns: + A list of indices into the string at which each line begins. + + """ + line_start_indexes = [0] + index = 0 + while True: + index = string.find('\n', index) + if index < 0: + break + index += 1 + line_start_indexes.append(index) + return line_start_indexes + + def get_string(self): + """ + Get the original string. + """ + return self._string + + def index_to_line_number(self, index): + """ + Given an index, determines the line of the index. + + Arguments: + index: The index into the original string for which we want to know + the line number + + Returns: + The line number of the provided index. + + """ + current_line_number = 0 + for line_break_index in self._line_start_indexes: + if line_break_index <= index: + current_line_number += 1 + else: + break + return current_line_number + + def index_to_column_number(self, index): + """ + Gets the column (i.e. index into the line) for the given index into the + original string. + + Arguments: + index: The index into the original string. + + Returns: + The column (i.e. index into the line) for the given index into the + original string. + + """ + start_index = self.index_to_line_start_index(index) + column = index - start_index + 1 + return column + + def index_to_line_start_index(self, index): + """ + Gets the index of the start of the line of the given index. + + Arguments: + index: The index into the original string. + + Returns: + The index of the start of the line of the given index. + + """ + line_number = self.index_to_line_number(index) + return self.line_number_to_start_index(line_number) + + def index_to_line_end_index(self, index): + """ + Gets the index of the end of the line of the given index. + + Arguments: + index: The index into the original string. + + Returns: + The index of the end of the line of the given index. + + """ + line_number = self.index_to_line_number(index) + return self.line_number_to_end_index(line_number) + + def line_number_to_start_index(self, line_number): + """ + Gets the starting index for the provided line number. + + Arguments: + line_number: The line number of the line for which we want to find + the start index. + + Returns: + The starting index for the provided line number. + + """ + return self._line_start_indexes[line_number - 1] + + def line_number_to_end_index(self, line_number): + """ + Gets the ending index for the provided line number. + + Arguments: + line_number: The line number of the line for which we want to find + the end index. + + Returns: + The ending index for the provided line number. + + """ + if line_number < len(self._line_start_indexes): + return self._line_start_indexes[line_number] + else: + # an exclusive index in the case that the file didn't end with a + # newline. + return self.eof_index + + def line_number_to_line(self, line_number): + """ + Gets the line of text designated by the provided line number. + + Arguments: + line_number: The line number of the line we want to find. + + Returns: + The line of text designated by the provided line number. + + """ + start_index = self._line_start_indexes[line_number - 1] + if len(self._line_start_indexes) == line_number: + line = self._string[start_index:] + else: + end_index = self._line_start_indexes[line_number] + line = self._string[start_index:end_index - 1] + return line + + def line_count(self): + """ + Gets the number of lines in the string. + """ + return len(self._line_start_indexes) + + +class ParseString(object): + """ + ParseString is the result of parsing a string out of a template. + + A ParseString has the following attributes: + start_index: The index of the first quote, or None if none found + end_index: The index following the closing quote, or None if + unparseable + quote_length: The length of the quote. Could be 3 for a Python + triple quote. Or None if none found. + string: the text of the parsed string, or None if none found. + string_inner: the text inside the quotes of the parsed string, or None + if none found. + + """ + + def __init__(self, template, start_index, end_index): + """ + Init method. + + Arguments: + template: The template to be searched. + start_index: The start index to search. + end_index: The end index to search before. + + """ + self.end_index = None + self.quote_length = None + self.string = None + self.string_inner = None + self.start_index = self._find_string_start(template, start_index, end_index) + if self.start_index is not None: + result = self._parse_string(template, self.start_index) + if result is not None: + self.end_index = result['end_index'] + self.quote_length = result['quote_length'] + self.string = result['string'] + self.string_inner = result['string_inner'] + + def _find_string_start(self, template, start_index, end_index): + """ + Finds the index of the end of start of a string. In other words, the + first single or double quote. + + Arguments: + template: The template to be searched. + start_index: The start index to search. + end_index: The end index to search before. + + Returns: + The start index of the first single or double quote, or None if no + quote was found. + """ + quote_regex = re.compile(r"""['"]""") + start_match = quote_regex.search(template, start_index, end_index) + if start_match is None: + return None + else: + return start_match.start() + + def _parse_string(self, template, start_index): + """ + Finds the indices of a string inside a template. + + Arguments: + template: The template to be searched. + start_index: The start index of the open quote. + + Returns: + A dict containing the following, or None if not parseable: + end_index: The index following the closing quote + quote_length: The length of the quote. Could be 3 for a Python + triple quote. + string: the text of the parsed string + string_inner: the text inside the quotes of the parsed string + + """ + quote = template[start_index] + if quote not in ["'", '"']: + raise ValueError("start_index must refer to a single or double quote.") + triple_quote = quote * 3 + if template.startswith(triple_quote, start_index): + quote = triple_quote + + next_start_index = start_index + len(quote) + while True: + quote_end_index = template.find(quote, next_start_index) + backslash_index = template.find("\\", next_start_index) + if quote_end_index < 0: + return None + if 0 <= backslash_index < quote_end_index: + next_start_index = backslash_index + 2 + else: + end_index = quote_end_index + len(quote) + quote_length = len(quote) + string = template[start_index:end_index] + return { + 'end_index': end_index, + 'quote_length': quote_length, + 'string': string, + 'string_inner': string[quote_length:-quote_length], + } + + +class Expression(object): + """ + Represents an arbitrary expression. + + An expression can be any type of code snippet. It will sometimes have a + starting and ending delimiter, but not always. + + Here are some example expressions:: + + ${x | n, decode.utf8} + <%= x %> + function(x) + "

" + message + "

" + + Other details of note: + - Only a start_index is required for a valid expression. + - If end_index is None, it means we couldn't parse the rest of the + expression. + - All other details of the expression are optional, and are only added if + and when supplied and needed for additional checks. They are not necessary + for the final results output. + + """ + + def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): + """ + Init method. + + Arguments: + start_index: the starting index of the expression + end_index: the index immediately following the expression, or None + if the expression was unparseable + template: optional template code in which the expression was found + start_delim: optional starting delimiter of the expression + end_delim: optional ending delimeter of the expression + strings: optional list of ParseStrings + + """ + self.start_index = start_index + self.end_index = end_index + self.start_delim = start_delim + self.end_delim = end_delim + self.strings = strings + if template is not None and self.end_index is not None: + self.expression = template[start_index:end_index] + self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() + else: + self.expression = None + self.expression_inner = None diff --git a/scripts/xsslint/xsslint/visitors.py b/scripts/xsslint/xsslint/visitors.py new file mode 100644 index 0000000000..581a563cea --- /dev/null +++ b/scripts/xsslint/xsslint/visitors.py @@ -0,0 +1,331 @@ +""" +Custom AST NodeVisitor classes uses for Python xss linting. +""" +import ast +import re + +from xsslint.reporting import ExpressionRuleViolation +from xsslint.rules import Rules +from xsslint.utils import Expression, ParseString, StringLines + + +class BaseVisitor(ast.NodeVisitor): + """ + Base class for AST NodeVisitor used for Python xss linting. + + Important: This base visitor skips all __repr__ function definitions. + """ + def __init__(self, file_contents, results): + """ + Init method. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + + """ + super(BaseVisitor, self).__init__() + self.file_contents = file_contents + self.lines = StringLines(self.file_contents) + self.results = results + + def node_to_expression(self, node): + """ + Takes a node and translates it to an expression to be used with + violations. + + Arguments: + node: An AST node. + + """ + line_start_index = self.lines.line_number_to_start_index(node.lineno) + start_index = line_start_index + node.col_offset + if isinstance(node, ast.Str): + # Triple quotes give col_offset of -1 on the last line of the string. + if node.col_offset == -1: + triple_quote_regex = re.compile("""['"]{3}""") + end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index) + open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start()) + if open_quote_index > 0: + start_index = open_quote_index + else: + # If we can't find a starting quote, let's assume that what + # we considered the end quote is really the start quote. + start_index = end_triple_quote_match.start() + string = ParseString(self.file_contents, start_index, len(self.file_contents)) + return Expression(string.start_index, string.end_index) + else: + return Expression(start_index) + + def visit_FunctionDef(self, node): + """ + Skips processing of __repr__ functions, since these sometimes use '<' + for non-HTML purposes. + + Arguments: + node: An AST node. + """ + if node.name != '__repr__': + self.generic_visit(node) + + +class HtmlStringVisitor(BaseVisitor): + """ + Checks for strings that contain HTML. Assumes any string with < or > is + considered potential HTML. + + To be used only with strings in context of format or concat. + + """ + def __init__(self, file_contents, results, skip_wrapped_html=False): + """ + Init function. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + skip_wrapped_html: True if visitor should skip strings wrapped with + HTML() or Text(), and False otherwise. + """ + super(HtmlStringVisitor, self).__init__(file_contents, results) + self.skip_wrapped_html = skip_wrapped_html + self.unsafe_html_string_nodes = [] + self.over_escaped_entity_string_nodes = [] + self.has_text_or_html_call = False + + def visit_Str(self, node): + """ + When strings are visited, checks if it contains HTML. + + Arguments: + node: An AST node. + """ + # Skips '<' (and '>') in regex named groups. For example, "(?P)". + if re.search('[(][?]P<', node.s) is None and re.search('<', node.s) is not None: + self.unsafe_html_string_nodes.append(node) + if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s): + self.over_escaped_entity_string_nodes.append(node) + + def visit_Call(self, node): + """ + Skips processing of string contained inside HTML() and Text() calls when + skip_wrapped_html is True. + + Arguments: + node: An AST node. + + """ + is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text'] + if self.skip_wrapped_html and is_html_or_text_call: + self.has_text_or_html_call = True + else: + self.generic_visit(node) + + +class ContainsFormatVisitor(BaseVisitor): + """ + Checks if there are any nested format() calls. + + This visitor is meant to be called on HTML() and Text() ast.Call nodes to + search for any illegal nested format() calls. + + """ + def __init__(self, file_contents, results): + """ + Init function. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + + """ + super(ContainsFormatVisitor, self).__init__(file_contents, results) + self.contains_format_call = False + + def visit_Attribute(self, node): + """ + Simple check for format calls (attribute). + + Arguments: + node: An AST node. + + """ + # Attribute(expr value, identifier attr, expr_context ctx) + if node.attr == 'format': + self.contains_format_call = True + else: + self.generic_visit(node) + + +class FormatInterpolateVisitor(BaseVisitor): + """ + Checks if format() interpolates any HTML() or Text() calls. In other words, + are Text() or HTML() calls nested inside the call to format(). + + This visitor is meant to be called on a format() attribute node. + + """ + def __init__(self, file_contents, results): + """ + Init function. + + Arguments: + file_contents: The contents of the Python file. + results: A file results objects to which violations will be added. + + """ + super(FormatInterpolateVisitor, self).__init__(file_contents, results) + self.interpolates_text_or_html = False + self.format_caller_node = None + + def visit_Call(self, node): + """ + Checks all calls. Remembers the caller of the initial format() call, or + in other words, the left-hand side of the call. Also tracks if HTML() + or Text() calls were seen. + + Arguments: + node: The AST root node. + + """ + if isinstance(node.func, ast.Attribute) and node.func.attr is 'format': + if self.format_caller_node is None: + # Store the caller, or left-hand-side node of the initial + # format() call. + self.format_caller_node = node.func.value + elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']: + # found Text() or HTML() call in arguments passed to format() + self.interpolates_text_or_html = True + self.generic_visit(node) + + def generic_visit(self, node): + """ + Determines whether or not to continue to visit nodes according to the + following rules: + - Once a Text() or HTML() call has been found, stop visiting more nodes. + - Skip the caller of the outer-most format() call, or in other words, + the left-hand side of the call. + + Arguments: + node: The AST root node. + + """ + if self.interpolates_text_or_html is False: + if self.format_caller_node is not node: + super(FormatInterpolateVisitor, self).generic_visit(node) + + +class OuterFormatVisitor(BaseVisitor): + """ + Only visits outer most Python format() calls. These checks are not repeated + for any nested format() calls. + + This visitor is meant to be used once from the root. + + """ + def visit_Call(self, node): + """ + Checks that format() calls which contain HTML() or Text() use HTML() or + Text() as the caller. In other words, Text() or HTML() must be used + before format() for any arguments to format() that contain HTML() or + Text(). + + Arguments: + node: An AST node. + """ + if isinstance(node.func, ast.Attribute) and node.func.attr == 'format': + visitor = HtmlStringVisitor(self.file_contents, self.results, True) + visitor.visit(node) + for unsafe_html_string_node in visitor.unsafe_html_string_nodes: + self.results.violations.append(ExpressionRuleViolation( + Rules.python_wrap_html, self.node_to_expression(unsafe_html_string_node) + )) + # Do not continue processing child nodes of this format() node. + else: + self.generic_visit(node) + + +class AllNodeVisitor(BaseVisitor): + """ + Visits all nodes and does not interfere with calls to generic_visit(). This + is used in conjunction with other visitors to check for a variety of + violations. + + This visitor is meant to be used once from the root. + + """ + + def visit_Attribute(self, node): + """ + Checks for uses of deprecated `display_name_with_default_escaped`. + + Arguments: + node: An AST node. + """ + if node.attr == 'display_name_with_default_escaped': + self.results.violations.append(ExpressionRuleViolation( + Rules.python_deprecated_display_name, self.node_to_expression(node) + )) + self.generic_visit(node) + + def visit_Call(self, node): + """ + Checks for a variety of violations: + - Checks that format() calls with nested HTML() or Text() calls use + HTML() or Text() on the left-hand side. + - For each HTML() and Text() call, calls into separate visitor to check + for inner format() calls. + + Arguments: + node: An AST node. + + """ + if isinstance(node.func, ast.Attribute) and node.func.attr == 'format': + visitor = FormatInterpolateVisitor(self.file_contents, self.results) + visitor.visit(node) + if visitor.interpolates_text_or_html: + format_caller = node.func.value + is_caller_html_or_text = isinstance(format_caller, ast.Call) and \ + isinstance(format_caller.func, ast.Name) and \ + format_caller.func.id in ['Text', 'HTML'] + # If format call has nested Text() or HTML(), then the caller, + # or left-hand-side of the format() call, must be a call to + # Text() or HTML(). + if is_caller_html_or_text is False: + self.results.violations.append(ExpressionRuleViolation( + Rules.python_requires_html_or_text, self.node_to_expression(node.func) + )) + elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']: + visitor = ContainsFormatVisitor(self.file_contents, self.results) + visitor.visit(node) + if visitor.contains_format_call: + self.results.violations.append(ExpressionRuleViolation( + Rules.python_close_before_format, self.node_to_expression(node.func) + )) + + self.generic_visit(node) + + def visit_BinOp(self, node): + """ + Checks for concat using '+' and interpolation using '%' with strings + containing HTML. + + """ + rule = None + if isinstance(node.op, ast.Mod): + rule = Rules.python_interpolate_html + elif isinstance(node.op, ast.Add): + rule = Rules.python_concat_html + if rule is not None: + visitor = HtmlStringVisitor(self.file_contents, self.results) + visitor.visit(node.left) + has_illegal_html_string = len(visitor.unsafe_html_string_nodes) > 0 + # Create new visitor to clear state. + visitor = HtmlStringVisitor(self.file_contents, self.results) + visitor.visit(node.right) + has_illegal_html_string = has_illegal_html_string or len(visitor.unsafe_html_string_nodes) > 0 + if has_illegal_html_string: + self.results.violations.append(ExpressionRuleViolation( + rule, self.node_to_expression(node) + )) + self.generic_visit(node)