Merge pull request #17448 from edx/refactor-xss-linter

Refactor XSS Linter into smaller files
This commit is contained in:
Anthony Mangano
2018-03-01 11:40:58 -05:00
committed by GitHub
19 changed files with 2141 additions and 2094 deletions

View File

@@ -409,7 +409,7 @@ def run_stylelint(options):
@timed
def run_xsslint(options):
"""
Runs xss_linter.py on the codebase
Runs xsslint/xss_linter.py on the codebase
"""
thresholds_option = getattr(options, 'thresholds', '{}')
@@ -434,7 +434,7 @@ def run_xsslint(options):
_prepare_report_dir(xsslint_report_dir)
sh(
"{repo_root}/scripts/{xsslint_script} --rule-totals >> {xsslint_report}".format(
"{repo_root}/scripts/xsslint/{xsslint_script} --rule-totals >> {xsslint_report}".format(
repo_root=Env.REPO_ROOT,
xsslint_script=xsslint_script,
xsslint_report=xsslint_report,

View File

@@ -5,7 +5,7 @@ set -e
#
# xss-commit-linter.sh
#
# Executes xss_linter.py on the set of files in a particular git commit.
# Executes xsslint/xss_linter.py on the set of files in a particular git commit.
#
###############################################################################
@@ -82,6 +82,6 @@ else
for f in $diff_files; do
echo ""
echo "Linting $f:"
./scripts/xss_linter.py $f
./scripts/xsslint/xss_linter.py $f
done
fi

View File

@@ -1,66 +1,18 @@
# -*- coding: utf-8 -*-
"""
Tests for xss_linter.py
Tests for linters.py
"""
import re
from __future__ import print_function
import textwrap
from StringIO import StringIO
from unittest import TestCase
import mock
from ddt import data, ddt
from scripts.xss_linter import (
FileResults,
JavaScriptLinter,
MakoTemplateLinter,
ParseString,
PythonLinter,
Rules,
StringLines,
SummaryResults,
UnderscoreTemplateLinter,
_lint
)
@ddt
class TestStringLines(TestCase):
"""
Test StringLines class.
"""
@data(
{'string': 'test', 'index': 0, 'line_start_index': 0, 'line_end_index': 4},
{'string': 'test', 'index': 2, 'line_start_index': 0, 'line_end_index': 4},
{'string': 'test', 'index': 3, 'line_start_index': 0, 'line_end_index': 4},
{'string': '\ntest', 'index': 0, 'line_start_index': 0, 'line_end_index': 1},
{'string': '\ntest', 'index': 2, 'line_start_index': 1, 'line_end_index': 5},
{'string': '\ntest\n', 'index': 0, 'line_start_index': 0, 'line_end_index': 1},
{'string': '\ntest\n', 'index': 2, 'line_start_index': 1, 'line_end_index': 6},
{'string': '\ntest\n', 'index': 6, 'line_start_index': 6, 'line_end_index': 6},
)
def test_string_lines_start_end_index(self, data):
"""
Test StringLines index_to_line_start_index and index_to_line_end_index.
"""
lines = StringLines(data['string'])
self.assertEqual(lines.index_to_line_start_index(data['index']), data['line_start_index'])
self.assertEqual(lines.index_to_line_end_index(data['index']), data['line_end_index'])
@data(
{'string': 'test', 'line_number': 1, 'line': 'test'},
{'string': '\ntest', 'line_number': 1, 'line': ''},
{'string': '\ntest', 'line_number': 2, 'line': 'test'},
{'string': '\ntest\n', 'line_number': 1, 'line': ''},
{'string': '\ntest\n', 'line_number': 2, 'line': 'test'},
{'string': '\ntest\n', 'line_number': 3, 'line': ''},
)
def test_string_lines_start_end_index(self, data):
"""
Test line_number_to_line.
"""
lines = StringLines(data['string'])
self.assertEqual(lines.line_number_to_line(data['line_number']), data['line'])
from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter
from xsslint.reporting import FileResults
from xsslint.rules import Rules
from xsslint.utils import ParseString
class TestLinter(TestCase):
@@ -93,168 +45,678 @@ class TestLinter(TestCase):
self.assertEqual(violation.rule, rule)
class TestXSSLinter(TestCase):
@ddt
class TestUnderscoreTemplateLinter(TestLinter):
"""
Test some top-level linter functions
Test UnderscoreTemplateLinter
"""
def setUp(self):
def test_check_underscore_file_is_safe(self):
"""
Setup patches on linters for testing.
Test check_underscore_file_is_safe with safe template
"""
self.patch_is_valid_directory(MakoTemplateLinter)
self.patch_is_valid_directory(JavaScriptLinter)
self.patch_is_valid_directory(UnderscoreTemplateLinter)
self.patch_is_valid_directory(PythonLinter)
linter = UnderscoreTemplateLinter()
results = FileResults('')
patcher = mock.patch('scripts.xss_linter.is_skip_dir', return_value=False)
patcher.start()
self.addCleanup(patcher.stop)
template = textwrap.dedent("""
<%- gettext('Single Line') %>
def patch_is_valid_directory(self, linter_class):
<%-
gettext('Multiple Lines')
%>
""")
linter.check_underscore_file_is_safe(template, results)
self.assertEqual(len(results.violations), 0)
def test_check_underscore_file_is_not_safe(self):
"""
Creates a mock patch for _is_valid_directory on a Linter to always
return true. This avoids nested patch calls.
Arguments:
linter_class: The linter class to be patched
Test check_underscore_file_is_safe with unsafe template
"""
patcher = mock.patch.object(linter_class, '_is_valid_directory', return_value=True)
patch_start = patcher.start()
self.addCleanup(patcher.stop)
return patch_start
linter = UnderscoreTemplateLinter()
results = FileResults('')
def test_lint_defaults(self):
template = textwrap.dedent("""
<%= gettext('Single Line') %>
<%=
gettext('Multiple Lines')
%>
""")
linter.check_underscore_file_is_safe(template, results)
self.assertEqual(len(results.violations), 2)
self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped)
self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped)
@data(
{
'template':
'<% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True],
},
{
'template':
'<% // xss-lint: disable=another-rule,underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True],
},
{
'template':
'<% // xss-lint: disable=another-rule %>\n'
'<%= message %>',
'is_disabled': [False],
},
{
'template':
'<% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'// This test does not use proper Underscore.js Template syntax\n'
'// But, it is just testing that a maximum of 5 non-whitespace\n'
'// are used to designate start of line for disabling the next line.\n'
' 1 2 3 4 5 xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>\n'
' 1 2 3 4 5 6 xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'<%= message %><% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'<%= message %>\n'
'<% // xss-lint: disable=underscore-not-escaped %>',
'is_disabled': [False],
},
)
def test_check_underscore_file_disable_rule(self, data):
"""
Tests the top-level linting with default options.
Test check_underscore_file_is_safe with various disabled pragmas
"""
out = StringIO()
summary_results = SummaryResults()
linter = UnderscoreTemplateLinter()
results = FileResults('')
_lint(
'scripts/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': False,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
linter.check_underscore_file_is_safe(data['template'], results)
output = out.getvalue()
# Assert violation details are displayed.
self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
lines_with_rule = 0
lines_without_rule = 0 # Output with verbose setting only.
for underscore_match in re.finditer('test\.underscore:.*\n', output):
if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None:
lines_with_rule += 1
else:
lines_without_rule += 1
self.assertGreaterEqual(lines_with_rule, 1)
self.assertEquals(lines_without_rule, 0)
self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output))
self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
# Assert no rule totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
# Assert final total
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
violation_count = len(data['is_disabled'])
self.assertEqual(len(results.violations), violation_count)
for index in range(0, violation_count - 1):
self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index])
def test_lint_with_verbose(self):
def test_check_underscore_file_disables_one_violation(self):
"""
Tests the top-level linting with verbose option.
Test check_underscore_file_is_safe with disabled before a line only
disables for the violation following
"""
out = StringIO()
summary_results = SummaryResults()
linter = UnderscoreTemplateLinter()
results = FileResults('')
_lint(
'scripts/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': True,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
template = textwrap.dedent("""
<% // xss-lint: disable=underscore-not-escaped %>
<%= message %>
<%= message %>
""")
output = out.getvalue()
lines_with_rule = 0
lines_without_rule = 0 # Output with verbose setting only.
for underscore_match in re.finditer('test\.underscore:.*\n', output):
if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None:
lines_with_rule += 1
else:
lines_without_rule += 1
self.assertGreaterEqual(lines_with_rule, 1)
self.assertGreaterEqual(lines_without_rule, 1)
# Assert no rule totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
# Assert final total
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
linter.check_underscore_file_is_safe(template, results)
def test_lint_with_rule_totals(self):
self.assertEqual(len(results.violations), 2)
self.assertEqual(results.violations[0].is_disabled, True)
self.assertEqual(results.violations[1].is_disabled, False)
@data(
{'template': '<%= HtmlUtils.ensureHtml(message) %>'},
{'template': '<%= _.escape(message) %>'},
)
def test_check_underscore_no_escape_allowed(self, data):
"""
Tests the top-level linting with rule totals option.
Test check_underscore_file_is_safe with expressions that are allowed
without escaping because the internal calls properly escape.
"""
out = StringIO()
summary_results = SummaryResults()
linter = UnderscoreTemplateLinter()
results = FileResults('')
_lint(
'scripts/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': False,
'rule_totals': True,
},
summary_results=summary_results,
out=out,
)
linter.check_underscore_file_is_safe(data['template'], results)
output = out.getvalue()
self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
self.assertEqual(len(results.violations), 0)
# Assert totals output.
self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_wrap_html.rule_id, 1), output))
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
def test_lint_with_list_files(self):
@ddt
class TestJavaScriptLinter(TestLinter):
"""
Test JavaScriptLinter
"""
@data(
{'template': 'var m = "Plain text " + message + "plain text"', 'rule': None},
{'template': 'var m = "檌檒濦 " + message + "plain text"', 'rule': None},
{
'template':
("""$email_header.append($('<input>', type: "button", name: "copy-email-body-text","""
""" value: gettext("Copy Email To Editor"), id: 'copy_email_' + email_id))"""),
'rule': None
},
{'template': 'var m = "<p>" + message + "</p>"', 'rule': Rules.javascript_concat_html},
{
'template': r'var m = "<p>\"escaped quote\"" + message + "\"escaped quote\"</p>"',
'rule': Rules.javascript_concat_html
},
{'template': ' // var m = "<p>" + commentedOutMessage + "</p>"', 'rule': None},
{'template': 'var m = " <p> " + message + " </p> "', 'rule': Rules.javascript_concat_html},
{'template': 'var m = " <p> " + message + " broken string', 'rule': Rules.javascript_concat_html},
)
def test_concat_with_html(self, data):
"""
Tests the top-level linting with list files option.
Test check_javascript_file_is_safe with concatenating strings and HTML
"""
out = StringIO()
summary_results = SummaryResults()
linter = JavaScriptLinter()
results = FileResults('')
_lint(
'scripts/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': True,
'verbose': False,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
output = out.getvalue()
# Assert file with rule is not output.
self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
# Assert file is output.
self.assertIsNotNone(re.search('test\.py', output))
@data(
{'template': 'test.append( test.render().el )', 'rule': None},
{'template': 'test.append(test.render().el)', 'rule': None},
{'template': 'test.append(test.render().$el)', 'rule': None},
{'template': 'test.append(testEl)', 'rule': None},
{'template': 'test.append($test)', 'rule': None},
# plain text is ok because any & will be escaped, and it stops false
# negatives on some other objects with an append() method
{'template': 'test.append("plain text")', 'rule': None},
{'template': 'test.append("<div/>")', 'rule': Rules.javascript_jquery_append},
{'template': 'graph.svg.append("g")', 'rule': None},
{'template': 'test.append( $( "<div>" ) )', 'rule': None},
{'template': 'test.append($("<div>"))', 'rule': None},
{'template': 'test.append($("<div/>"))', 'rule': None},
{'template': 'test.append(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.append($el, someHtml)', 'rule': None},
{'template': 'test.append("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_append},
{'template': 'test.append("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_append},
{'template': 'test.append(message)', 'rule': Rules.javascript_jquery_append},
)
def test_jquery_append(self, data):
"""
Test check_javascript_file_is_safe with JQuery append()
"""
linter = JavaScriptLinter()
results = FileResults('')
# Assert no totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
self.assertIsNone(re.search('{} violations total'.format(7), output))
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.prepend( test.render().el )', 'rule': None},
{'template': 'test.prepend(test.render().el)', 'rule': None},
{'template': 'test.prepend(test.render().$el)', 'rule': None},
{'template': 'test.prepend(testEl)', 'rule': None},
{'template': 'test.prepend($test)', 'rule': None},
{'template': 'test.prepend("text")', 'rule': None},
{'template': 'test.prepend( $( "<div>" ) )', 'rule': None},
{'template': 'test.prepend($("<div>"))', 'rule': None},
{'template': 'test.prepend($("<div/>"))', 'rule': None},
{'template': 'test.prepend(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.prepend($el, someHtml)', 'rule': None},
{'template': 'test.prepend("broken string)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend(message)', 'rule': Rules.javascript_jquery_prepend},
)
def test_jquery_prepend(self, data):
"""
Test check_javascript_file_is_safe with JQuery prepend()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.unwrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrapAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrapInner(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.after(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.before(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceWith(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceWith(edx.HtmlUtils.HTML(htmlString).toString())', 'rule': None},
{'template': 'test.unwrap(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrap(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrapAll(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrapInner(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.after(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.before(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.replaceAll(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.replaceWith(anything)', 'rule': Rules.javascript_jquery_insertion},
)
def test_jquery_insertion(self, data):
"""
Test check_javascript_file_is_safe with JQuery insertion functions
other than append(), prepend() and html() that take content as an
argument (e.g. before(), after()).
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': ' element.parentNode.appendTo(target);', 'rule': None},
{'template': ' test.render().el.appendTo(target);', 'rule': None},
{'template': ' test.render().$el.appendTo(target);', 'rule': None},
{'template': ' test.$element.appendTo(target);', 'rule': None},
{'template': ' test.testEl.appendTo(target);', 'rule': None},
{'template': '$element.appendTo(target);', 'rule': None},
{'template': 'el.appendTo(target);', 'rule': None},
{'template': 'testEl.appendTo(target);', 'rule': None},
{'template': 'testEl.prependTo(target);', 'rule': None},
{'template': 'testEl.insertAfter(target);', 'rule': None},
{'template': 'testEl.insertBefore(target);', 'rule': None},
{'template': 'anycall().appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.prependTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.insertAfter(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.insertBefore(target)', 'rule': Rules.javascript_jquery_insert_into_target},
)
def test_jquery_insert_to_target(self, data):
"""
Test check_javascript_file_is_safe with JQuery insert to target
functions that take a target as an argument, like appendTo() and
prependTo().
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.html()', 'rule': None},
{'template': 'test.html( )', 'rule': None},
{'template': "test.html( '' )", 'rule': None},
{'template': "test.html('')", 'rule': None},
{'template': 'test.html("")', 'rule': None},
{'template': 'test.html(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.setHtml($el, someHtml)', 'rule': None},
{'template': 'test.html("any string")', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html("broken string)', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html("檌檒濦")', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html(anything)', 'rule': Rules.javascript_jquery_html},
)
def test_jquery_html(self, data):
"""
Test check_javascript_file_is_safe with JQuery html()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'StringUtils.interpolate()', 'rule': None},
{'template': 'HtmlUtils.interpolateHtml()', 'rule': None},
{'template': 'interpolate(anything)', 'rule': Rules.javascript_interpolate},
)
def test_javascript_interpolate(self, data):
"""
Test check_javascript_file_is_safe with interpolate()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': '_.escape(message)', 'rule': None},
{'template': 'anything.escape(message)', 'rule': Rules.javascript_escape},
)
def test_javascript_interpolate(self, data):
"""
Test check_javascript_file_is_safe with interpolate()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@ddt
class TestPythonLinter(TestLinter):
"""
Test PythonLinter
"""
@data(
{'template': 'm = "Plain text " + message + "plain text"', 'rule': None},
{'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None},
{'template': ' # m = "<p>" + commentedOutMessage + "</p>"', 'rule': None},
{'template': 'm = "<p>" + message + "</p>"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " </p> "', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " broken string', 'rule': Rules.python_parse_error},
)
def test_concat_with_html(self, data):
"""
Test check_python_file_is_safe with concatenating strings and HTML
"""
linter = PythonLinter()
results = FileResults('')
linter.check_python_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
def test_check_python_expression_display_name(self):
"""
Test _check_python_file_is_safe with display_name_with_default_escaped
fails.
"""
linter = PythonLinter()
results = FileResults('')
python_file = textwrap.dedent("""
context = {
'display_name': self.display_name_with_default_escaped,
}
""")
linter.check_python_file_is_safe(python_file, results)
self.assertEqual(len(results.violations), 1)
self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name)
def test_check_custom_escaping(self):
"""
Test _check_python_file_is_safe fails when custom escapins is used.
"""
linter = PythonLinter()
results = FileResults('')
python_file = textwrap.dedent("""
msg = mmlans.replace('<', '&lt;')
""")
linter.check_python_file_is_safe(python_file, results)
self.assertEqual(len(results.violations), 1)
self.assertEqual(results.violations[0].rule, Rules.python_custom_escape)
@data(
{
'python':
textwrap.dedent("""
msg = Text("Mixed {span_start}text{span_end}").format(
span_start=HTML("<span>"),
span_end=HTML("</span>"),
)
"""),
'rule': None
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format(
span_start=HTML("<span>"),
span_end=HTML("</span>"),
)
"""),
'rule': Rules.python_requires_html_or_text
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}{text}{span_end}".format(
span_start=HTML("<span>"),
text=Text("This should still break."),
span_end=HTML("</span>"),
)
"""),
'rule': Rules.python_requires_html_or_text
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>").format(url),
link_end=HTML("</a>"),
))
"""),
'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text]
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}").format(
link_start=HTML("<a href='{}'>".format(url)),
link_end=HTML("</a>"),
)
"""),
'rule': Rules.python_close_before_format
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(HTML('<b>'))),
link_end=HTML("</a>"),
))
"""),
'rule':
[
Rules.python_close_before_format,
Rules.python_requires_html_or_text,
Rules.python_close_before_format,
Rules.python_requires_html_or_text
]
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format(
span_start="<span>",
span_end="</span>",
)
"""),
'rule': [Rules.python_wrap_html, Rules.python_wrap_html]
},
{
'python':
textwrap.dedent("""
msg = Text(_("String with multiple lines "
"{link_start}unenroll{link_end} "
"and final line")).format(
link_start=HTML(
'<a id="link__over_multiple_lines" '
'data-course-id="{course_id}" '
'href="#test-modal">'
).format(
# Line comment with ' to throw off parser
course_id=course_overview.id
),
link_end=HTML('</a>'),
)
"""),
'rule': None
},
{
'python': "msg = '<span></span>'",
'rule': None
},
{
'python': "msg = HTML('<span></span>')",
'rule': None
},
{
'python': r"""msg = '<a href="{}"'.format(url)""",
'rule': Rules.python_wrap_html
},
{
'python': r"""msg = '{}</p>'.format(message)""",
'rule': Rules.python_wrap_html
},
{
'python': r"""r'regex with {} and named group(?P<id>\d+)?$'.format(test)""",
'rule': None
},
{
'python': r"""msg = '<a href="%s"' % url""",
'rule': Rules.python_interpolate_html
},
{
'python':
textwrap.dedent("""
def __repr__(self):
# Assume repr implementations are safe, and not HTML
return '<CCXCon {}>'.format(self.title)
"""),
'rule': None
},
{
'python': r"""msg = '%s</p>' % message""",
'rule': Rules.python_interpolate_html
},
{
'python': "msg = HTML('<span></span>'",
'rule': Rules.python_parse_error
},
)
def test_check_python_with_text_and_html(self, data):
"""
Test _check_python_file_is_safe tests for proper use of Text() and
Html().
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent(data['python'])
linter.check_python_file_is_safe(file_content, results)
self._validate_data_rules(data, results)
def test_check_python_with_text_and_html_mixed(self):
"""
Test _check_python_file_is_safe tests for proper use of Text() and
Html() for a Python file with a mix of rules.
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent("""
msg1 = '<a href="{}"'.format(url)
msg2 = "Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(url)),
link_end="</a>",
)
msg3 = '<a href="%s"' % url
""")
linter.check_python_file_is_safe(file_content, results)
results.violations.sort(key=lambda violation: violation.sort_key())
self.assertEqual(len(results.violations), 5)
self.assertEqual(results.violations[0].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[1].rule, Rules.python_requires_html_or_text)
self.assertEqual(results.violations[2].rule, Rules.python_close_before_format)
self.assertEqual(results.violations[3].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[4].rule, Rules.python_interpolate_html)
@data(
{
'python':
"""
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 2,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = '''<h3 class="result">{response}</h3>'''.format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
\"\"\" Do we care about a nested triple quote? \"\"\"
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
)
def test_check_python_with_triple_quotes(self, data):
"""
Test _check_python_file_is_safe with triple quotes.
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent(data['python'])
linter.check_python_file_is_safe(file_content, results)
self._validate_data_rules(data, results)
self.assertEqual(results.violations[0].start_line, data['start_line'])
@ddt
@@ -954,677 +1416,3 @@ class TestMakoTemplateLinter(TestLinter):
start_inner_index = parse_string.start_index + parse_string.quote_length
end_inner_index = parse_string.end_index - parse_string.quote_length
self.assertEqual(data['template'][start_inner_index:end_inner_index], parse_string.string_inner)
@ddt
class TestUnderscoreTemplateLinter(TestLinter):
"""
Test UnderscoreTemplateLinter
"""
def test_check_underscore_file_is_safe(self):
"""
Test check_underscore_file_is_safe with safe template
"""
linter = UnderscoreTemplateLinter()
results = FileResults('')
template = textwrap.dedent("""
<%- gettext('Single Line') %>
<%-
gettext('Multiple Lines')
%>
""")
linter.check_underscore_file_is_safe(template, results)
self.assertEqual(len(results.violations), 0)
def test_check_underscore_file_is_not_safe(self):
"""
Test check_underscore_file_is_safe with unsafe template
"""
linter = UnderscoreTemplateLinter()
results = FileResults('')
template = textwrap.dedent("""
<%= gettext('Single Line') %>
<%=
gettext('Multiple Lines')
%>
""")
linter.check_underscore_file_is_safe(template, results)
self.assertEqual(len(results.violations), 2)
self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped)
self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped)
@data(
{
'template':
'<% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True],
},
{
'template':
'<% // xss-lint: disable=another-rule,underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True],
},
{
'template':
'<% // xss-lint: disable=another-rule %>\n'
'<%= message %>',
'is_disabled': [False],
},
{
'template':
'<% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'// This test does not use proper Underscore.js Template syntax\n'
'// But, it is just testing that a maximum of 5 non-whitespace\n'
'// are used to designate start of line for disabling the next line.\n'
' 1 2 3 4 5 xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>\n'
' 1 2 3 4 5 6 xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'<%= message %><% // xss-lint: disable=underscore-not-escaped %>\n'
'<%= message %>',
'is_disabled': [True, False],
},
{
'template':
'<%= message %>\n'
'<% // xss-lint: disable=underscore-not-escaped %>',
'is_disabled': [False],
},
)
def test_check_underscore_file_disable_rule(self, data):
"""
Test check_underscore_file_is_safe with various disabled pragmas
"""
linter = UnderscoreTemplateLinter()
results = FileResults('')
linter.check_underscore_file_is_safe(data['template'], results)
violation_count = len(data['is_disabled'])
self.assertEqual(len(results.violations), violation_count)
for index in range(0, violation_count - 1):
self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index])
def test_check_underscore_file_disables_one_violation(self):
"""
Test check_underscore_file_is_safe with disabled before a line only
disables for the violation following
"""
linter = UnderscoreTemplateLinter()
results = FileResults('')
template = textwrap.dedent("""
<% // xss-lint: disable=underscore-not-escaped %>
<%= message %>
<%= message %>
""")
linter.check_underscore_file_is_safe(template, results)
self.assertEqual(len(results.violations), 2)
self.assertEqual(results.violations[0].is_disabled, True)
self.assertEqual(results.violations[1].is_disabled, False)
@data(
{'template': '<%= HtmlUtils.ensureHtml(message) %>'},
{'template': '<%= _.escape(message) %>'},
)
def test_check_underscore_no_escape_allowed(self, data):
"""
Test check_underscore_file_is_safe with expressions that are allowed
without escaping because the internal calls properly escape.
"""
linter = UnderscoreTemplateLinter()
results = FileResults('')
linter.check_underscore_file_is_safe(data['template'], results)
self.assertEqual(len(results.violations), 0)
@ddt
class TestJavaScriptLinter(TestLinter):
"""
Test JavaScriptLinter
"""
@data(
{'template': 'var m = "Plain text " + message + "plain text"', 'rule': None},
{'template': 'var m = "檌檒濦 " + message + "plain text"', 'rule': None},
{
'template':
("""$email_header.append($('<input>', type: "button", name: "copy-email-body-text","""
""" value: gettext("Copy Email To Editor"), id: 'copy_email_' + email_id))"""),
'rule': None
},
{'template': 'var m = "<p>" + message + "</p>"', 'rule': Rules.javascript_concat_html},
{
'template': r'var m = "<p>\"escaped quote\"" + message + "\"escaped quote\"</p>"',
'rule': Rules.javascript_concat_html
},
{'template': ' // var m = "<p>" + commentedOutMessage + "</p>"', 'rule': None},
{'template': 'var m = " <p> " + message + " </p> "', 'rule': Rules.javascript_concat_html},
{'template': 'var m = " <p> " + message + " broken string', 'rule': Rules.javascript_concat_html},
)
def test_concat_with_html(self, data):
"""
Test check_javascript_file_is_safe with concatenating strings and HTML
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.append( test.render().el )', 'rule': None},
{'template': 'test.append(test.render().el)', 'rule': None},
{'template': 'test.append(test.render().$el)', 'rule': None},
{'template': 'test.append(testEl)', 'rule': None},
{'template': 'test.append($test)', 'rule': None},
# plain text is ok because any & will be escaped, and it stops false
# negatives on some other objects with an append() method
{'template': 'test.append("plain text")', 'rule': None},
{'template': 'test.append("<div/>")', 'rule': Rules.javascript_jquery_append},
{'template': 'graph.svg.append("g")', 'rule': None},
{'template': 'test.append( $( "<div>" ) )', 'rule': None},
{'template': 'test.append($("<div>"))', 'rule': None},
{'template': 'test.append($("<div/>"))', 'rule': None},
{'template': 'test.append(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.append($el, someHtml)', 'rule': None},
{'template': 'test.append("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_append},
{'template': 'test.append("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_append},
{'template': 'test.append(message)', 'rule': Rules.javascript_jquery_append},
)
def test_jquery_append(self, data):
"""
Test check_javascript_file_is_safe with JQuery append()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.prepend( test.render().el )', 'rule': None},
{'template': 'test.prepend(test.render().el)', 'rule': None},
{'template': 'test.prepend(test.render().$el)', 'rule': None},
{'template': 'test.prepend(testEl)', 'rule': None},
{'template': 'test.prepend($test)', 'rule': None},
{'template': 'test.prepend("text")', 'rule': None},
{'template': 'test.prepend( $( "<div>" ) )', 'rule': None},
{'template': 'test.prepend($("<div>"))', 'rule': None},
{'template': 'test.prepend($("<div/>"))', 'rule': None},
{'template': 'test.prepend(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.prepend($el, someHtml)', 'rule': None},
{'template': 'test.prepend("broken string)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend("fail on concat" + test.render().el)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend("fail on concat" + testEl)', 'rule': Rules.javascript_jquery_prepend},
{'template': 'test.prepend(message)', 'rule': Rules.javascript_jquery_prepend},
)
def test_jquery_prepend(self, data):
"""
Test check_javascript_file_is_safe with JQuery prepend()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.unwrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrap(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrapAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.wrapInner(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.after(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.before(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceAll(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceWith(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'test.replaceWith(edx.HtmlUtils.HTML(htmlString).toString())', 'rule': None},
{'template': 'test.unwrap(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrap(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrapAll(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.wrapInner(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.after(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.before(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.replaceAll(anything)', 'rule': Rules.javascript_jquery_insertion},
{'template': 'test.replaceWith(anything)', 'rule': Rules.javascript_jquery_insertion},
)
def test_jquery_insertion(self, data):
"""
Test check_javascript_file_is_safe with JQuery insertion functions
other than append(), prepend() and html() that take content as an
argument (e.g. before(), after()).
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': ' element.parentNode.appendTo(target);', 'rule': None},
{'template': ' test.render().el.appendTo(target);', 'rule': None},
{'template': ' test.render().$el.appendTo(target);', 'rule': None},
{'template': ' test.$element.appendTo(target);', 'rule': None},
{'template': ' test.testEl.appendTo(target);', 'rule': None},
{'template': '$element.appendTo(target);', 'rule': None},
{'template': 'el.appendTo(target);', 'rule': None},
{'template': 'testEl.appendTo(target);', 'rule': None},
{'template': 'testEl.prependTo(target);', 'rule': None},
{'template': 'testEl.insertAfter(target);', 'rule': None},
{'template': 'testEl.insertBefore(target);', 'rule': None},
{'template': 'anycall().appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.appendTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.prependTo(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.insertAfter(target)', 'rule': Rules.javascript_jquery_insert_into_target},
{'template': 'anything.insertBefore(target)', 'rule': Rules.javascript_jquery_insert_into_target},
)
def test_jquery_insert_to_target(self, data):
"""
Test check_javascript_file_is_safe with JQuery insert to target
functions that take a target as an argument, like appendTo() and
prependTo().
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'test.html()', 'rule': None},
{'template': 'test.html( )', 'rule': None},
{'template': "test.html( '' )", 'rule': None},
{'template': "test.html('')", 'rule': None},
{'template': 'test.html("")', 'rule': None},
{'template': 'test.html(HtmlUtils.ensureHtml(htmlSnippet).toString())', 'rule': None},
{'template': 'HtmlUtils.setHtml($el, someHtml)', 'rule': None},
{'template': 'test.html("any string")', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html("broken string)', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html("檌檒濦")', 'rule': Rules.javascript_jquery_html},
{'template': 'test.html(anything)', 'rule': Rules.javascript_jquery_html},
)
def test_jquery_html(self, data):
"""
Test check_javascript_file_is_safe with JQuery html()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': 'StringUtils.interpolate()', 'rule': None},
{'template': 'HtmlUtils.interpolateHtml()', 'rule': None},
{'template': 'interpolate(anything)', 'rule': Rules.javascript_interpolate},
)
def test_javascript_interpolate(self, data):
"""
Test check_javascript_file_is_safe with interpolate()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@data(
{'template': '_.escape(message)', 'rule': None},
{'template': 'anything.escape(message)', 'rule': Rules.javascript_escape},
)
def test_javascript_interpolate(self, data):
"""
Test check_javascript_file_is_safe with interpolate()
"""
linter = JavaScriptLinter()
results = FileResults('')
linter.check_javascript_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
@ddt
class TestPythonLinter(TestLinter):
"""
Test PythonLinter
"""
@data(
{'template': 'm = "Plain text " + message + "plain text"', 'rule': None},
{'template': 'm = "檌檒濦 " + message + "plain text"', 'rule': None},
{'template': ' # m = "<p>" + commentedOutMessage + "</p>"', 'rule': None},
{'template': 'm = "<p>" + message + "</p>"', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " </p> "', 'rule': [Rules.python_concat_html, Rules.python_concat_html]},
{'template': 'm = " <p> " + message + " broken string', 'rule': Rules.python_parse_error},
)
def test_concat_with_html(self, data):
"""
Test check_python_file_is_safe with concatenating strings and HTML
"""
linter = PythonLinter()
results = FileResults('')
linter.check_python_file_is_safe(data['template'], results)
self._validate_data_rules(data, results)
def test_check_python_expression_display_name(self):
"""
Test _check_python_file_is_safe with display_name_with_default_escaped
fails.
"""
linter = PythonLinter()
results = FileResults('')
python_file = textwrap.dedent("""
context = {
'display_name': self.display_name_with_default_escaped,
}
""")
linter.check_python_file_is_safe(python_file, results)
self.assertEqual(len(results.violations), 1)
self.assertEqual(results.violations[0].rule, Rules.python_deprecated_display_name)
def test_check_custom_escaping(self):
"""
Test _check_python_file_is_safe fails when custom escapins is used.
"""
linter = PythonLinter()
results = FileResults('')
python_file = textwrap.dedent("""
msg = mmlans.replace('<', '&lt;')
""")
linter.check_python_file_is_safe(python_file, results)
self.assertEqual(len(results.violations), 1)
self.assertEqual(results.violations[0].rule, Rules.python_custom_escape)
@data(
{
'python':
textwrap.dedent("""
msg = Text("Mixed {span_start}text{span_end}").format(
span_start=HTML("<span>"),
span_end=HTML("</span>"),
)
"""),
'rule': None
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format(
span_start=HTML("<span>"),
span_end=HTML("</span>"),
)
"""),
'rule': Rules.python_requires_html_or_text
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}{text}{span_end}".format(
span_start=HTML("<span>"),
text=Text("This should still break."),
span_end=HTML("</span>"),
)
"""),
'rule': Rules.python_requires_html_or_text
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>").format(url),
link_end=HTML("</a>"),
))
"""),
'rule': [Rules.python_close_before_format, Rules.python_requires_html_or_text]
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}").format(
link_start=HTML("<a href='{}'>".format(url)),
link_end=HTML("</a>"),
)
"""),
'rule': Rules.python_close_before_format
},
{
'python':
textwrap.dedent("""
msg = Text("Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(HTML('<b>'))),
link_end=HTML("</a>"),
))
"""),
'rule':
[
Rules.python_close_before_format,
Rules.python_requires_html_or_text,
Rules.python_close_before_format,
Rules.python_requires_html_or_text
]
},
{
'python':
textwrap.dedent("""
msg = "Mixed {span_start}text{span_end}".format(
span_start="<span>",
span_end="</span>",
)
"""),
'rule': [Rules.python_wrap_html, Rules.python_wrap_html]
},
{
'python':
textwrap.dedent("""
msg = Text(_("String with multiple lines "
"{link_start}unenroll{link_end} "
"and final line")).format(
link_start=HTML(
'<a id="link__over_multiple_lines" '
'data-course-id="{course_id}" '
'href="#test-modal">'
).format(
# Line comment with ' to throw off parser
course_id=course_overview.id
),
link_end=HTML('</a>'),
)
"""),
'rule': None
},
{
'python': "msg = '<span></span>'",
'rule': None
},
{
'python': "msg = HTML('<span></span>')",
'rule': None
},
{
'python': r"""msg = '<a href="{}"'.format(url)""",
'rule': Rules.python_wrap_html
},
{
'python': r"""msg = '{}</p>'.format(message)""",
'rule': Rules.python_wrap_html
},
{
'python': r"""r'regex with {} and named group(?P<id>\d+)?$'.format(test)""",
'rule': None
},
{
'python': r"""msg = '<a href="%s"' % url""",
'rule': Rules.python_interpolate_html
},
{
'python':
textwrap.dedent("""
def __repr__(self):
# Assume repr implementations are safe, and not HTML
return '<CCXCon {}>'.format(self.title)
"""),
'rule': None
},
{
'python': r"""msg = '%s</p>' % message""",
'rule': Rules.python_interpolate_html
},
{
'python': "msg = HTML('<span></span>'",
'rule': Rules.python_parse_error
},
)
def test_check_python_with_text_and_html(self, data):
"""
Test _check_python_file_is_safe tests for proper use of Text() and
Html().
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent(data['python'])
linter.check_python_file_is_safe(file_content, results)
self._validate_data_rules(data, results)
def test_check_python_with_text_and_html_mixed(self):
"""
Test _check_python_file_is_safe tests for proper use of Text() and
Html() for a Python file with a mix of rules.
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent("""
msg1 = '<a href="{}"'.format(url)
msg2 = "Mixed {link_start}text{link_end}".format(
link_start=HTML("<a href='{}'>".format(url)),
link_end="</a>",
)
msg3 = '<a href="%s"' % url
""")
linter.check_python_file_is_safe(file_content, results)
results.violations.sort(key=lambda violation: violation.sort_key())
self.assertEqual(len(results.violations), 5)
self.assertEqual(results.violations[0].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[1].rule, Rules.python_requires_html_or_text)
self.assertEqual(results.violations[2].rule, Rules.python_close_before_format)
self.assertEqual(results.violations[3].rule, Rules.python_wrap_html)
self.assertEqual(results.violations[4].rule, Rules.python_interpolate_html)
@data(
{
'python':
"""
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 2,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = '''<h3 class="result">{response}</h3>'''.format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
{
'python':
"""
def function(self):
'''
Function comment.
'''
response_str = textwrap.dedent('''
<div>
\"\"\" Do we care about a nested triple quote? \"\"\"
<h3 class="result">{response}</h3>
</div>
''').format(response=response_text)
""",
'rule': Rules.python_wrap_html,
'start_line': 6,
},
)
def test_check_python_with_triple_quotes(self, data):
"""
Test _check_python_file_is_safe with triple quotes.
"""
linter = PythonLinter()
results = FileResults('')
file_content = textwrap.dedent(data['python'])
linter.check_python_file_is_safe(file_content, results)
self._validate_data_rules(data, results)
self.assertEqual(results.violations[0].start_line, data['start_line'])

View File

@@ -0,0 +1,179 @@
# -*- coding: utf-8 -*-
"""
Tests for main.py
"""
import re
import textwrap
from StringIO import StringIO
from unittest import TestCase
import mock
from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter
from xsslint.main import _lint
from xsslint.reporting import SummaryResults
from xsslint.rules import Rules
class TestXSSLinter(TestCase):
"""
Test some top-level linter functions
"""
def setUp(self):
"""
Setup patches on linters for testing.
"""
self.patch_is_valid_directory(MakoTemplateLinter)
self.patch_is_valid_directory(JavaScriptLinter)
self.patch_is_valid_directory(UnderscoreTemplateLinter)
self.patch_is_valid_directory(PythonLinter)
patcher = mock.patch('xsslint.main.is_skip_dir', return_value=False)
patcher.start()
self.addCleanup(patcher.stop)
def patch_is_valid_directory(self, linter_class):
"""
Creates a mock patch for _is_valid_directory on a Linter to always
return true. This avoids nested patch calls.
Arguments:
linter_class: The linter class to be patched
"""
patcher = mock.patch.object(linter_class, '_is_valid_directory', return_value=True)
patch_start = patcher.start()
self.addCleanup(patcher.stop)
return patch_start
def test_lint_defaults(self):
"""
Tests the top-level linting with default options.
"""
out = StringIO()
summary_results = SummaryResults()
_lint(
'scripts/xsslint/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': False,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
output = out.getvalue()
# Assert violation details are displayed.
self.assertIsNotNone(re.search('test\.html.*{}'.format(Rules.mako_missing_default.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.coffee.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.javascript_concat_html.rule_id), output))
self.assertIsNotNone(re.search('test\.js.*{}'.format(Rules.underscore_not_escaped.rule_id), output))
lines_with_rule = 0
lines_without_rule = 0 # Output with verbose setting only.
for underscore_match in re.finditer('test\.underscore:.*\n', output):
if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None:
lines_with_rule += 1
else:
lines_without_rule += 1
self.assertGreaterEqual(lines_with_rule, 1)
self.assertEquals(lines_without_rule, 0)
self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_parse_error.rule_id), output))
self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
# Assert no rule totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
# Assert final total
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
def test_lint_with_verbose(self):
"""
Tests the top-level linting with verbose option.
"""
out = StringIO()
summary_results = SummaryResults()
_lint(
'scripts/xsslint/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': True,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
output = out.getvalue()
lines_with_rule = 0
lines_without_rule = 0 # Output with verbose setting only.
for underscore_match in re.finditer('test\.underscore:.*\n', output):
if re.search(Rules.underscore_not_escaped.rule_id, underscore_match.group()) is not None:
lines_with_rule += 1
else:
lines_without_rule += 1
self.assertGreaterEqual(lines_with_rule, 1)
self.assertGreaterEqual(lines_without_rule, 1)
# Assert no rule totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
# Assert final total
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
def test_lint_with_rule_totals(self):
"""
Tests the top-level linting with rule totals option.
"""
out = StringIO()
summary_results = SummaryResults()
_lint(
'scripts/xsslint/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': False,
'verbose': False,
'rule_totals': True,
},
summary_results=summary_results,
out=out,
)
output = out.getvalue()
self.assertIsNotNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
# Assert totals output.
self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
self.assertIsNotNone(re.search('{}:\s*{} violations'.format(Rules.python_wrap_html.rule_id, 1), output))
self.assertIsNotNone(re.search('{} violations total'.format(7), output))
def test_lint_with_list_files(self):
"""
Tests the top-level linting with list files option.
"""
out = StringIO()
summary_results = SummaryResults()
_lint(
'scripts/xsslint/tests/templates',
template_linters=[MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()],
options={
'list_files': True,
'verbose': False,
'rule_totals': False,
},
summary_results=summary_results,
out=out,
)
output = out.getvalue()
# Assert file with rule is not output.
self.assertIsNone(re.search('test\.py.*{}'.format(Rules.python_wrap_html.rule_id), output))
# Assert file is output.
self.assertIsNotNone(re.search('test\.py', output))
# Assert no totals.
self.assertIsNone(re.search('{}:\s*{} violations'.format(Rules.python_parse_error.rule_id, 0), output))
self.assertIsNone(re.search('{} violations total'.format(7), output))

View File

@@ -0,0 +1,44 @@
from unittest import TestCase
from ddt import data, ddt
from xsslint.utils import StringLines
@ddt
class TestStringLines(TestCase):
"""
Test StringLines class.
"""
@data(
{'string': 'test', 'index': 0, 'line_start_index': 0, 'line_end_index': 4},
{'string': 'test', 'index': 2, 'line_start_index': 0, 'line_end_index': 4},
{'string': 'test', 'index': 3, 'line_start_index': 0, 'line_end_index': 4},
{'string': '\ntest', 'index': 0, 'line_start_index': 0, 'line_end_index': 1},
{'string': '\ntest', 'index': 2, 'line_start_index': 1, 'line_end_index': 5},
{'string': '\ntest\n', 'index': 0, 'line_start_index': 0, 'line_end_index': 1},
{'string': '\ntest\n', 'index': 2, 'line_start_index': 1, 'line_end_index': 6},
{'string': '\ntest\n', 'index': 6, 'line_start_index': 6, 'line_end_index': 6},
)
def test_string_lines_start_end_index(self, data):
"""
Test StringLines index_to_line_start_index and index_to_line_end_index.
"""
lines = StringLines(data['string'])
self.assertEqual(lines.index_to_line_start_index(data['index']), data['line_start_index'])
self.assertEqual(lines.index_to_line_end_index(data['index']), data['line_end_index'])
@data(
{'string': 'test', 'line_number': 1, 'line': 'test'},
{'string': '\ntest', 'line_number': 1, 'line': ''},
{'string': '\ntest', 'line_number': 2, 'line': 'test'},
{'string': '\ntest\n', 'line_number': 1, 'line': ''},
{'string': '\ntest\n', 'line_number': 2, 'line': 'test'},
{'string': '\ntest\n', 'line_number': 3, 'line': ''},
)
def test_string_lines_start_end_index(self, data):
"""
Test line_number_to_line.
"""
lines = StringLines(data['string'])
self.assertEqual(lines.line_number_to_line(data['line_number']), data['line'])

9
scripts/xsslint/xss_linter.py Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env python
"""
A linting tool to check for xss vulnerabilities.
"""
if __name__ == "__main__":
from xsslint.main import main
main()

View File

View File

@@ -1,749 +1,15 @@
#!/usr/bin/env python
"""
A linting tool to check for xss vulnerabilities.
Linter classes containing logic for checking various filetypes.
"""
from __future__ import print_function
import argparse
import ast
import os
import re
import sys
import textwrap
from enum import Enum
class StringLines(object):
"""
StringLines provides utility methods to work with a string in terms of
lines. As an example, it can convert an index into a line number or column
number (i.e. index into the line).
"""
def __init__(self, string):
"""
Init method.
Arguments:
string: The string to work with.
"""
self._string = string
self._line_start_indexes = self._process_line_breaks(string)
# this is an exclusive index used in the case that the template doesn't
# end with a new line
self.eof_index = len(string)
def _process_line_breaks(self, string):
"""
Creates a list, where each entry represents the index into the string
where the next line break was found.
Arguments:
string: The string in which to find line breaks.
Returns:
A list of indices into the string at which each line begins.
"""
line_start_indexes = [0]
index = 0
while True:
index = string.find('\n', index)
if index < 0:
break
index += 1
line_start_indexes.append(index)
return line_start_indexes
def get_string(self):
"""
Get the original string.
"""
return self._string
def index_to_line_number(self, index):
"""
Given an index, determines the line of the index.
Arguments:
index: The index into the original string for which we want to know
the line number
Returns:
The line number of the provided index.
"""
current_line_number = 0
for line_break_index in self._line_start_indexes:
if line_break_index <= index:
current_line_number += 1
else:
break
return current_line_number
def index_to_column_number(self, index):
"""
Gets the column (i.e. index into the line) for the given index into the
original string.
Arguments:
index: The index into the original string.
Returns:
The column (i.e. index into the line) for the given index into the
original string.
"""
start_index = self.index_to_line_start_index(index)
column = index - start_index + 1
return column
def index_to_line_start_index(self, index):
"""
Gets the index of the start of the line of the given index.
Arguments:
index: The index into the original string.
Returns:
The index of the start of the line of the given index.
"""
line_number = self.index_to_line_number(index)
return self.line_number_to_start_index(line_number)
def index_to_line_end_index(self, index):
"""
Gets the index of the end of the line of the given index.
Arguments:
index: The index into the original string.
Returns:
The index of the end of the line of the given index.
"""
line_number = self.index_to_line_number(index)
return self.line_number_to_end_index(line_number)
def line_number_to_start_index(self, line_number):
"""
Gets the starting index for the provided line number.
Arguments:
line_number: The line number of the line for which we want to find
the start index.
Returns:
The starting index for the provided line number.
"""
return self._line_start_indexes[line_number - 1]
def line_number_to_end_index(self, line_number):
"""
Gets the ending index for the provided line number.
Arguments:
line_number: The line number of the line for which we want to find
the end index.
Returns:
The ending index for the provided line number.
"""
if line_number < len(self._line_start_indexes):
return self._line_start_indexes[line_number]
else:
# an exclusive index in the case that the file didn't end with a
# newline.
return self.eof_index
def line_number_to_line(self, line_number):
"""
Gets the line of text designated by the provided line number.
Arguments:
line_number: The line number of the line we want to find.
Returns:
The line of text designated by the provided line number.
"""
start_index = self._line_start_indexes[line_number - 1]
if len(self._line_start_indexes) == line_number:
line = self._string[start_index:]
else:
end_index = self._line_start_indexes[line_number]
line = self._string[start_index:end_index - 1]
return line
def line_count(self):
"""
Gets the number of lines in the string.
"""
return len(self._line_start_indexes)
class Rules(Enum):
"""
An Enum of each rule which the linter will check.
"""
# IMPORTANT: Do not edit without also updating the docs:
# - http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter
mako_missing_default = 'mako-missing-default'
mako_multiple_page_tags = 'mako-multiple-page-tags'
mako_unparseable_expression = 'mako-unparseable-expression'
mako_unwanted_html_filter = 'mako-unwanted-html-filter'
mako_invalid_html_filter = 'mako-invalid-html-filter'
mako_invalid_js_filter = 'mako-invalid-js-filter'
mako_js_missing_quotes = 'mako-js-missing-quotes'
mako_js_html_string = 'mako-js-html-string'
mako_html_entities = 'mako-html-entities'
mako_unknown_context = 'mako-unknown-context'
underscore_not_escaped = 'underscore-not-escaped'
javascript_jquery_append = 'javascript-jquery-append'
javascript_jquery_prepend = 'javascript-jquery-prepend'
javascript_jquery_insertion = 'javascript-jquery-insertion'
javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target'
javascript_jquery_html = 'javascript-jquery-html'
javascript_concat_html = 'javascript-concat-html'
javascript_escape = 'javascript-escape'
javascript_interpolate = 'javascript-interpolate'
python_concat_html = 'python-concat-html'
python_custom_escape = 'python-custom-escape'
python_deprecated_display_name = 'python-deprecated-display-name'
python_requires_html_or_text = 'python-requires-html-or-text'
python_close_before_format = 'python-close-before-format'
python_wrap_html = 'python-wrap-html'
python_interpolate_html = 'python-interpolate-html'
python_parse_error = 'python-parse-error'
def __init__(self, rule_id):
self.rule_id = rule_id
class Expression(object):
"""
Represents an arbitrary expression.
An expression can be any type of code snippet. It will sometimes have a
starting and ending delimiter, but not always.
Here are some example expressions::
${x | n, decode.utf8}
<%= x %>
function(x)
"<p>" + message + "</p>"
Other details of note:
- Only a start_index is required for a valid expression.
- If end_index is None, it means we couldn't parse the rest of the
expression.
- All other details of the expression are optional, and are only added if
and when supplied and needed for additional checks. They are not necessary
for the final results output.
"""
def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None):
"""
Init method.
Arguments:
start_index: the starting index of the expression
end_index: the index immediately following the expression, or None
if the expression was unparseable
template: optional template code in which the expression was found
start_delim: optional starting delimiter of the expression
end_delim: optional ending delimeter of the expression
strings: optional list of ParseStrings
"""
self.start_index = start_index
self.end_index = end_index
self.start_delim = start_delim
self.end_delim = end_delim
self.strings = strings
if template is not None and self.end_index is not None:
self.expression = template[start_index:end_index]
self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip()
else:
self.expression = None
self.expression_inner = None
class RuleViolation(object):
"""
Base class representing a rule violation which can be used for reporting.
"""
def __init__(self, rule):
"""
Init method.
Arguments:
rule: The Rule which was violated.
"""
self.rule = rule
self.full_path = ''
self.is_disabled = False
def _mark_disabled(self, string, scope_start_string=False):
"""
Performs the disable pragma search and marks the rule as disabled if a
matching pragma is found.
Pragma format::
xss-lint: disable=violation-name,other-violation-name
Arguments:
string: The string of code in which to search for the pragma.
scope_start_string: True if the pragma must be at the start of the
string, False otherwise. The pragma is considered at the start
of the string if it has a maximum of 5 non-whitespace characters
preceding it.
Side Effect:
Sets self.is_disabled as appropriate based on whether the pragma is
found.
"""
pragma_match = re.search(r'xss-lint:\s*disable=([a-zA-Z,\- ]+)', string)
if pragma_match is None:
return
if scope_start_string:
spaces_count = string.count(' ', 0, pragma_match.start())
non_space_count = pragma_match.start() - spaces_count
if non_space_count > 5:
return
for disabled_rule in pragma_match.group(1).split(','):
if disabled_rule.strip() == self.rule.rule_id:
self.is_disabled = True
return
def sort_key(self):
"""
Returns a key that can be sorted on
"""
return (0, 0, self.rule.rule_id)
def first_line(self):
"""
Since a file level rule has no first line, returns empty string.
"""
return ''
def prepare_results(self, full_path, string_lines):
"""
Preps this instance for results reporting.
Arguments:
full_path: Path of the file in violation.
string_lines: A StringLines containing the contents of the file in
violation.
"""
self.full_path = full_path
self._mark_disabled(string_lines.get_string())
def print_results(self, _options, out):
"""
Prints the results represented by this rule violation.
Arguments:
_options: ignored
out: output file
"""
print("{}: {}".format(self.full_path, self.rule.rule_id), file=out)
class ExpressionRuleViolation(RuleViolation):
"""
A class representing a particular rule violation for expressions which
contain more specific details of the location of the violation for reporting
purposes.
"""
def __init__(self, rule, expression):
"""
Init method.
Arguments:
rule: The Rule which was violated.
expression: The Expression that was in violation.
"""
super(ExpressionRuleViolation, self).__init__(rule)
self.expression = expression
self.start_line = 0
self.start_column = 0
self.end_line = 0
self.end_column = 0
self.lines = []
self.is_disabled = False
def _mark_expression_disabled(self, string_lines):
"""
Marks the expression violation as disabled if it finds the disable
pragma anywhere on the first line of the violation, or at the start of
the line preceding the violation.
Pragma format::
xss-lint: disable=violation-name,other-violation-name
Examples::
<% // xss-lint: disable=underscore-not-escaped %>
<%= gettext('Single Line') %>
<%= gettext('Single Line') %><% // xss-lint: disable=underscore-not-escaped %>
Arguments:
string_lines: A StringLines containing the contents of the file in
violation.
Side Effect:
Sets self.is_disabled as appropriate based on whether the pragma is
found.
"""
# disable pragma can be at the start of the preceding line
has_previous_line = self.start_line > 1
if has_previous_line:
line_to_check = string_lines.line_number_to_line(self.start_line - 1)
self._mark_disabled(line_to_check, scope_start_string=True)
if self.is_disabled:
return
# TODO: this should work at end of any line of the violation
# disable pragma can be anywhere on the first line of the violation
line_to_check = string_lines.line_number_to_line(self.start_line)
self._mark_disabled(line_to_check, scope_start_string=False)
def sort_key(self):
"""
Returns a key that can be sorted on
"""
return (self.start_line, self.start_column, self.rule.rule_id)
def first_line(self):
"""
Returns the initial line of code of the violation.
"""
return self.lines[0]
def prepare_results(self, full_path, string_lines):
"""
Preps this instance for results reporting.
Arguments:
full_path: Path of the file in violation.
string_lines: A StringLines containing the contents of the file in
violation.
"""
self.full_path = full_path
start_index = self.expression.start_index
self.start_line = string_lines.index_to_line_number(start_index)
self.start_column = string_lines.index_to_column_number(start_index)
end_index = self.expression.end_index
if end_index is not None:
self.end_line = string_lines.index_to_line_number(end_index)
self.end_column = string_lines.index_to_column_number(end_index)
else:
self.end_line = self.start_line
self.end_column = '?'
for line_number in range(self.start_line, self.end_line + 1):
self.lines.append(string_lines.line_number_to_line(line_number))
self._mark_expression_disabled(string_lines)
def print_results(self, options, out):
"""
Prints the results represented by this rule violation.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
verbose: True for multiple lines of context, False single line.
out: output file
"""
if options['verbose']:
end_line = self.end_line + 1
else:
end_line = self.start_line + 1
for line_number in range(self.start_line, end_line):
if line_number == self.start_line:
column = self.start_column
rule_id = self.rule.rule_id + ":"
else:
column = 1
rule_id = " " * (len(self.rule.rule_id) + 1)
line = self.lines[line_number - self.start_line].encode(encoding='utf-8')
print("{}: {}:{}: {} {}".format(
self.full_path,
line_number,
column,
rule_id,
line
), file=out)
class SummaryResults(object):
"""
Contains the summary results for all violations.
"""
def __init__(self):
"""
Init method.
"""
self.total_violations = 0
self.totals_by_rule = dict.fromkeys(
[rule.rule_id for rule in Rules.__members__.values()], 0
)
def add_violation(self, violation):
"""
Adds a violation to the summary details.
Arguments:
violation: The violation to add to the summary.
"""
self.total_violations += 1
self.totals_by_rule[violation.rule.rule_id] += 1
def print_results(self, options, out):
"""
Prints the results (i.e. violations) in this file.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
rule_totals: If True include totals by rule.
out: output file
"""
if options['list_files'] is False:
if options['rule_totals']:
max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule)
print("", file=out)
for rule_id in sorted(self.totals_by_rule.keys()):
padding = " " * (max_rule_id_len - len(rule_id))
print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out)
print("", file=out)
# matches output of eslint for simplicity
print("", file=out)
print("{} violations total".format(self.total_violations), file=out)
class FileResults(object):
"""
Contains the results, or violations, for a file.
"""
def __init__(self, full_path):
"""
Init method.
Arguments:
full_path: The full path for this file.
"""
self.full_path = full_path
self.directory = os.path.dirname(full_path)
self.is_file = os.path.isfile(full_path)
self.violations = []
def prepare_results(self, file_string, line_comment_delim=None):
"""
Prepares the results for output for this file.
Arguments:
file_string: The string of content for this file.
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
"""
string_lines = StringLines(file_string)
for violation in self.violations:
violation.prepare_results(self.full_path, string_lines)
if line_comment_delim is not None:
self._filter_commented_code(line_comment_delim)
def print_results(self, options, summary_results, out):
"""
Prints the results (i.e. violations) in this file.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
summary_results: A SummaryResults with a summary of the violations.
verbose: True for multiple lines of context, False single line.
out: output file
Side effect:
Updates the passed SummaryResults.
"""
if options['list_files']:
if self.violations is not None and 0 < len(self.violations):
print(self.full_path, file=out)
else:
self.violations.sort(key=lambda violation: violation.sort_key())
for violation in self.violations:
if not violation.is_disabled:
violation.print_results(options, out)
summary_results.add_violation(violation)
def _filter_commented_code(self, line_comment_delim):
"""
Remove any violations that were found in commented out code.
Arguments:
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
"""
self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)]
def _is_commented(self, violation, line_comment_delim):
"""
Checks if violation line is commented out.
Arguments:
violation: The violation to check
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
Returns:
True if the first line of the violation is actually commented out,
False otherwise.
"""
if 'parse' in violation.rule.rule_id:
# For parse rules, don't filter them because the comment could be a
# part of the parse issue to begin with.
return False
else:
return violation.first_line().lstrip().startswith(line_comment_delim)
class ParseString(object):
"""
ParseString is the result of parsing a string out of a template.
A ParseString has the following attributes:
start_index: The index of the first quote, or None if none found
end_index: The index following the closing quote, or None if
unparseable
quote_length: The length of the quote. Could be 3 for a Python
triple quote. Or None if none found.
string: the text of the parsed string, or None if none found.
string_inner: the text inside the quotes of the parsed string, or None
if none found.
"""
def __init__(self, template, start_index, end_index):
"""
Init method.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
"""
self.end_index = None
self.quote_length = None
self.string = None
self.string_inner = None
self.start_index = self._find_string_start(template, start_index, end_index)
if self.start_index is not None:
result = self._parse_string(template, self.start_index)
if result is not None:
self.end_index = result['end_index']
self.quote_length = result['quote_length']
self.string = result['string']
self.string_inner = result['string_inner']
def _find_string_start(self, template, start_index, end_index):
"""
Finds the index of the end of start of a string. In other words, the
first single or double quote.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
Returns:
The start index of the first single or double quote, or None if no
quote was found.
"""
quote_regex = re.compile(r"""['"]""")
start_match = quote_regex.search(template, start_index, end_index)
if start_match is None:
return None
else:
return start_match.start()
def _parse_string(self, template, start_index):
"""
Finds the indices of a string inside a template.
Arguments:
template: The template to be searched.
start_index: The start index of the open quote.
Returns:
A dict containing the following, or None if not parseable:
end_index: The index following the closing quote
quote_length: The length of the quote. Could be 3 for a Python
triple quote.
string: the text of the parsed string
string_inner: the text inside the quotes of the parsed string
"""
quote = template[start_index]
if quote not in ["'", '"']:
raise ValueError("start_index must refer to a single or double quote.")
triple_quote = quote * 3
if template.startswith(triple_quote, start_index):
quote = triple_quote
next_start_index = start_index + len(quote)
while True:
quote_end_index = template.find(quote, next_start_index)
backslash_index = template.find("\\", next_start_index)
if quote_end_index < 0:
return None
if 0 <= backslash_index < quote_end_index:
next_start_index = backslash_index + 2
else:
end_index = quote_end_index + len(quote)
quote_length = len(quote)
string = template[start_index:end_index]
return {
'end_index': end_index,
'quote_length': quote_length,
'string': string,
'string_inner': string[quote_length:-quote_length],
}
from xsslint.reporting import ExpressionRuleViolation, FileResults, RuleViolation
from xsslint.rules import Rules
from xsslint.utils import SKIP_DIRS, Expression, ParseString, StringLines, is_skip_dir
from xsslint.visitors import AllNodeVisitor, HtmlStringVisitor, OuterFormatVisitor
class BaseLinter(object):
@@ -1419,328 +685,6 @@ class JavaScriptLinter(BaseLinter):
))
class BaseVisitor(ast.NodeVisitor):
"""
Base class for AST NodeVisitor used for Python xss linting.
Important: This base visitor skips all __repr__ function definitions.
"""
def __init__(self, file_contents, results):
"""
Init method.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(BaseVisitor, self).__init__()
self.file_contents = file_contents
self.lines = StringLines(self.file_contents)
self.results = results
def node_to_expression(self, node):
"""
Takes a node and translates it to an expression to be used with
violations.
Arguments:
node: An AST node.
"""
line_start_index = self.lines.line_number_to_start_index(node.lineno)
start_index = line_start_index + node.col_offset
if isinstance(node, ast.Str):
# Triple quotes give col_offset of -1 on the last line of the string.
if node.col_offset == -1:
triple_quote_regex = re.compile("""['"]{3}""")
end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index)
open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start())
if open_quote_index > 0:
start_index = open_quote_index
else:
# If we can't find a starting quote, let's assume that what
# we considered the end quote is really the start quote.
start_index = end_triple_quote_match.start()
string = ParseString(self.file_contents, start_index, len(self.file_contents))
return Expression(string.start_index, string.end_index)
else:
return Expression(start_index)
def visit_FunctionDef(self, node):
"""
Skips processing of __repr__ functions, since these sometimes use '<'
for non-HTML purposes.
Arguments:
node: An AST node.
"""
if node.name != '__repr__':
self.generic_visit(node)
class HtmlStringVisitor(BaseVisitor):
"""
Checks for strings that contain HTML. Assumes any string with < or > is
considered potential HTML.
To be used only with strings in context of format or concat.
"""
def __init__(self, file_contents, results, skip_wrapped_html=False):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
skip_wrapped_html: True if visitor should skip strings wrapped with
HTML() or Text(), and False otherwise.
"""
super(HtmlStringVisitor, self).__init__(file_contents, results)
self.skip_wrapped_html = skip_wrapped_html
self.unsafe_html_string_nodes = []
self.over_escaped_entity_string_nodes = []
self.has_text_or_html_call = False
def visit_Str(self, node):
"""
When strings are visited, checks if it contains HTML.
Arguments:
node: An AST node.
"""
# Skips '<' (and '>') in regex named groups. For example, "(?P<group>)".
if re.search('[(][?]P<', node.s) is None and re.search('<', node.s) is not None:
self.unsafe_html_string_nodes.append(node)
if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s):
self.over_escaped_entity_string_nodes.append(node)
def visit_Call(self, node):
"""
Skips processing of string contained inside HTML() and Text() calls when
skip_wrapped_html is True.
Arguments:
node: An AST node.
"""
is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']
if self.skip_wrapped_html and is_html_or_text_call:
self.has_text_or_html_call = True
else:
self.generic_visit(node)
class ContainsFormatVisitor(BaseVisitor):
"""
Checks if there are any nested format() calls.
This visitor is meant to be called on HTML() and Text() ast.Call nodes to
search for any illegal nested format() calls.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(ContainsFormatVisitor, self).__init__(file_contents, results)
self.contains_format_call = False
def visit_Attribute(self, node):
"""
Simple check for format calls (attribute).
Arguments:
node: An AST node.
"""
# Attribute(expr value, identifier attr, expr_context ctx)
if node.attr == 'format':
self.contains_format_call = True
else:
self.generic_visit(node)
class FormatInterpolateVisitor(BaseVisitor):
"""
Checks if format() interpolates any HTML() or Text() calls. In other words,
are Text() or HTML() calls nested inside the call to format().
This visitor is meant to be called on a format() attribute node.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(FormatInterpolateVisitor, self).__init__(file_contents, results)
self.interpolates_text_or_html = False
self.format_caller_node = None
def visit_Call(self, node):
"""
Checks all calls. Remembers the caller of the initial format() call, or
in other words, the left-hand side of the call. Also tracks if HTML()
or Text() calls were seen.
Arguments:
node: The AST root node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr is 'format':
if self.format_caller_node is None:
# Store the caller, or left-hand-side node of the initial
# format() call.
self.format_caller_node = node.func.value
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
# found Text() or HTML() call in arguments passed to format()
self.interpolates_text_or_html = True
self.generic_visit(node)
def generic_visit(self, node):
"""
Determines whether or not to continue to visit nodes according to the
following rules:
- Once a Text() or HTML() call has been found, stop visiting more nodes.
- Skip the caller of the outer-most format() call, or in other words,
the left-hand side of the call.
Arguments:
node: The AST root node.
"""
if self.interpolates_text_or_html is False:
if self.format_caller_node is not node:
super(FormatInterpolateVisitor, self).generic_visit(node)
class OuterFormatVisitor(BaseVisitor):
"""
Only visits outer most Python format() calls. These checks are not repeated
for any nested format() calls.
This visitor is meant to be used once from the root.
"""
def visit_Call(self, node):
"""
Checks that format() calls which contain HTML() or Text() use HTML() or
Text() as the caller. In other words, Text() or HTML() must be used
before format() for any arguments to format() that contain HTML() or
Text().
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = HtmlStringVisitor(self.file_contents, self.results, True)
visitor.visit(node)
for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, self.node_to_expression(unsafe_html_string_node)
))
# Do not continue processing child nodes of this format() node.
else:
self.generic_visit(node)
class AllNodeVisitor(BaseVisitor):
"""
Visits all nodes and does not interfere with calls to generic_visit(). This
is used in conjunction with other visitors to check for a variety of
violations.
This visitor is meant to be used once from the root.
"""
def visit_Attribute(self, node):
"""
Checks for uses of deprecated `display_name_with_default_escaped`.
Arguments:
node: An AST node.
"""
if node.attr == 'display_name_with_default_escaped':
self.results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, self.node_to_expression(node)
))
self.generic_visit(node)
def visit_Call(self, node):
"""
Checks for a variety of violations:
- Checks that format() calls with nested HTML() or Text() calls use
HTML() or Text() on the left-hand side.
- For each HTML() and Text() call, calls into separate visitor to check
for inner format() calls.
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = FormatInterpolateVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.interpolates_text_or_html:
format_caller = node.func.value
is_caller_html_or_text = isinstance(format_caller, ast.Call) and \
isinstance(format_caller.func, ast.Name) and \
format_caller.func.id in ['Text', 'HTML']
# If format call has nested Text() or HTML(), then the caller,
# or left-hand-side of the format() call, must be a call to
# Text() or HTML().
if is_caller_html_or_text is False:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_requires_html_or_text, self.node_to_expression(node.func)
))
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
visitor = ContainsFormatVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.contains_format_call:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, self.node_to_expression(node.func)
))
self.generic_visit(node)
def visit_BinOp(self, node):
"""
Checks for concat using '+' and interpolation using '%' with strings
containing HTML.
"""
rule = None
if isinstance(node.op, ast.Mod):
rule = Rules.python_interpolate_html
elif isinstance(node.op, ast.Add):
rule = Rules.python_concat_html
if rule is not None:
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.left)
has_illegal_html_string = len(visitor.unsafe_html_string_nodes) > 0
# Create new visitor to clear state.
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.right)
has_illegal_html_string = has_illegal_html_string or len(visitor.unsafe_html_string_nodes) > 0
if has_illegal_html_string:
self.results.violations.append(ExpressionRuleViolation(
rule, self.node_to_expression(node)
))
self.generic_visit(node)
class PythonLinter(BaseLinter):
"""
The linter for Python files.
@@ -2494,172 +1438,3 @@ class MakoTemplateLinter(BaseLinter):
start_index = expression.end_index
expressions.append(expression)
return expressions
SKIP_DIRS = (
'.git',
'.pycharm_helpers',
'common/static/xmodule/modules',
'common/static/bundles',
'perf_tests',
'node_modules',
'reports/diff_quality',
'scripts/tests/templates',
'spec',
'test_root',
'vendor',
)
def is_skip_dir(skip_dirs, directory):
"""
Determines whether a directory should be skipped or linted.
Arguments:
skip_dirs: The configured directories to be skipped.
directory: The current directory to be tested.
Returns:
True if the directory should be skipped, and False otherwise.
"""
for skip_dir in skip_dirs:
skip_dir_regex = re.compile(
"(.*/)*{}(/.*)*".format(re.escape(skip_dir)))
if skip_dir_regex.match(directory) is not None:
return True
return False
def _process_file(full_path, template_linters, options, summary_results, out):
"""
For each linter, lints the provided file. This means finding and printing
violations.
Arguments:
full_path: The full path of the file to lint.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
num_violations = 0
directory = os.path.dirname(full_path)
file_name = os.path.basename(full_path)
for template_linter in template_linters:
results = template_linter.process_file(directory, file_name)
results.print_results(options, summary_results, out)
def _process_os_dir(directory, files, template_linters, options, summary_results, out):
"""
Calls out to lint each file in the passed list of files.
Arguments:
directory: Directory being linted.
files: All files in the directory to be linted.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
for current_file in sorted(files, key=lambda s: s.lower()):
full_path = os.path.join(directory, current_file)
_process_file(full_path, template_linters, options, summary_results, out)
def _process_os_dirs(starting_dir, template_linters, options, summary_results, out):
"""
For each linter, lints all the directories in the starting directory.
Arguments:
starting_dir: The initial directory to begin the walk.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
for root, dirs, files in os.walk(starting_dir):
if is_skip_dir(SKIP_DIRS, root):
del dirs
continue
dirs.sort(key=lambda s: s.lower())
_process_os_dir(root, files, template_linters, options, summary_results, out)
def _lint(file_or_dir, template_linters, options, summary_results, out):
"""
For each linter, lints the provided file or directory.
Arguments:
file_or_dir: The file or initial directory to lint.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
if file_or_dir is not None and os.path.isfile(file_or_dir):
_process_file(file_or_dir, template_linters, options, summary_results, out)
else:
directory = "."
if file_or_dir is not None:
if os.path.exists(file_or_dir):
directory = file_or_dir
else:
raise ValueError("Path [{}] is not a valid file or directory.".format(file_or_dir))
_process_os_dirs(directory, template_linters, options, summary_results, out)
summary_results.print_results(options, out)
def main():
"""
Used to execute the linter. Use --help option for help.
Prints all violations.
"""
epilog = "For more help using the xss linter, including details on how to\n"
epilog += "understand and fix any violations, read the docs here:\n"
epilog += "\n"
# pylint: disable=line-too-long
epilog += " http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter\n"
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description='Checks that templates are safe.',
epilog=epilog,
)
parser.add_argument(
'--list-files', dest='list_files', action='store_true',
help='Only display the filenames that contain violations.'
)
parser.add_argument(
'--rule-totals', dest='rule_totals', action='store_true',
help='Display the totals for each rule.'
)
parser.add_argument(
'--verbose', dest='verbose', action='store_true',
help='Print multiple lines where possible for additional context of violations.'
)
parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.')
args = parser.parse_args()
options = {
'list_files': args.list_files,
'rule_totals': args.rule_totals,
'verbose': args.verbose,
}
template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()]
summary_results = SummaryResults()
_lint(args.path, template_linters, options, summary_results, out=sys.stdout)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,140 @@
"""
The main function for the XSS linter.
"""
import argparse
import os
import sys
from xsslint.linters import JavaScriptLinter, MakoTemplateLinter, PythonLinter, UnderscoreTemplateLinter
from xsslint.reporting import SummaryResults
from xsslint.utils import SKIP_DIRS, is_skip_dir
def _process_file(full_path, template_linters, options, summary_results, out):
"""
For each linter, lints the provided file. This means finding and printing
violations.
Arguments:
full_path: The full path of the file to lint.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
num_violations = 0
directory = os.path.dirname(full_path)
file_name = os.path.basename(full_path)
for template_linter in template_linters:
results = template_linter.process_file(directory, file_name)
results.print_results(options, summary_results, out)
def _process_os_dir(directory, files, template_linters, options, summary_results, out):
"""
Calls out to lint each file in the passed list of files.
Arguments:
directory: Directory being linted.
files: All files in the directory to be linted.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
for current_file in sorted(files, key=lambda s: s.lower()):
full_path = os.path.join(directory, current_file)
_process_file(full_path, template_linters, options, summary_results, out)
def _process_os_dirs(starting_dir, template_linters, options, summary_results, out):
"""
For each linter, lints all the directories in the starting directory.
Arguments:
starting_dir: The initial directory to begin the walk.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
for root, dirs, files in os.walk(starting_dir):
if is_skip_dir(SKIP_DIRS, root):
del dirs
continue
dirs.sort(key=lambda s: s.lower())
_process_os_dir(root, files, template_linters, options, summary_results, out)
def _lint(file_or_dir, template_linters, options, summary_results, out):
"""
For each linter, lints the provided file or directory.
Arguments:
file_or_dir: The file or initial directory to lint.
template_linters: A list of linting objects.
options: A list of the options.
summary_results: A SummaryResults with a summary of the violations.
out: output file
"""
if file_or_dir is not None and os.path.isfile(file_or_dir):
_process_file(file_or_dir, template_linters, options, summary_results, out)
else:
directory = "."
if file_or_dir is not None:
if os.path.exists(file_or_dir):
directory = file_or_dir
else:
raise ValueError("Path [{}] is not a valid file or directory.".format(file_or_dir))
_process_os_dirs(directory, template_linters, options, summary_results, out)
summary_results.print_results(options, out)
def main():
"""
Used to execute the linter. Use --help option for help.
Prints all violations.
"""
epilog = "For more help using the xss linter, including details on how to\n"
epilog += "understand and fix any violations, read the docs here:\n"
epilog += "\n"
# pylint: disable=line-too-long
epilog += " http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter\n"
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description='Checks that templates are safe.',
epilog=epilog,
)
parser.add_argument(
'--list-files', dest='list_files', action='store_true',
help='Only display the filenames that contain violations.'
)
parser.add_argument(
'--rule-totals', dest='rule_totals', action='store_true',
help='Display the totals for each rule.'
)
parser.add_argument(
'--verbose', dest='verbose', action='store_true',
help='Print multiple lines where possible for additional context of violations.'
)
parser.add_argument('path', nargs="?", default=None, help='A file to lint or directory to recursively lint.')
args = parser.parse_args()
options = {
'list_files': args.list_files,
'rule_totals': args.rule_totals,
'verbose': args.verbose,
}
template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter(), JavaScriptLinter(), PythonLinter()]
summary_results = SummaryResults()
_lint(args.path, template_linters, options, summary_results, out=sys.stdout)

View File

@@ -0,0 +1,376 @@
"""
Utility classes for reporting linter results.
"""
from __future__ import print_function
import os
import re
from xsslint.rules import Rules
from xsslint.utils import StringLines
class RuleViolation(object):
"""
Base class representing a rule violation which can be used for reporting.
"""
def __init__(self, rule):
"""
Init method.
Arguments:
rule: The Rule which was violated.
"""
self.rule = rule
self.full_path = ''
self.is_disabled = False
def _mark_disabled(self, string, scope_start_string=False):
"""
Performs the disable pragma search and marks the rule as disabled if a
matching pragma is found.
Pragma format::
xss-lint: disable=violation-name,other-violation-name
Arguments:
string: The string of code in which to search for the pragma.
scope_start_string: True if the pragma must be at the start of the
string, False otherwise. The pragma is considered at the start
of the string if it has a maximum of 5 non-whitespace characters
preceding it.
Side Effect:
Sets self.is_disabled as appropriate based on whether the pragma is
found.
"""
pragma_match = re.search(r'xss-lint:\s*disable=([a-zA-Z,\- ]+)', string)
if pragma_match is None:
return
if scope_start_string:
spaces_count = string.count(' ', 0, pragma_match.start())
non_space_count = pragma_match.start() - spaces_count
if non_space_count > 5:
return
for disabled_rule in pragma_match.group(1).split(','):
if disabled_rule.strip() == self.rule.rule_id:
self.is_disabled = True
return
def sort_key(self):
"""
Returns a key that can be sorted on
"""
return (0, 0, self.rule.rule_id)
def first_line(self):
"""
Since a file level rule has no first line, returns empty string.
"""
return ''
def prepare_results(self, full_path, string_lines):
"""
Preps this instance for results reporting.
Arguments:
full_path: Path of the file in violation.
string_lines: A StringLines containing the contents of the file in
violation.
"""
self.full_path = full_path
self._mark_disabled(string_lines.get_string())
def print_results(self, _options, out):
"""
Prints the results represented by this rule violation.
Arguments:
_options: ignored
out: output file
"""
print("{}: {}".format(self.full_path, self.rule.rule_id), file=out)
class ExpressionRuleViolation(RuleViolation):
"""
A class representing a particular rule violation for expressions which
contain more specific details of the location of the violation for reporting
purposes.
"""
def __init__(self, rule, expression):
"""
Init method.
Arguments:
rule: The Rule which was violated.
expression: The Expression that was in violation.
"""
super(ExpressionRuleViolation, self).__init__(rule)
self.expression = expression
self.start_line = 0
self.start_column = 0
self.end_line = 0
self.end_column = 0
self.lines = []
self.is_disabled = False
def _mark_expression_disabled(self, string_lines):
"""
Marks the expression violation as disabled if it finds the disable
pragma anywhere on the first line of the violation, or at the start of
the line preceding the violation.
Pragma format::
xss-lint: disable=violation-name,other-violation-name
Examples::
<% // xss-lint: disable=underscore-not-escaped %>
<%= gettext('Single Line') %>
<%= gettext('Single Line') %><% // xss-lint: disable=underscore-not-escaped %>
Arguments:
string_lines: A StringLines containing the contents of the file in
violation.
Side Effect:
Sets self.is_disabled as appropriate based on whether the pragma is
found.
"""
# disable pragma can be at the start of the preceding line
has_previous_line = self.start_line > 1
if has_previous_line:
line_to_check = string_lines.line_number_to_line(self.start_line - 1)
self._mark_disabled(line_to_check, scope_start_string=True)
if self.is_disabled:
return
# TODO: this should work at end of any line of the violation
# disable pragma can be anywhere on the first line of the violation
line_to_check = string_lines.line_number_to_line(self.start_line)
self._mark_disabled(line_to_check, scope_start_string=False)
def sort_key(self):
"""
Returns a key that can be sorted on
"""
return (self.start_line, self.start_column, self.rule.rule_id)
def first_line(self):
"""
Returns the initial line of code of the violation.
"""
return self.lines[0]
def prepare_results(self, full_path, string_lines):
"""
Preps this instance for results reporting.
Arguments:
full_path: Path of the file in violation.
string_lines: A StringLines containing the contents of the file in
violation.
"""
self.full_path = full_path
start_index = self.expression.start_index
self.start_line = string_lines.index_to_line_number(start_index)
self.start_column = string_lines.index_to_column_number(start_index)
end_index = self.expression.end_index
if end_index is not None:
self.end_line = string_lines.index_to_line_number(end_index)
self.end_column = string_lines.index_to_column_number(end_index)
else:
self.end_line = self.start_line
self.end_column = '?'
for line_number in range(self.start_line, self.end_line + 1):
self.lines.append(string_lines.line_number_to_line(line_number))
self._mark_expression_disabled(string_lines)
def print_results(self, options, out):
"""
Prints the results represented by this rule violation.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
verbose: True for multiple lines of context, False single line.
out: output file
"""
if options['verbose']:
end_line = self.end_line + 1
else:
end_line = self.start_line + 1
for line_number in range(self.start_line, end_line):
if line_number == self.start_line:
column = self.start_column
rule_id = self.rule.rule_id + ":"
else:
column = 1
rule_id = " " * (len(self.rule.rule_id) + 1)
line = self.lines[line_number - self.start_line].encode(encoding='utf-8')
print("{}: {}:{}: {} {}".format(
self.full_path,
line_number,
column,
rule_id,
line
), file=out)
class SummaryResults(object):
"""
Contains the summary results for all violations.
"""
def __init__(self):
"""
Init method.
"""
self.total_violations = 0
self.totals_by_rule = dict.fromkeys(
[rule.rule_id for rule in Rules.__members__.values()], 0
)
def add_violation(self, violation):
"""
Adds a violation to the summary details.
Arguments:
violation: The violation to add to the summary.
"""
self.total_violations += 1
self.totals_by_rule[violation.rule.rule_id] += 1
def print_results(self, options, out):
"""
Prints the results (i.e. violations) in this file.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
rule_totals: If True include totals by rule.
out: output file
"""
if options['list_files'] is False:
if options['rule_totals']:
max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule)
print("", file=out)
for rule_id in sorted(self.totals_by_rule.keys()):
padding = " " * (max_rule_id_len - len(rule_id))
print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out)
print("", file=out)
# matches output of eslint for simplicity
print("", file=out)
print("{} violations total".format(self.total_violations), file=out)
class FileResults(object):
"""
Contains the results, or violations, for a file.
"""
def __init__(self, full_path):
"""
Init method.
Arguments:
full_path: The full path for this file.
"""
self.full_path = full_path
self.directory = os.path.dirname(full_path)
self.is_file = os.path.isfile(full_path)
self.violations = []
def prepare_results(self, file_string, line_comment_delim=None):
"""
Prepares the results for output for this file.
Arguments:
file_string: The string of content for this file.
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
"""
string_lines = StringLines(file_string)
for violation in self.violations:
violation.prepare_results(self.full_path, string_lines)
if line_comment_delim is not None:
self._filter_commented_code(line_comment_delim)
def print_results(self, options, summary_results, out):
"""
Prints the results (i.e. violations) in this file.
Arguments:
options: A list of the following options:
list_files: True to print only file names, and False to print
all violations.
summary_results: A SummaryResults with a summary of the violations.
verbose: True for multiple lines of context, False single line.
out: output file
Side effect:
Updates the passed SummaryResults.
"""
if options['list_files']:
if self.violations is not None and 0 < len(self.violations):
print(self.full_path, file=out)
else:
self.violations.sort(key=lambda violation: violation.sort_key())
for violation in self.violations:
if not violation.is_disabled:
violation.print_results(options, out)
summary_results.add_violation(violation)
def _filter_commented_code(self, line_comment_delim):
"""
Remove any violations that were found in commented out code.
Arguments:
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
"""
self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)]
def _is_commented(self, violation, line_comment_delim):
"""
Checks if violation line is commented out.
Arguments:
violation: The violation to check
line_comment_delim: A string representing the start of a line
comment. For example "##" for Mako and "//" for JavaScript.
Returns:
True if the first line of the violation is actually commented out,
False otherwise.
"""
if 'parse' in violation.rule.rule_id:
# For parse rules, don't filter them because the comment could be a
# part of the parse issue to begin with.
return False
else:
return violation.first_line().lstrip().startswith(line_comment_delim)

View File

@@ -0,0 +1,39 @@
from enum import Enum
class Rules(Enum):
"""
An Enum of each rule which the linter will check.
"""
# IMPORTANT: Do not edit without also updating the docs:
# - http://edx.readthedocs.org/projects/edx-developer-guide/en/latest/conventions/preventing_xss.html#xss-linter
mako_missing_default = 'mako-missing-default'
mako_multiple_page_tags = 'mako-multiple-page-tags'
mako_unparseable_expression = 'mako-unparseable-expression'
mako_unwanted_html_filter = 'mako-unwanted-html-filter'
mako_invalid_html_filter = 'mako-invalid-html-filter'
mako_invalid_js_filter = 'mako-invalid-js-filter'
mako_js_missing_quotes = 'mako-js-missing-quotes'
mako_js_html_string = 'mako-js-html-string'
mako_html_entities = 'mako-html-entities'
mako_unknown_context = 'mako-unknown-context'
underscore_not_escaped = 'underscore-not-escaped'
javascript_jquery_append = 'javascript-jquery-append'
javascript_jquery_prepend = 'javascript-jquery-prepend'
javascript_jquery_insertion = 'javascript-jquery-insertion'
javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target'
javascript_jquery_html = 'javascript-jquery-html'
javascript_concat_html = 'javascript-concat-html'
javascript_escape = 'javascript-escape'
javascript_interpolate = 'javascript-interpolate'
python_concat_html = 'python-concat-html'
python_custom_escape = 'python-custom-escape'
python_deprecated_display_name = 'python-deprecated-display-name'
python_requires_html_or_text = 'python-requires-html-or-text'
python_close_before_format = 'python-close-before-format'
python_wrap_html = 'python-wrap-html'
python_interpolate_html = 'python-interpolate-html'
python_parse_error = 'python-parse-error'
def __init__(self, rule_id):
self.rule_id = rule_id

View File

@@ -0,0 +1,366 @@
"""
Utility classes/functions for the XSS Linter.
"""
import re
SKIP_DIRS = (
'.git',
'.pycharm_helpers',
'common/static/xmodule/modules',
'common/static/bundles',
'perf_tests',
'node_modules',
'reports/diff_quality',
'scripts/xsslint',
'spec',
'test_root',
'vendor',
)
def is_skip_dir(skip_dirs, directory):
"""
Determines whether a directory should be skipped or linted.
Arguments:
skip_dirs: The configured directories to be skipped.
directory: The current directory to be tested.
Returns:
True if the directory should be skipped, and False otherwise.
"""
for skip_dir in skip_dirs:
skip_dir_regex = re.compile(
"(.*/)*{}(/.*)*".format(re.escape(skip_dir)))
if skip_dir_regex.match(directory) is not None:
return True
return False
class StringLines(object):
"""
StringLines provides utility methods to work with a string in terms of
lines. As an example, it can convert an index into a line number or column
number (i.e. index into the line).
"""
def __init__(self, string):
"""
Init method.
Arguments:
string: The string to work with.
"""
self._string = string
self._line_start_indexes = self._process_line_breaks(string)
# this is an exclusive index used in the case that the template doesn't
# end with a new line
self.eof_index = len(string)
def _process_line_breaks(self, string):
"""
Creates a list, where each entry represents the index into the string
where the next line break was found.
Arguments:
string: The string in which to find line breaks.
Returns:
A list of indices into the string at which each line begins.
"""
line_start_indexes = [0]
index = 0
while True:
index = string.find('\n', index)
if index < 0:
break
index += 1
line_start_indexes.append(index)
return line_start_indexes
def get_string(self):
"""
Get the original string.
"""
return self._string
def index_to_line_number(self, index):
"""
Given an index, determines the line of the index.
Arguments:
index: The index into the original string for which we want to know
the line number
Returns:
The line number of the provided index.
"""
current_line_number = 0
for line_break_index in self._line_start_indexes:
if line_break_index <= index:
current_line_number += 1
else:
break
return current_line_number
def index_to_column_number(self, index):
"""
Gets the column (i.e. index into the line) for the given index into the
original string.
Arguments:
index: The index into the original string.
Returns:
The column (i.e. index into the line) for the given index into the
original string.
"""
start_index = self.index_to_line_start_index(index)
column = index - start_index + 1
return column
def index_to_line_start_index(self, index):
"""
Gets the index of the start of the line of the given index.
Arguments:
index: The index into the original string.
Returns:
The index of the start of the line of the given index.
"""
line_number = self.index_to_line_number(index)
return self.line_number_to_start_index(line_number)
def index_to_line_end_index(self, index):
"""
Gets the index of the end of the line of the given index.
Arguments:
index: The index into the original string.
Returns:
The index of the end of the line of the given index.
"""
line_number = self.index_to_line_number(index)
return self.line_number_to_end_index(line_number)
def line_number_to_start_index(self, line_number):
"""
Gets the starting index for the provided line number.
Arguments:
line_number: The line number of the line for which we want to find
the start index.
Returns:
The starting index for the provided line number.
"""
return self._line_start_indexes[line_number - 1]
def line_number_to_end_index(self, line_number):
"""
Gets the ending index for the provided line number.
Arguments:
line_number: The line number of the line for which we want to find
the end index.
Returns:
The ending index for the provided line number.
"""
if line_number < len(self._line_start_indexes):
return self._line_start_indexes[line_number]
else:
# an exclusive index in the case that the file didn't end with a
# newline.
return self.eof_index
def line_number_to_line(self, line_number):
"""
Gets the line of text designated by the provided line number.
Arguments:
line_number: The line number of the line we want to find.
Returns:
The line of text designated by the provided line number.
"""
start_index = self._line_start_indexes[line_number - 1]
if len(self._line_start_indexes) == line_number:
line = self._string[start_index:]
else:
end_index = self._line_start_indexes[line_number]
line = self._string[start_index:end_index - 1]
return line
def line_count(self):
"""
Gets the number of lines in the string.
"""
return len(self._line_start_indexes)
class ParseString(object):
"""
ParseString is the result of parsing a string out of a template.
A ParseString has the following attributes:
start_index: The index of the first quote, or None if none found
end_index: The index following the closing quote, or None if
unparseable
quote_length: The length of the quote. Could be 3 for a Python
triple quote. Or None if none found.
string: the text of the parsed string, or None if none found.
string_inner: the text inside the quotes of the parsed string, or None
if none found.
"""
def __init__(self, template, start_index, end_index):
"""
Init method.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
"""
self.end_index = None
self.quote_length = None
self.string = None
self.string_inner = None
self.start_index = self._find_string_start(template, start_index, end_index)
if self.start_index is not None:
result = self._parse_string(template, self.start_index)
if result is not None:
self.end_index = result['end_index']
self.quote_length = result['quote_length']
self.string = result['string']
self.string_inner = result['string_inner']
def _find_string_start(self, template, start_index, end_index):
"""
Finds the index of the end of start of a string. In other words, the
first single or double quote.
Arguments:
template: The template to be searched.
start_index: The start index to search.
end_index: The end index to search before.
Returns:
The start index of the first single or double quote, or None if no
quote was found.
"""
quote_regex = re.compile(r"""['"]""")
start_match = quote_regex.search(template, start_index, end_index)
if start_match is None:
return None
else:
return start_match.start()
def _parse_string(self, template, start_index):
"""
Finds the indices of a string inside a template.
Arguments:
template: The template to be searched.
start_index: The start index of the open quote.
Returns:
A dict containing the following, or None if not parseable:
end_index: The index following the closing quote
quote_length: The length of the quote. Could be 3 for a Python
triple quote.
string: the text of the parsed string
string_inner: the text inside the quotes of the parsed string
"""
quote = template[start_index]
if quote not in ["'", '"']:
raise ValueError("start_index must refer to a single or double quote.")
triple_quote = quote * 3
if template.startswith(triple_quote, start_index):
quote = triple_quote
next_start_index = start_index + len(quote)
while True:
quote_end_index = template.find(quote, next_start_index)
backslash_index = template.find("\\", next_start_index)
if quote_end_index < 0:
return None
if 0 <= backslash_index < quote_end_index:
next_start_index = backslash_index + 2
else:
end_index = quote_end_index + len(quote)
quote_length = len(quote)
string = template[start_index:end_index]
return {
'end_index': end_index,
'quote_length': quote_length,
'string': string,
'string_inner': string[quote_length:-quote_length],
}
class Expression(object):
"""
Represents an arbitrary expression.
An expression can be any type of code snippet. It will sometimes have a
starting and ending delimiter, but not always.
Here are some example expressions::
${x | n, decode.utf8}
<%= x %>
function(x)
"<p>" + message + "</p>"
Other details of note:
- Only a start_index is required for a valid expression.
- If end_index is None, it means we couldn't parse the rest of the
expression.
- All other details of the expression are optional, and are only added if
and when supplied and needed for additional checks. They are not necessary
for the final results output.
"""
def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None):
"""
Init method.
Arguments:
start_index: the starting index of the expression
end_index: the index immediately following the expression, or None
if the expression was unparseable
template: optional template code in which the expression was found
start_delim: optional starting delimiter of the expression
end_delim: optional ending delimeter of the expression
strings: optional list of ParseStrings
"""
self.start_index = start_index
self.end_index = end_index
self.start_delim = start_delim
self.end_delim = end_delim
self.strings = strings
if template is not None and self.end_index is not None:
self.expression = template[start_index:end_index]
self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip()
else:
self.expression = None
self.expression_inner = None

View File

@@ -0,0 +1,331 @@
"""
Custom AST NodeVisitor classes uses for Python xss linting.
"""
import ast
import re
from xsslint.reporting import ExpressionRuleViolation
from xsslint.rules import Rules
from xsslint.utils import Expression, ParseString, StringLines
class BaseVisitor(ast.NodeVisitor):
"""
Base class for AST NodeVisitor used for Python xss linting.
Important: This base visitor skips all __repr__ function definitions.
"""
def __init__(self, file_contents, results):
"""
Init method.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(BaseVisitor, self).__init__()
self.file_contents = file_contents
self.lines = StringLines(self.file_contents)
self.results = results
def node_to_expression(self, node):
"""
Takes a node and translates it to an expression to be used with
violations.
Arguments:
node: An AST node.
"""
line_start_index = self.lines.line_number_to_start_index(node.lineno)
start_index = line_start_index + node.col_offset
if isinstance(node, ast.Str):
# Triple quotes give col_offset of -1 on the last line of the string.
if node.col_offset == -1:
triple_quote_regex = re.compile("""['"]{3}""")
end_triple_quote_match = triple_quote_regex.search(self.file_contents, line_start_index)
open_quote_index = self.file_contents.rfind(end_triple_quote_match.group(), 0, end_triple_quote_match.start())
if open_quote_index > 0:
start_index = open_quote_index
else:
# If we can't find a starting quote, let's assume that what
# we considered the end quote is really the start quote.
start_index = end_triple_quote_match.start()
string = ParseString(self.file_contents, start_index, len(self.file_contents))
return Expression(string.start_index, string.end_index)
else:
return Expression(start_index)
def visit_FunctionDef(self, node):
"""
Skips processing of __repr__ functions, since these sometimes use '<'
for non-HTML purposes.
Arguments:
node: An AST node.
"""
if node.name != '__repr__':
self.generic_visit(node)
class HtmlStringVisitor(BaseVisitor):
"""
Checks for strings that contain HTML. Assumes any string with < or > is
considered potential HTML.
To be used only with strings in context of format or concat.
"""
def __init__(self, file_contents, results, skip_wrapped_html=False):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
skip_wrapped_html: True if visitor should skip strings wrapped with
HTML() or Text(), and False otherwise.
"""
super(HtmlStringVisitor, self).__init__(file_contents, results)
self.skip_wrapped_html = skip_wrapped_html
self.unsafe_html_string_nodes = []
self.over_escaped_entity_string_nodes = []
self.has_text_or_html_call = False
def visit_Str(self, node):
"""
When strings are visited, checks if it contains HTML.
Arguments:
node: An AST node.
"""
# Skips '<' (and '>') in regex named groups. For example, "(?P<group>)".
if re.search('[(][?]P<', node.s) is None and re.search('<', node.s) is not None:
self.unsafe_html_string_nodes.append(node)
if re.search(r"&[#]?[a-zA-Z0-9]+;", node.s):
self.over_escaped_entity_string_nodes.append(node)
def visit_Call(self, node):
"""
Skips processing of string contained inside HTML() and Text() calls when
skip_wrapped_html is True.
Arguments:
node: An AST node.
"""
is_html_or_text_call = isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']
if self.skip_wrapped_html and is_html_or_text_call:
self.has_text_or_html_call = True
else:
self.generic_visit(node)
class ContainsFormatVisitor(BaseVisitor):
"""
Checks if there are any nested format() calls.
This visitor is meant to be called on HTML() and Text() ast.Call nodes to
search for any illegal nested format() calls.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(ContainsFormatVisitor, self).__init__(file_contents, results)
self.contains_format_call = False
def visit_Attribute(self, node):
"""
Simple check for format calls (attribute).
Arguments:
node: An AST node.
"""
# Attribute(expr value, identifier attr, expr_context ctx)
if node.attr == 'format':
self.contains_format_call = True
else:
self.generic_visit(node)
class FormatInterpolateVisitor(BaseVisitor):
"""
Checks if format() interpolates any HTML() or Text() calls. In other words,
are Text() or HTML() calls nested inside the call to format().
This visitor is meant to be called on a format() attribute node.
"""
def __init__(self, file_contents, results):
"""
Init function.
Arguments:
file_contents: The contents of the Python file.
results: A file results objects to which violations will be added.
"""
super(FormatInterpolateVisitor, self).__init__(file_contents, results)
self.interpolates_text_or_html = False
self.format_caller_node = None
def visit_Call(self, node):
"""
Checks all calls. Remembers the caller of the initial format() call, or
in other words, the left-hand side of the call. Also tracks if HTML()
or Text() calls were seen.
Arguments:
node: The AST root node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr is 'format':
if self.format_caller_node is None:
# Store the caller, or left-hand-side node of the initial
# format() call.
self.format_caller_node = node.func.value
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
# found Text() or HTML() call in arguments passed to format()
self.interpolates_text_or_html = True
self.generic_visit(node)
def generic_visit(self, node):
"""
Determines whether or not to continue to visit nodes according to the
following rules:
- Once a Text() or HTML() call has been found, stop visiting more nodes.
- Skip the caller of the outer-most format() call, or in other words,
the left-hand side of the call.
Arguments:
node: The AST root node.
"""
if self.interpolates_text_or_html is False:
if self.format_caller_node is not node:
super(FormatInterpolateVisitor, self).generic_visit(node)
class OuterFormatVisitor(BaseVisitor):
"""
Only visits outer most Python format() calls. These checks are not repeated
for any nested format() calls.
This visitor is meant to be used once from the root.
"""
def visit_Call(self, node):
"""
Checks that format() calls which contain HTML() or Text() use HTML() or
Text() as the caller. In other words, Text() or HTML() must be used
before format() for any arguments to format() that contain HTML() or
Text().
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = HtmlStringVisitor(self.file_contents, self.results, True)
visitor.visit(node)
for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_wrap_html, self.node_to_expression(unsafe_html_string_node)
))
# Do not continue processing child nodes of this format() node.
else:
self.generic_visit(node)
class AllNodeVisitor(BaseVisitor):
"""
Visits all nodes and does not interfere with calls to generic_visit(). This
is used in conjunction with other visitors to check for a variety of
violations.
This visitor is meant to be used once from the root.
"""
def visit_Attribute(self, node):
"""
Checks for uses of deprecated `display_name_with_default_escaped`.
Arguments:
node: An AST node.
"""
if node.attr == 'display_name_with_default_escaped':
self.results.violations.append(ExpressionRuleViolation(
Rules.python_deprecated_display_name, self.node_to_expression(node)
))
self.generic_visit(node)
def visit_Call(self, node):
"""
Checks for a variety of violations:
- Checks that format() calls with nested HTML() or Text() calls use
HTML() or Text() on the left-hand side.
- For each HTML() and Text() call, calls into separate visitor to check
for inner format() calls.
Arguments:
node: An AST node.
"""
if isinstance(node.func, ast.Attribute) and node.func.attr == 'format':
visitor = FormatInterpolateVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.interpolates_text_or_html:
format_caller = node.func.value
is_caller_html_or_text = isinstance(format_caller, ast.Call) and \
isinstance(format_caller.func, ast.Name) and \
format_caller.func.id in ['Text', 'HTML']
# If format call has nested Text() or HTML(), then the caller,
# or left-hand-side of the format() call, must be a call to
# Text() or HTML().
if is_caller_html_or_text is False:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_requires_html_or_text, self.node_to_expression(node.func)
))
elif isinstance(node.func, ast.Name) and node.func.id in ['HTML', 'Text']:
visitor = ContainsFormatVisitor(self.file_contents, self.results)
visitor.visit(node)
if visitor.contains_format_call:
self.results.violations.append(ExpressionRuleViolation(
Rules.python_close_before_format, self.node_to_expression(node.func)
))
self.generic_visit(node)
def visit_BinOp(self, node):
"""
Checks for concat using '+' and interpolation using '%' with strings
containing HTML.
"""
rule = None
if isinstance(node.op, ast.Mod):
rule = Rules.python_interpolate_html
elif isinstance(node.op, ast.Add):
rule = Rules.python_concat_html
if rule is not None:
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.left)
has_illegal_html_string = len(visitor.unsafe_html_string_nodes) > 0
# Create new visitor to clear state.
visitor = HtmlStringVisitor(self.file_contents, self.results)
visitor.visit(node.right)
has_illegal_html_string = has_illegal_html_string or len(visitor.unsafe_html_string_nodes) > 0
if has_illegal_html_string:
self.results.violations.append(ExpressionRuleViolation(
rule, self.node_to_expression(node)
))
self.generic_visit(node)