From eb42cb1f387967eb40eefdba50890f1f71de9788 Mon Sep 17 00:00:00 2001 From: Robert Raposa Date: Fri, 11 Dec 2015 10:06:04 -0500 Subject: [PATCH] Add safe template linter - Initial lint of Mako templates - Initial lint of Underscore.js templates --- scripts/__init__.py | 0 scripts/safe_template_checker.py | 498 ++++++++++++++++++++ scripts/tests/__init__.py | 0 scripts/tests/test_safe_template_checker.py | 313 ++++++++++++ 4 files changed, 811 insertions(+) create mode 100644 scripts/__init__.py create mode 100755 scripts/safe_template_checker.py create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/test_safe_template_checker.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/safe_template_checker.py b/scripts/safe_template_checker.py new file mode 100755 index 0000000000..8fc5363caa --- /dev/null +++ b/scripts/safe_template_checker.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python +""" +a tool to check if templates are safe +""" +from enum import Enum +import os +import re +import sys + +_skip_dirs = ( + '/node_modules', + '/vendor', + '/spec', + '/.pycharm_helpers', + '/test_root', + '/reports/diff_quality', + '/common/static/xmodule/modules', +) +_skip_mako_dirs = _skip_dirs +_skip_underscore_dirs = _skip_dirs + ('/test',) + + +def _is_skip_dir(skip_dirs, directory): + for skip_dir in skip_dirs: + if (directory.find(skip_dir + '/') >= 0) or directory.endswith(skip_dir): + return True + return False + + +def _load_file(self, file_full_path): + input_file = open(file_full_path, 'r') + try: + file_contents = input_file.read() + finally: + input_file.close() + + if not file_contents: + return False + return file_contents.decode(encoding='utf-8') + + +def _get_line_breaks(self, file_string): + line_breaks = [0] + index = 0 + while True: + index = file_string.find('\n', index) + if index < 0: + break + index += 1 + line_breaks.append(index) + return line_breaks + + +def _get_line_number(self, line_breaks, index): + current_line_number = 0 + for line_break_index in line_breaks: + if line_break_index <= index: + current_line_number += 1 + else: + break + return current_line_number + + +def _get_line(self, file_string, line_breaks, line_number): + start_index = line_breaks[line_number - 1] + if len(line_breaks) == line_number: + line = file_string[start_index:] + else: + end_index = line_breaks[line_number] + line = file_string[start_index:end_index - 1] + return line.encode(encoding='utf-8') + + +def _get_column_number(self, line_breaks, line_number, index): + start_index = line_breaks[line_number - 1] + column = index - start_index + 1 + return column + + +class Rules(Enum): + mako_missing_default = ('mako-missing-default', 'The default page directive with h filter is missing.') + mako_unparsable_expression = ('mako-unparsable-expression', 'The expression could not be properly parsed.') + mako_unwanted_html_filter = ('mako-unwanted-html-filter', 'Remove explicit h filters when it is provided by the page directive.') + mako_invalid_html_filter = ('mako-invalid-html-filter', 'The expression is using an invalid filter in an HTML context.') + mako_invalid_js_filter = ('mako-invalid-js-filter', 'The expression is using an invalid filter in a JavaScript context.') + mako_js_string_missing_quotes = ('mako-js-string-missing-quotes', 'An expression using the js_escape_string filter must have surrounding quotes.') + + underscore_not_escaped = ('underscore-not-escaped', 'Expressions should be escaped using <%- expression %>.') + + def __init__(self, rule_id, rule_summary): + self.rule_id = rule_id + self.rule_summary = rule_summary + + +class BrokenRule(object): + + def __init__(self, rule): + self.rule = rule + self.full_path = '' + + def prepare_results(self, full_path, file_string, line_breaks): + self.full_path = full_path + + def print_results(self, options): + print "{}: {}".format(self.full_path, self.rule.rule_id) + + +class BrokenExpressionRule(BrokenRule): + + def __init__(self, rule, expression): + super(BrokenExpressionRule, self).__init__(rule) + self.expression = expression + self.start_line = 0 + self.start_column = 0 + self.end_line = 0 + self.end_column = 0 + self.lines = [] + + def prepare_results(self, full_path, file_string, line_breaks): + self.full_path = full_path + start_index = self.expression['start_index'] + self.start_line = _get_line_number(self, line_breaks, start_index) + self.start_column = _get_column_number(self, line_breaks, self.start_line, start_index) + end_index = self.expression['end_index'] + if end_index > 0: + self.end_line = _get_line_number(self, line_breaks, end_index) + self.end_column = _get_column_number(self, line_breaks, self.end_line, end_index) + else: + self.end_line = self.start_line + self.end_column = '?' + for line_number in range(self.start_line, self.end_line + 1): + self.lines.append(_get_line(self, file_string, line_breaks, line_number)) + + def print_results(self, options): + for line_number in range(self.start_line, self.end_line + 1): + if (line_number == self.start_line): + column = self.start_column + rule_id = self.rule.rule_id + ":" + else: + column = 1 + rule_id = " " * (len(self.rule.rule_id) + 1) + print "{}: {}:{}: {} {}".format( + self.full_path, + line_number, + column, + rule_id, + self.lines[line_number - self.start_line - 1] + ) + + +class FileResults(object): + + def __init__(self, full_path): + self.full_path = full_path + self.errors = [] + + def prepare_results(self, file_string): + line_breaks = _get_line_breaks(self, file_string) + for error in self.errors: + error.prepare_results(self.full_path, file_string, line_breaks) + + def print_results(self, options): + if options['is_quiet']: + print self.full_path + else: + for error in self.errors: + error.print_results(options) + + +class MakoTemplateChecker(object): + + _skip_mako_dirs = _skip_dirs + + _results = [] + + def process_file(self, directory, file_name): + """ + Process file to determine if it is a Mako template file and + if it is safe. + + Arguments: + directory (string): The directory of the file to be checked + file_name (string): A filename for a potential Mako file + + Side effects: + Adds detailed results to internal data structure for + later reporting + + """ + if not self._is_mako_directory(directory): + return + + # TODO: When safe-by-default is turned on at the platform level, will we: + # 1. Turn it on for .html only, or + # 2. Turn it on for all files, and have different rulesets that have + # different rules of .xml, .html, .js, .txt Mako templates (e.g. use + # the n filter to turn off h for some of these)? + # For now, we only check .html and .xml files + if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')): + return + + self._load_and_check_mako_file_is_safe(directory + '/' + file_name) + + def print_results(self, options): + for result in self._results: + result.print_results(options) + + def _is_mako_directory(self, directory): + if _is_skip_dir(self._skip_mako_dirs, directory): + return False + + if (directory.find('/templates/') >= 0) or directory.endswith('/templates'): + return True + + return False + + def _load_and_check_mako_file_is_safe(self, mako_file_full_path): + mako_template = _load_file(self, mako_file_full_path) + results = FileResults(mako_file_full_path) + self._check_mako_file_is_safe(mako_template, results) + if len(results.errors) > 0: + self._results.append(results) + + def _check_mako_file_is_safe(self, mako_template, results): + has_page_default = self._has_page_default(mako_template, results) + if not has_page_default: + results.errors.append(BrokenRule(Rules.mako_missing_default)) + self._check_mako_expressions(mako_template, has_page_default, results) + results.prepare_results(mako_template) + + def _has_page_default(self, mako_template, results): + page_h_filter_regex = re.compile('<%page expression_filter=(?:"h"|\'h\')\s*/>') + page_match = page_h_filter_regex.search(mako_template) + return page_match + + def _check_mako_expressions(self, mako_template, has_page_default, results): + expressions = self._find_mako_expressions(mako_template) + contexts = self._get_contexts(mako_template) + for expression in expressions: + if expression['expression'] is None: + results.errors.append(BrokenExpressionRule( + Rules.mako_unparsable_expression, expression + )) + continue + + context = self._get_context(contexts, expression['start_index']) + self._check_filters(mako_template, expression, context, has_page_default, results) + + def _check_filters(self, mako_template, expression, context, has_page_default, results): + # finds "| n, h}" when given "${x | n, h}" + filters_regex = re.compile('\|[a-zA-Z_,\s]*\}') + filters_match = filters_regex.search(expression['expression']) + if filters_match is None: + if context == 'javascript': + results.errors.append(BrokenExpressionRule( + Rules.mako_invalid_js_filter, expression + )) + return + + filters = filters_match.group()[1:-1].replace(" ", "").split(",") + if context == 'html': + if (len(filters) == 1) and (filters[0] == 'h'): + if has_page_default: + # suppress this error if the page default hasn't been set, + # otherwise the template might get less safe + results.errors.append(BrokenExpressionRule( + Rules.mako_unwanted_html_filter, expression + )) + elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'dump_html_escaped_json'): + # {x | n, dump_html_escaped_json} is valid + pass + else: + results.errors.append(BrokenExpressionRule( + Rules.mako_invalid_html_filter, expression + )) + + else: + if (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'dump_js_escaped_json'): + # {x | n, dump_js_escaped_json} is valid + pass + elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'js_escaped_string'): + # {x | n, js_escaped_string} is valid, if surrounded by quotes + prior_character = mako_template[expression['start_index'] - 1] + next_character = mako_template[expression['end_index'] + 1] + has_surrounding_quotes = (prior_character == '\'' and next_character == '\'') or \ + (prior_character == '"' and next_character == '"') + if not has_surrounding_quotes: + results.errors.append(BrokenExpressionRule( + Rules.mako_js_string_missing_quotes, expression + )) + else: + results.errors.append(BrokenExpressionRule( + Rules.mako_invalid_js_filter, expression + )) + + def _get_contexts(self, mako_template): + """ + Returns a data structure that represents the indices at which the + template changes from HTML context to JavaScript and back. + + Return: + A list of dicts where each dict contains the 'index' of the context + and the context 'type' (e.g. 'html' or 'javascript'). + """ + contexts_re = re.compile(r""" + | # script tag start + | # script tag end + <%static:require_module.*?>| # require js script tag start + # require js script tag end""", re.VERBOSE + re.IGNORECASE) + media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE) + + contexts = [{'index': 0, 'type': 'html'}] + for context in contexts_re.finditer(mako_template): + match_string = context.group().lower() + if match_string.startswith("= 0) and (open_curly_index < end_curly_index): + if mako_template[open_curly_index - 1] == '$': + # assume if we find "${" it is the start of the next expression + # and we have a parse error + return -1 + else: + return self._find_balanced_end_curly(mako_template, open_curly_index + 1, num_open_curlies + 1) + + if num_open_curlies == 0: + return end_curly_index + else: + return self._find_balanced_end_curly(mako_template, end_curly_index + 1, num_open_curlies - 1) + + +class UnderscoreTemplateChecker(object): + + _skip_underscore_dirs = _skip_dirs + + _results = [] + + def process_file(self, directory, file_name): + """ + Process file to determine if it is an Underscore template file and + if it is safe. + + Arguments: + directory (string): The directory of the file to be checked + file_name (string): A filename for a potential underscore file + + Side effects: + Adds detailed results to internal data structure for + later reporting + + """ + if not self._is_underscore_directory(directory): + return + + if not file_name.lower().endswith('.underscore'): + return + + self._load_and_check_underscore_file_is_safe(directory + '/' + file_name) + + def print_results(self, options): + for result in self._results: + result.print_results(options) + + def _is_underscore_directory(self, directory): + if _is_skip_dir(self._skip_underscore_dirs, directory): + return False + + return True + + def _load_and_check_underscore_file_is_safe(self, file_full_path): + underscore_template = _load_file(self, file_full_path) + results = FileResults(file_full_path) + self._check_underscore_file_is_safe(underscore_template, results) + if len(results.errors) > 0: + self._results.append(results) + + def _check_underscore_file_is_safe(self, underscore_template, results): + self._check_underscore_expressions(underscore_template, results) + results.prepare_results(underscore_template) + + def _check_underscore_expressions(self, underscore_template, results): + expressions = self._find_unescaped_expressions(underscore_template) + for expression in expressions: + results.errors.append(BrokenExpressionRule( + Rules.underscore_not_escaped, expression + )) + + def _find_unescaped_expressions(self, underscore_template): + unescaped_expression_regex = re.compile("<%=.*?%>") + + expressions = [] + for match in unescaped_expression_regex.finditer(underscore_template): + expression = { + 'start_index': match.start(), + 'end_index': match.end(), + 'expression': match.group(), + } + expressions.append(expression) + + return expressions + + +def _process_current_walk(current_walk, template_checkers): + walk_directory = current_walk[0] + walk_files = current_walk[2] + for walk_file in walk_files: + for template_checker in template_checkers: + template_checker.process_file(walk_directory, walk_file) + + +def _process_os_walk(starting_dir, template_checkers): + for current_walk in os.walk(starting_dir): + _process_current_walk(current_walk, template_checkers) + + +def main(): + #TODO: Use click + is_quiet = '--quiet' in sys.argv + + options = { + 'is_quiet': is_quiet, + } + + template_checkers = [MakoTemplateChecker(), UnderscoreTemplateChecker()] + _process_os_walk('.', template_checkers) + + for template_checker in template_checkers: + template_checker.print_results(options) + + +if __name__ == "__main__": + main() diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/tests/test_safe_template_checker.py b/scripts/tests/test_safe_template_checker.py new file mode 100644 index 0000000000..b399d2ea2d --- /dev/null +++ b/scripts/tests/test_safe_template_checker.py @@ -0,0 +1,313 @@ +""" +Tests for safe_template_checker.py +""" +from ddt import ddt, data +import textwrap +from unittest import TestCase + +from ..safe_template_checker import ( + FileResults, MakoTemplateChecker, Rules +) + +@ddt +class TestMakoTemplateChecker(TestCase): + """ + Test MakoTemplateChecker + """ + + @data( + {'directory': 'lms/templates', 'expected': True}, + {'directory': 'lms/templates/support', 'expected': True}, + {'directory': 'lms/templates/support', 'expected': True}, + {'directory': './test_root/staticfiles/templates', 'expected': False}, + {'directory': './some/random/path', 'expected': False}, + ) + def test_is_mako_directory(self, data): + """ + Test _is_mako_directory correctly determines mako directories + """ + checker = MakoTemplateChecker() + + self.assertEqual(checker._is_mako_directory(data['directory']), data['expected']) + + def test_check_page_default_with_default_provided(self): + """ + Test _check_mako_file_is_safe with default causes no error + """ + checker = MakoTemplateChecker() + results = FileResults('') + mako_template = """ + <%page expression_filter="h"/> + """ + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 0) + + def test_check_page_default_with_no_default_provided(self): + """ + Test _check_mako_file_is_safe with no default causes error + """ + checker = MakoTemplateChecker() + results = FileResults('') + mako_template = "" + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 1) + self.assertEqual(results.errors[0].rule, Rules.mako_missing_default) + + def test_check_mako_expressions_in_html(self): + """ + Test _check_mako_file_is_safe in html context provides appropriate errors + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + ${x} + ${'{{unbalanced-nested'} + ${x | n} + ${x | h} + ${x | n, dump_html_escaped_json} + ${x | n, dump_js_escaped_json} + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 4) + self.assertEqual(results.errors[0].rule, Rules.mako_unparsable_expression) + start_index = results.errors[0].expression['start_index'] + self.assertEqual(mako_template[start_index:start_index + 24], "${'{{unbalanced-nested'}") + self.assertEqual(results.errors[1].rule, Rules.mako_invalid_html_filter) + self.assertEqual(results.errors[1].expression['expression'], "${x | n}") + self.assertEqual(results.errors[2].rule, Rules.mako_unwanted_html_filter) + self.assertEqual(results.errors[2].expression['expression'], "${x | h}") + self.assertEqual(results.errors[3].rule, Rules.mako_invalid_html_filter) + self.assertEqual(results.errors[3].expression['expression'], "${x | n, dump_js_escaped_json}") + + def test_check_mako_expressions_in_html_without_default(self): + """ + Test _check_mako_file_is_safe in html context without the page level + default h filter suppresses expression level error + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + ${x | h} + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 1) + self.assertEqual(results.errors[0].rule, Rules.mako_missing_default) + + def test_check_mako_expressions_in_javascript(self): + """ + Test _check_mako_file_is_safe in JavaScript script context provides + appropriate errors + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 6) + self.assertEqual(results.errors[0].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[0].expression['expression'], "${x}") + self.assertEqual(results.errors[1].rule, Rules.mako_unparsable_expression) + start_index = results.errors[1].expression['start_index'] + self.assertEqual(mako_template[start_index:start_index + 24], "${'{{unbalanced-nested'}") + self.assertEqual(results.errors[2].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[2].expression['expression'], "${x | n}") + self.assertEqual(results.errors[3].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[3].expression['expression'], "${x | h}") + self.assertEqual(results.errors[4].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[4].expression['expression'], "${x | n, dump_html_escaped_json}") + self.assertEqual(results.errors[5].rule, Rules.mako_js_string_missing_quotes) + self.assertEqual(results.errors[5].expression['expression'], "${x-missing-quotes | n, js_escaped_string}") + + def test_check_mako_expressions_in_require_js(self): + """ + Test _check_mako_file_is_safe in JavaScript require context provides + appropriate errors + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + <%static:require_module module_name="${x}" class_name="TestFactory"> + ${x} + ${x | n, js_escaped_string} + + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 2) + self.assertEqual(results.errors[0].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[0].expression['expression'], "${x}") + self.assertEqual(results.errors[1].rule, Rules.mako_js_string_missing_quotes) + self.assertEqual(results.errors[1].expression['expression'], "${x | n, js_escaped_string}") + + @data( + {'media_type': 'text/javascript', 'expected_errors': 0}, + {'media_type': 'text/ecmascript', 'expected_errors': 0}, + {'media_type': 'application/ecmascript', 'expected_errors': 0}, + {'media_type': 'application/javascript', 'expected_errors': 0}, + {'media_type': 'text/template', 'expected_errors': 1}, + {'media_type': 'unknown/type', 'expected_errors': 1}, + ) + def test_check_mako_expressions_in_script_type(self, data): + """ + Test _check_mako_file_is_safe in script tag with different media types + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + + """).format(data['media_type']) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), data['expected_errors']) + + def test_check_mako_expressions_in_mixed_contexts(self): + """ + Test _check_mako_file_is_safe in mixed contexts provides + appropriate errors + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + ${x | h} + + ${x | h} + <%static:require_module module_name="${x}" class_name="TestFactory"> + ${x | h} + + ${x | h} + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 5) + self.assertEqual(results.errors[0].rule, Rules.mako_unwanted_html_filter) + self.assertEqual(results.errors[1].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[2].rule, Rules.mako_unwanted_html_filter) + self.assertEqual(results.errors[3].rule, Rules.mako_invalid_js_filter) + self.assertEqual(results.errors[4].rule, Rules.mako_unwanted_html_filter) + + def test_expression_detailed_results(self): + """ + Test _check_mako_file_is_safe provides detailed results, including line + numbers, columns, and line + """ + checker = MakoTemplateChecker() + results = FileResults('') + + mako_template = textwrap.dedent(""" + ${x | n} +
${( + 'tabbed-multi-line-expression' + ) | n}
+ ${'{{unbalanced-nested' | n} + """) + + checker._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.errors), 4) + self.assertEqual(results.errors[0].rule, Rules.mako_missing_default) + + self.assertEqual(results.errors[1].start_line, 2) + self.assertEqual(results.errors[1].start_column, 1) + self.assertEqual(results.errors[1].end_line, 2) + self.assertEqual(results.errors[1].end_column, 8) + self.assertEqual(len(results.errors[1].lines), 1) + self.assertEqual(results.errors[1].lines[0], "${x | n}") + + self.assertEqual(results.errors[2].start_line, 3) + self.assertEqual(results.errors[2].start_column, 10) + self.assertEqual(results.errors[2].end_line, 5) + self.assertEqual(results.errors[2].end_column, 10) + self.assertEqual(len(results.errors[2].lines), 3) + self.assertEqual(results.errors[2].lines[0], "
${(") + self.assertEqual(results.errors[2].lines[1], + " 'tabbed-multi-line-expression'" + ) + self.assertEqual(results.errors[2].lines[2], " ) | n}
") + + self.assertEqual(results.errors[3].start_line, 6) + self.assertEqual(results.errors[3].start_column, 1) + self.assertEqual(results.errors[3].end_line, 6) + self.assertEqual(results.errors[3].end_column, "?") + self.assertEqual(len(results.errors[3].lines), 1) + self.assertEqual(results.errors[3].lines[0], + "${'{{unbalanced-nested' | n}" + ) + + + def test_find_mako_expressions(self): + """ + Test _find_mako_expressions finds appropriate expressions + """ + checker = MakoTemplateChecker() + + mako_template = textwrap.dedent(""" + ${x} + ${tabbed-x} + ${( + 'tabbed-multi-line-expression' + )} + ${'{{unbalanced-nested'} + ${'{{nested}}'} +
no expression
+ """) + + expressions = checker._find_mako_expressions(mako_template) + + self.assertEqual(len(expressions), 5) + self._validate_expression(mako_template, expressions[0], '${x}') + self._validate_expression(mako_template, expressions[1], '${tabbed-x}') + self._validate_expression(mako_template, expressions[2], "${(\n 'tabbed-multi-line-expression'\n )}") + + # won't parse unbalanced nested {}'s + unbalanced_expression = "${'{{unbalanced-nested'}" + self.assertEqual(expressions[3]['end_index'], -1) + start_index = expressions[3]['start_index'] + self.assertEqual(mako_template[start_index:start_index + len(unbalanced_expression)], unbalanced_expression) + self.assertEqual(expressions[3]['expression'], None) + + self._validate_expression(mako_template, expressions[4], "${'{{nested}}'}") + + def _validate_expression(self, template_string, expression, expected_expression): + start_index = expression['start_index'] + end_index = expression['end_index'] + self.assertEqual(template_string[start_index:end_index + 1], expected_expression) + self.assertEqual(expression['expression'], expected_expression)