From 916723fcaa5b5cde645c862c4eb836aef7f4d3ed Mon Sep 17 00:00:00 2001 From: Robert Raposa Date: Tue, 29 Mar 2016 10:43:39 -0400 Subject: [PATCH] Enhance safe template linter - Check includes for Mako templates - Check display_name_with_default_escaped uses - Add exceptions for Underscore and <%= - Skip templates that are Django and not Mako - Add pragma to disable errors - Enhance unit tests - Remove violation mako-js-string-missing-quotes - Refactor line processing into StringLines --- scripts/safe_template_linter.py | 679 +++++++++++++----- .../templates/includes/include_safe.html | 2 + .../templates/includes/include_unsafe.html | 1 + scripts/tests/templates/test.html | 8 + scripts/tests/test_safe_template_linter.py | 270 ++++++- 5 files changed, 761 insertions(+), 199 deletions(-) create mode 100644 scripts/tests/templates/includes/include_safe.html create mode 100644 scripts/tests/templates/includes/include_unsafe.html create mode 100644 scripts/tests/templates/test.html diff --git a/scripts/safe_template_linter.py b/scripts/safe_template_linter.py index d02651d32b..8dddfe30f1 100755 --- a/scripts/safe_template_linter.py +++ b/scripts/safe_template_linter.py @@ -2,22 +2,22 @@ """ A linting tool to check if templates are safe """ +from __future__ import print_function from enum import Enum import os import re import sys _skip_dirs = ( - '/node_modules', - '/vendor', - '/spec', - '/.pycharm_helpers', - '/test_root', - '/reports/diff_quality', - '/common/static/xmodule/modules', + '.pycharm_helpers', + 'common/static/xmodule/modules', + 'node_modules', + 'reports/diff_quality', + 'spec', + 'scripts/tests/templates', + 'test_root', + 'vendor', ) -_skip_mako_dirs = _skip_dirs -_skip_underscore_dirs = _skip_dirs + ('/test',) def _is_skip_dir(skip_dirs, directory): @@ -33,8 +33,8 @@ def _is_skip_dir(skip_dirs, directory): """ for skip_dir in skip_dirs: - dir_contains_skip_dir = (directory.find(skip_dir + '/') >= 0) - if dir_contains_skip_dir or directory.endswith(skip_dir): + dir_contains_skip_dir = '/' + skip_dir + '/' in directory + if dir_contains_skip_dir or directory.startswith(skip_dir) or directory.endswith(skip_dir): return True return False @@ -48,117 +48,192 @@ def _load_file(self, file_full_path): Returns: A string containing the files contents. + """ with open(file_full_path, 'r') as input_file: file_contents = input_file.read() return file_contents.decode(encoding='utf-8') -def _get_line_breaks(self, string): +class StringLines(object): """ - Creates a list, where each entry represents the index into the string where - the next line break was found. - - Arguments: - string: The string in which to find line breaks. - - Returns: - A list of indices into the string at which each line break can be - found. - + StringLines provides utility methods to work with a string in terms of + lines. As an example, it can convert an index into a line number or column + number (i.e. index into the line). """ - line_breaks = [0] - index = 0 - while True: - index = string.find('\n', index) - if index < 0: - break - index += 1 - line_breaks.append(index) - return line_breaks + def __init__(self, string): + """ + Init method. -def _get_line_number(self, line_breaks, index): - """ - Given the list of line break indices, and an index, determines the line of - the index. + Arguments: + string: The string to work with. - Arguments: - line_breaks: A list of indices into a string at which each line break - was found. - index: The index into the original string for which we want to know the - line number + """ + self._string = string + self._line_breaks = self._process_line_breaks(string) - Returns: - The line number of the provided index. + def _process_line_breaks(self, string): + """ + Creates a list, where each entry represents the index into the string + where the next line break was found. - """ - current_line_number = 0 - for line_break_index in line_breaks: - if line_break_index <= index: - current_line_number += 1 - else: - break - return current_line_number + Arguments: + string: The string in which to find line breaks. + Returns: + A list of indices into the string at which each line break can be + found. -def _get_line(self, string, line_breaks, line_number): - """ - Gets the line of text designated by the provided line number. + """ + line_breaks = [0] + index = 0 + while True: + index = string.find('\n', index) + if index < 0: + break + index += 1 + line_breaks.append(index) + return line_breaks - Arguments: - string: The string of content with line breaks. - line_breaks: A list of indices into a string at which each line break - was found. - line_number: The line number of the line we want to find. + def get_string(self): + """ + Get the original string. + """ + return self._string - Returns: - The line of text designated by the provided line number. + def index_to_line_number(self, index): + """ + Given an index, determines the line of the index. - """ - start_index = line_breaks[line_number - 1] - if len(line_breaks) == line_number: - line = string[start_index:] - else: - end_index = line_breaks[line_number] - line = string[start_index:end_index - 1] - return line.encode(encoding='utf-8') + Arguments: + index: The index into the original string for which we want to know + the line number + Returns: + The line number of the provided index. -def _get_column_number(self, line_breaks, line_number, index): - """ - Gets the column (i.e. index into the line) for the given index into the - original string. + """ + current_line_number = 0 + for line_break_index in self._line_breaks: + if line_break_index <= index: + current_line_number += 1 + else: + break + return current_line_number - Arguments: - line_breaks: A list of indices into a string at which each line break - was found. - line_number: The line number of the line we want to find. - index: The index into the original string. - - Returns: - The column (i.e. index into the line) for the given index into the + def index_to_column_number(self, index): + """ + Gets the column (i.e. index into the line) for the given index into the original string. - """ - start_index = line_breaks[line_number - 1] - column = index - start_index + 1 - return column + Arguments: + index: The index into the original string. + + Returns: + The column (i.e. index into the line) for the given index into the + original string. + + """ + start_index = self.index_to_line_start_index(index) + column = index - start_index + 1 + return column + + def index_to_line_start_index(self, index): + """ + Gets the index of the start of the line of the given index. + + Arguments: + index: The index into the original string. + + Returns: + The index of the start of the line of the given index. + + """ + line_number = self.index_to_line_number(index) + return self.line_number_to_start_index(line_number) + + def line_number_to_start_index(self, line_number): + """ + Gets the starting index for the provided line number. + + Arguments: + line_number: The line number of the line for which we want to find + the start index. + + Returns: + The starting index for the provided line number. + + """ + return self._line_breaks[line_number - 1] + + def line_number_to_line(self, line_number): + """ + Gets the line of text designated by the provided line number. + + Arguments: + line_number: The line number of the line we want to find. + + Returns: + The line of text designated by the provided line number. + + """ + start_index = self._line_breaks[line_number - 1] + if len(self._line_breaks) == line_number: + line = self._string[start_index:] + else: + end_index = self._line_breaks[line_number] + line = self._string[start_index:end_index - 1] + return line + + def line_count(self): + """ + Gets the number of lines in the string. + """ + return len(self._line_breaks) class Rules(Enum): """ An Enum of each rule which the linter will check. """ - mako_missing_default = ('mako-missing-default', 'Missing default <%page expression_filter="h"/>.') - mako_multiple_page_tags = ('mako-multiple-page-tags', 'A Mako template can only have one <%page> tag.') - mako_unparsable_expression = ('mako-unparsable-expression', 'The expression could not be properly parsed.') - mako_unwanted_html_filter = ('mako-unwanted-html-filter', 'Remove explicit h filters when it is provided by the page directive.') - mako_invalid_html_filter = ('mako-invalid-html-filter', 'The expression is using an invalid filter in an HTML context.') - mako_invalid_js_filter = ('mako-invalid-js-filter', 'The expression is using an invalid filter in a JavaScript context.') - mako_js_string_missing_quotes = ('mako-js-string-missing-quotes', 'An expression using the js_escape_string filter must have surrounding quotes.') + mako_missing_default = ( + 'mako-missing-default', + 'Missing default <%page expression_filter="h"/>.' + ) + mako_multiple_page_tags = ( + 'mako-multiple-page-tags', + 'A Mako template can only have one <%page> tag.' + ) + mako_include_with_violations = ( + 'mako-include-with-violations', + 'Must fix violations in included templates first.' + ) + mako_unparsable_expression = ( + 'mako-unparsable-expression', + 'The expression could not be properly parsed.' + ) + mako_unwanted_html_filter = ( + 'mako-unwanted-html-filter', + 'Remove explicit h filters when it is provided by the page directive.' + ) + mako_invalid_html_filter = ( + 'mako-invalid-html-filter', + 'The expression is using an invalid filter in an HTML context.' + ) + mako_deprecated_display_name = ( + 'mako-deprecated-display-name', + 'Replace deprecated display_name_with_default_escaped with display_name_with_default.' + ) + mako_invalid_js_filter = ( + 'mako-invalid-js-filter', + 'The expression is using an invalid filter in a JavaScript context.' + ) - underscore_not_escaped = ('underscore-not-escaped', 'Expressions should be escaped using <%- expression %>.') + underscore_not_escaped = ( + 'underscore-not-escaped', + 'Expressions should be escaped using <%- expression %>.' + ) def __init__(self, rule_id, rule_summary): self.rule_id = rule_id @@ -180,25 +255,64 @@ class RuleViolation(object): """ self.rule = rule self.full_path = '' + self.is_disabled = False - def prepare_results(self, full_path, file_string, line_breaks): + def _mark_disabled(self, string, scope_start_string=False): + """ + Performs the disable pragma search and marks the rule as disabled if a + matching pragma is found. + + Pragma format:: + + safe-lint: disable=violation-name,other-violation-name + + Arguments: + string: The string of code in which to search for the pragma. + scope_start_string: True if the pragma must be at the start of the + string, False otherwise. The pragma is considered at the start + of the string if it has a maximum of 5 non-whitespace characters + preceding it. + + Side Effect: + Sets self.is_disabled as appropriate based on whether the pragma is + found. + + """ + pragma_match = re.search(r'safe-lint:\s*disable=([a-zA-Z,-]+)', string) + if pragma_match is None: + return + if scope_start_string: + spaces_count = string.count(' ', 0, pragma_match.start()) + non_space_count = pragma_match.start() - spaces_count + if non_space_count > 5: + return + + for disabled_rule in pragma_match.group(1).split(','): + if disabled_rule == self.rule.rule_id: + self.is_disabled = True + return + + def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. Arguments: full_path: Path of the file in violation. - file_string: The contents of the file in violation. - line_breaks: A list of indices into file_string at which each line - break was found. + string_lines: A StringLines containing the contents of the file in + violation. """ self.full_path = full_path + self._mark_disabled(string_lines.get_string()) - def print_results(self): + def print_results(self, out): """ Prints the results represented by this rule violation. + + Arguments: + out: output file """ - print "{}: {}".format(self.full_path, self.rule.rule_id) + print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) class ExpressionRuleViolation(RuleViolation): @@ -225,50 +339,94 @@ class ExpressionRuleViolation(RuleViolation): self.end_line = 0 self.end_column = 0 self.lines = [] + self.is_disabled = False - def prepare_results(self, full_path, file_string, line_breaks): + def _mark_expression_disabled(self, string_lines): + """ + Marks the expression violation as disabled if it finds the disable + pragma anywhere on the first line of the violation, or at the start of + the line preceding the violation. + + Pragma format:: + + safe-lint: disable=violation-name,other-violation-name + + Examples:: + + <% // safe-lint: disable=underscore-not-escaped %> + <%= gettext('Single Line') %> + + <%= gettext('Single Line') %><% // safe-lint: disable=underscore-not-escaped %> + + Arguments: + string_lines: A StringLines containing the contents of the file in + violation. + + Side Effect: + Sets self.is_disabled as appropriate based on whether the pragma is + found. + + """ + # disable pragma can be at the start of the preceding line + has_previous_line = self.start_line > 1 + if has_previous_line: + line_to_check = string_lines.line_number_to_line(self.start_line - 1) + self._mark_disabled(line_to_check, scope_start_string=True) + if self.is_disabled: + return + + # TODO: this should work at end of any line of the violation + # disable pragma can be anywhere on the first line of the violation + line_to_check = string_lines.line_number_to_line(self.start_line) + self._mark_disabled(line_to_check, scope_start_string=False) + + def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. Arguments: full_path: Path of the file in violation. - file_string: The contents of the file in violation. - line_breaks: A list of indices into file_string at which each line - break was found. + string_lines: A StringLines containing the contents of the file in + violation. """ self.full_path = full_path start_index = self.expression['start_index'] - self.start_line = _get_line_number(self, line_breaks, start_index) - self.start_column = _get_column_number(self, line_breaks, self.start_line, start_index) + self.start_line = string_lines.index_to_line_number(start_index) + self.start_column = string_lines.index_to_column_number(start_index) end_index = self.expression['end_index'] if end_index > 0: - self.end_line = _get_line_number(self, line_breaks, end_index) - self.end_column = _get_column_number(self, line_breaks, self.end_line, end_index) + self.end_line = string_lines.index_to_line_number(end_index) + self.end_column = string_lines.index_to_column_number(end_index) else: self.end_line = self.start_line self.end_column = '?' for line_number in range(self.start_line, self.end_line + 1): - self.lines.append(_get_line(self, file_string, line_breaks, line_number)) + self.lines.append(string_lines.line_number_to_line(line_number)) + self._mark_expression_disabled(string_lines) - def print_results(self): + def print_results(self, out): """ Prints the results represented by this rule violation. + + Arguments: + out: output file + """ for line_number in range(self.start_line, self.end_line + 1): - if (line_number == self.start_line): + if line_number == self.start_line: column = self.start_column rule_id = self.rule.rule_id + ":" else: column = 1 rule_id = " " * (len(self.rule.rule_id) + 1) - print "{}: {}:{}: {} {}".format( + print("{}: {}:{}: {} {}".format( self.full_path, line_number, column, rule_id, - self.lines[line_number - self.start_line - 1] - ) + self.lines[line_number - self.start_line - 1].encode(encoding='utf-8') + ), file=out) class FileResults(object): @@ -285,7 +443,45 @@ class FileResults(object): """ self.full_path = full_path + self.directory = os.path.dirname(full_path) + self.is_file = os.path.isfile(full_path) self.violations = [] + self.includes = [] + + def resolve_include(self, include, include_results): + """ + Resolves potential include violations and determines if they are real or + not. For real violations, adds the violations into the violation list. + + If the include_results is not a file, it will be considered a violation + and will require a disable pragma. + + Arguments: + include: The include with the potential violation to be resolved. + include_results: The results of processing the include file. + + """ + include_has_violations = (not include_results.is_file) or (len(include_results.violations) > 0) + if include_has_violations: + self.violations.append(include['potential_violation']) + + def add_include(self, include_file, potential_violation): + """ + Adds an include which also must have no violations. + + Arguments: + include_file: The include file as provided in an include. + potential_violation: Represents the violation of the include, if the + included file has any violations. + + """ + include_full_path = os.path.normpath(self.directory + '/' + include_file) + self.includes.append({ + 'directory': os.path.dirname(include_full_path), + 'file_name': os.path.split(include_full_path)[1], + 'full_path': include_full_path, + 'potential_violation': potential_violation, + }) def prepare_results(self, file_string): """ @@ -295,11 +491,13 @@ class FileResults(object): file_string: The string of content for this file. """ - line_breaks = _get_line_breaks(self, file_string) + string_lines = StringLines(file_string) for violation in self.violations: - violation.prepare_results(self.full_path, file_string, line_breaks) + violation.prepare_results(self.full_path, string_lines) + for include in self.includes: + include['potential_violation'].prepare_results(self.full_path, string_lines) - def print_results(self, options): + def print_results(self, options, out): """ Prints the results (i.e. violations) in this file. @@ -307,13 +505,16 @@ class FileResults(object): options: A list of the following options: is_quiet: True to print only file names, and False to print all violations. + out: output file + """ if options['is_quiet']: - print self.full_path + print(self.full_path, file=out) else: for violation in self.violations: - violation.print_results() + if not violation.is_disabled: + violation.print_results(out) class MakoTemplateLinter(object): @@ -323,6 +524,18 @@ class MakoTemplateLinter(object): _skip_mako_dirs = _skip_dirs + def __init__(self): + """ + Init method. + """ + self.results = {} + + def supports_includes(self): + """ + Mako template linter supports linting includes. + """ + return True + def process_file(self, directory, file_name): """ Process file to determine if it is a Mako template file and @@ -333,12 +546,24 @@ class MakoTemplateLinter(object): file_name (string): A filename for a potential Mako file Returns: - The file results containing any violations, or None if the file is - never checked. + The file results containing any violations. """ - if not self._is_mako_directory(directory): - return None + mako_file_full_path = os.path.normpath(directory + '/' + file_name) + results = FileResults(mako_file_full_path) + + # don't process the same file twice. this could happen when we process + # files included by another file + if mako_file_full_path in self.results: + return self.results[mako_file_full_path] + + self.results[mako_file_full_path] = results + + if not results.is_file: + return results + + if not self._is_valid_directory(directory): + return results # TODO: When safe-by-default is turned on at the platform level, will we: # 1. Turn it on for .html only, or @@ -347,11 +572,11 @@ class MakoTemplateLinter(object): # the n filter to turn off h for some of these)? # For now, we only check .html and .xml files if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')): - return None + return results - return self._load_and_check_mako_file_is_safe(directory + '/' + file_name) + return self._load_and_check_mako_file_is_safe(mako_file_full_path, results) - def _is_mako_directory(self, directory): + def _is_valid_directory(self, directory): """ Determines if the provided directory is a directory that could contain Mako template files that need to be linted. @@ -366,12 +591,15 @@ class MakoTemplateLinter(object): if _is_skip_dir(self._skip_mako_dirs, directory): return False - if (directory.find('/templates/') >= 0) or directory.endswith('/templates'): + # TODO: This is an imperfect guess concerning the Mako template + # directories. This needs to be reviewed before turning on safe by + # default at the platform level. + if ('/templates/' in directory) or directory.endswith('/templates'): return True return False - def _load_and_check_mako_file_is_safe(self, mako_file_full_path): + def _load_and_check_mako_file_is_safe(self, mako_file_full_path, results): """ Loads the Mako template file and checks if it is in violation. @@ -379,16 +607,12 @@ class MakoTemplateLinter(object): mako_file_full_path: The file to be loaded and linted. Returns: - The file results containing any violations, or None if none found. + The file results containing any violations. """ mako_template = _load_file(self, mako_file_full_path) - results = FileResults(mako_file_full_path) self._check_mako_file_is_safe(mako_template, results) - if len(results.violations) > 0: - return results - else: - return None + return results def _check_mako_file_is_safe(self, mako_template, results): """ @@ -399,6 +623,8 @@ class MakoTemplateLinter(object): results: A file results objects to which violations will be added. """ + if self._is_django_template(mako_template): + return has_page_default = False if self._has_multiple_page_tags(mako_template): results.violations.append(RuleViolation(Rules.mako_multiple_page_tags)) @@ -407,8 +633,24 @@ class MakoTemplateLinter(object): if not has_page_default: results.violations.append(RuleViolation(Rules.mako_missing_default)) self._check_mako_expressions(mako_template, has_page_default, results) + self._check_include_files(mako_template, results) results.prepare_results(mako_template) + def _is_django_template(self, mako_template): + """ + Determines if the template is actually a Django template. + + Arguments: + mako_template: The template code. + + Returns: + True if this is really a Django template, and False otherwise. + + """ + if re.search('({%.*%})|({{.*}})', mako_template) is not None: + return True + return False + def _has_multiple_page_tags(self, mako_template): """ Checks if the Mako template contains more than one page expression. @@ -433,6 +675,30 @@ class MakoTemplateLinter(object): page_match = page_h_filter_regex.search(mako_template) return page_match + def _check_include_files(self, mako_template, results): + """ + Checks if the Mako template includes other template files. If so, sets + up potential violations that will be checked later. + + Arguments: + mako_template: The contents of the Mako template. + + """ + regex = re.compile('<%include[^>]*file=(?:"|\')(.+?)(?:"|\')[^>]*/>') + for match in regex.finditer(mako_template): + include_file = match.group(1) + expression = { + 'start_index': match.start(), + 'end_index': match.end(), + 'expression': match.group() + } + results.add_include( + include_file, + ExpressionRuleViolation( + Rules.mako_include_with_violations, expression + ) + ) + def _check_mako_expressions(self, mako_template, has_page_default, results): """ Searches for Mako expressions and then checks if they contain @@ -456,11 +722,28 @@ class MakoTemplateLinter(object): context = self._get_context(contexts, expression['start_index']) self._check_filters(mako_template, expression, context, has_page_default, results) + self._check_deprecated_display_name(expression, results) + + def _check_deprecated_display_name(self, expression, results): + """ + Checks that the deprecated display_name_with_default_escaped is not + used. Adds violation to results if there is a problem. + + Arguments: + expression: A dict containing the start_index, end_index, and + expression (text) of the expression. + results: A list of results into which violations will be added. + + """ + if '.display_name_with_default_escaped' in expression['expression']: + results.violations.append(ExpressionRuleViolation( + Rules.mako_deprecated_display_name, expression + )) def _check_filters(self, mako_template, expression, context, has_page_default, results): """ Checks that the filters used in the given Mako expression are valid - for the given context. + for the given context. Adds violation to results if there is a problem. Arguments: mako_template: The contents of the Mako template. @@ -506,14 +789,7 @@ class MakoTemplateLinter(object): pass elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'js_escaped_string'): # {x | n, js_escaped_string} is valid, if surrounded by quotes - prior_character = mako_template[expression['start_index'] - 1] - next_character = mako_template[expression['end_index'] + 1] - has_surrounding_quotes = (prior_character == '\'' and next_character == '\'') or \ - (prior_character == '"' and next_character == '"') - if not has_surrounding_quotes: - results.violations.append(ExpressionRuleViolation( - Rules.mako_js_string_missing_quotes, expression - )) + pass else: results.violations.append(ExpressionRuleViolation( Rules.mako_invalid_js_filter, expression @@ -670,7 +946,13 @@ class UnderscoreTemplateLinter(object): The linter for Underscore.js template files. """ - _skip_underscore_dirs = _skip_dirs + _skip_underscore_dirs = _skip_dirs + ('test',) + + def supports_includes(self): + """ + Underscore template linter does not lint includes. + """ + return False def process_file(self, directory, file_name): """ @@ -682,32 +964,21 @@ class UnderscoreTemplateLinter(object): file_name (string): A filename for a potential underscore file Returns: - The file results containing any violations, or None if the file is - never checked. + The file results containing any violations. """ - if not self._is_underscore_directory(directory): - return + full_path = os.path.normpath(directory + '/' + file_name) + results = FileResults(full_path) + + if not self._is_valid_directory(directory): + return results if not file_name.lower().endswith('.underscore'): - return + return results - full_path = directory + '/' + file_name - return self._load_and_check_underscore_file_is_safe(full_path) + return self._load_and_check_underscore_file_is_safe(full_path, results) - def print_results(self, options): - """ - Prints all results (i.e. violations) for all files that failed this - linter. - - Arguments: - options: A list of the options. - - """ - for result in self._results: - result.print_results(options) - - def _is_underscore_directory(self, directory): + def _is_valid_directory(self, directory): """ Determines if the provided directory is a directory that could contain Underscore.js template files that need to be linted. @@ -724,7 +995,7 @@ class UnderscoreTemplateLinter(object): return True - def _load_and_check_underscore_file_is_safe(self, file_full_path): + def _load_and_check_underscore_file_is_safe(self, file_full_path, results): """ Loads the Underscore.js template file and checks if it is in violation. @@ -732,17 +1003,12 @@ class UnderscoreTemplateLinter(object): file_full_path: The file to be loaded and linted Returns: - The file results containing any violations, or None if the file is - never checked. + The file results containing any violations. """ underscore_template = _load_file(self, file_full_path) - results = FileResults(file_full_path) self._check_underscore_file_is_safe(underscore_template, results) - if len(results.violations) > 0: - return results - else: - return None + return results def _check_underscore_file_is_safe(self, underscore_template, results): """ @@ -767,9 +1033,36 @@ class UnderscoreTemplateLinter(object): """ expressions = self._find_unescaped_expressions(underscore_template) for expression in expressions: - results.violations.append(ExpressionRuleViolation( - Rules.underscore_not_escaped, expression - )) + if not self._is_safe_unescaped_expression(expression): + results.violations.append(ExpressionRuleViolation( + Rules.underscore_not_escaped, expression + )) + + def _is_safe_unescaped_expression(self, expression): + """ + Determines whether an expression is safely escaped, even though it is + using the expression syntax that doesn't itself escape (i.e. <%= ). + + In some cases it is ok to not use the Underscore.js template escape + (i.e. <%- ) because the escaping is happening inside the expression. + + Safe examples:: + + <%= HtmlUtils.ensureHtml(message) %> + <%= _.escape(message) %> + + Arguments: + expression: The expression being checked. + + Returns: + True if the expression has been safely escaped, and False otherwise. + + """ + if expression['expression_inner'].startswith('HtmlUtils.'): + return True + if expression['expression_inner'].startswith('_.escape('): + return True + return False def _find_unescaped_expressions(self, underscore_template): """ @@ -788,7 +1081,7 @@ class UnderscoreTemplateLinter(object): end_index: The index of the end of the expression. expression: The text of the expression. """ - unescaped_expression_regex = re.compile("<%=.*?%>", re.DOTALL) + unescaped_expression_regex = re.compile("<%=(.*?)%>", re.DOTALL) expressions = [] for match in unescaped_expression_regex.finditer(underscore_template): @@ -796,13 +1089,14 @@ class UnderscoreTemplateLinter(object): 'start_index': match.start(), 'end_index': match.end(), 'expression': match.group(), + 'expression_inner': match.group(1).strip() } expressions.append(expression) return expressions -def _process_current_walk(current_walk, template_linters, options): +def _process_current_walk(current_walk, template_linters, options, out): """ For each linter, lints all the files in the current os walk. This means finding and printing violations. @@ -811,18 +1105,26 @@ def _process_current_walk(current_walk, template_linters, options): current_walk: A walk returned by os.walk(). template_linters: A list of linting objects. options: A list of the options. + out: output file """ - walk_directory = current_walk[0] + walk_directory = os.path.normpath(current_walk[0]) walk_files = current_walk[2] for walk_file in walk_files: + walk_file = os.path.normpath(walk_file) for template_linter in template_linters: results = template_linter.process_file(walk_directory, walk_file) - if results is not None: - results.print_results(options) + if template_linter.supports_includes(): + for include in results.includes: + include_results = template_linter.process_file( + include['directory'], + include['file_name'] + ) + results.resolve_include(include, include_results) + results.print_results(options, out) -def _process_os_walk(starting_dir, template_linters, options): +def _process_os_walk(starting_dir, template_linters, options, out): """ For each linter, lints all the directories in the starting directory. @@ -830,10 +1132,11 @@ def _process_os_walk(starting_dir, template_linters, options): starting_dir: The initial directory to begin the walk. template_linters: A list of linting objects. options: A list of the options. + out: output file """ for current_walk in os.walk(starting_dir): - _process_current_walk(current_walk, template_linters, options) + _process_current_walk(current_walk, template_linters, options, out) def main(): @@ -842,25 +1145,27 @@ def main(): Prints all of the violations. """ + #TODO: Use click if '--help' in sys.argv: - print "Check that templates are safe." - print "Options:" - print " --quiet Just display the filenames with violations." - print - print "Rules:" + print("Check that templates are safe.") + print("Options:") + print(" --quiet Just display the filenames that have violations.") + print("") + print("Rules:") for rule in Rules.__members__.values(): - print " {0[0]}: {0[1]}".format(rule.value) + print(" {0[0]}: {0[1]}".format(rule.value)) return is_quiet = '--quiet' in sys.argv + # TODO --file=... options = { 'is_quiet': is_quiet, } template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter()] - _process_os_walk('.', template_linters, options) + _process_os_walk('.', template_linters, options, out=sys.stdout) if __name__ == "__main__": diff --git a/scripts/tests/templates/includes/include_safe.html b/scripts/tests/templates/includes/include_safe.html new file mode 100644 index 0000000000..838cf2a8d9 --- /dev/null +++ b/scripts/tests/templates/includes/include_safe.html @@ -0,0 +1,2 @@ +## Mako template with safe by default page expression +<%page expression_filter="h"/> diff --git a/scripts/tests/templates/includes/include_unsafe.html b/scripts/tests/templates/includes/include_unsafe.html new file mode 100644 index 0000000000..2f7908cb44 --- /dev/null +++ b/scripts/tests/templates/includes/include_unsafe.html @@ -0,0 +1 @@ +## Mako template with no safe by default page expression diff --git a/scripts/tests/templates/test.html b/scripts/tests/templates/test.html new file mode 100644 index 0000000000..76ce3c3bb3 --- /dev/null +++ b/scripts/tests/templates/test.html @@ -0,0 +1,8 @@ +<%include file="./includes/include_safe.html" args="message" /> + +<%include file="./includes/include_unsafe.html" args="message" /> + +<%include file="bad_file_name.html" /> + +## safe-lint: disable=mako-include-with-violations +<%include file="bad_file_name.html" /> diff --git a/scripts/tests/test_safe_template_linter.py b/scripts/tests/test_safe_template_linter.py index 2084e7bcab..c449fc1525 100644 --- a/scripts/tests/test_safe_template_linter.py +++ b/scripts/tests/test_safe_template_linter.py @@ -2,14 +2,45 @@ Tests for safe_template_linter.py """ from ddt import ddt, data +import mock +import re +from StringIO import StringIO import textwrap from unittest import TestCase from ..safe_template_linter import ( - FileResults, MakoTemplateLinter, Rules + _process_os_walk, FileResults, MakoTemplateLinter, UnderscoreTemplateLinter, Rules ) +class TestSafeTemplateLinter(TestCase): + """ + Test some top-level linter functions + """ + + def test_process_os_walk_with_includes(self): + """ + Tests the top-level processing of template files, including Mako + includes. + """ + out = StringIO() + + options = { + 'is_quiet': False, + } + + template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter()] + + with mock.patch.object(MakoTemplateLinter, '_is_valid_directory', return_value=True) as mock_is_valid_directory: + _process_os_walk('scripts/tests/templates', template_linters, options, out) + + output = out.getvalue() + self.assertIsNotNone(re.search('test\.html.*mako-missing-default', out.getvalue())) + self.assertIsNotNone(re.search('test\.html.*mako-include-with-violations.*include_unsafe\.html', out.getvalue())) + self.assertIsNotNone(re.search('test\.html.*mako-include-with-violations.*bad_file_name\.html', out.getvalue())) + self.assertIsNotNone(re.search('include_unsafe\.html.*mako-missing-default', out.getvalue())) + + @ddt class TestMakoTemplateLinter(TestCase): """ @@ -20,16 +51,17 @@ class TestMakoTemplateLinter(TestCase): {'directory': 'lms/templates', 'expected': True}, {'directory': 'lms/templates/support', 'expected': True}, {'directory': 'lms/templates/support', 'expected': True}, + {'directory': 'test_root/staticfiles/templates', 'expected': False}, {'directory': './test_root/staticfiles/templates', 'expected': False}, {'directory': './some/random/path', 'expected': False}, ) - def test_is_mako_directory(self, data): + def test_is_valid_directory(self, data): """ - Test _is_mako_directory correctly determines mako directories + Test _is_valid_directory correctly determines mako directories """ linter = MakoTemplateLinter() - self.assertEqual(linter._is_mako_directory(data['directory']), data['expected']) + self.assertEqual(linter._is_valid_directory(data['directory']), data['expected']) @data( { @@ -110,6 +142,79 @@ class TestMakoTemplateLinter(TestCase): self.assertEqual(results.violations[3].rule, Rules.mako_invalid_html_filter) self.assertEqual(results.violations[3].expression['expression'], "${x | n, dump_js_escaped_json}") + def test_check_mako_expression_display_name(self): + """ + Test _check_mako_file_is_safe with display_name_with_default_escaped + fails. + """ + linter = MakoTemplateLinter() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + ${course.display_name_with_default_escaped} + """) + + linter._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.violations), 1) + self.assertEqual(results.violations[0].rule, Rules.mako_deprecated_display_name) + + def test_check_mako_expression_default_disabled(self): + """ + Test _check_mako_file_is_safe with disable pragma for safe-by-default + works to designate that this is not a Mako file + """ + linter = MakoTemplateLinter() + results = FileResults('') + + mako_template = textwrap.dedent(""" + # This is anything but a Mako file. + + # pragma can appear anywhere in file + # safe-lint: disable=mako-missing-default + """) + + linter._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.violations), 1) + self.assertTrue(results.violations[0].is_disabled) + + def test_check_mako_expression_disabled(self): + """ + Test _check_mako_file_is_safe with disable pragma results in no + violation + """ + linter = MakoTemplateLinter() + results = FileResults('') + + mako_template = textwrap.dedent(""" + <%page expression_filter="h"/> + ## safe-lint: disable=mako-unwanted-html-filter + ${x | h} + """) + + linter._check_mako_file_is_safe(mako_template, results) + + self.assertEqual(len(results.violations), 1) + self.assertTrue(results.violations[0].is_disabled) + + @data( + {'template': '{% extends "wiki/base.html" %}'}, + {'template': '{{ message }}'}, + ) + def test_check_mako_on_django_template(self, data): + """ + Test _check_mako_file_is_safe with disable pragma results in no + violation + """ + linter = MakoTemplateLinter() + results = FileResults('') + + linter._check_mako_file_is_safe(data['template'], results) + + self.assertEqual(len(results.violations), 0) + def test_check_mako_expressions_in_html_without_default(self): """ Test _check_mako_file_is_safe in html context without the page level @@ -145,14 +250,12 @@ class TestMakoTemplateLinter(TestCase): ${x | n, dump_html_escaped_json} ${x | n, dump_js_escaped_json} "${x-with-quotes | n, js_escaped_string}" - '${x-with-quotes | n, js_escaped_string}' - ${x-missing-quotes | n, js_escaped_string} """) linter._check_mako_file_is_safe(mako_template, results) - self.assertEqual(len(results.violations), 6) + self.assertEqual(len(results.violations), 5) self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter) self.assertEqual(results.violations[0].expression['expression'], "${x}") self.assertEqual(results.violations[1].rule, Rules.mako_unparsable_expression) @@ -164,8 +267,6 @@ class TestMakoTemplateLinter(TestCase): self.assertEqual(results.violations[3].expression['expression'], "${x | h}") self.assertEqual(results.violations[4].rule, Rules.mako_invalid_js_filter) self.assertEqual(results.violations[4].expression['expression'], "${x | n, dump_html_escaped_json}") - self.assertEqual(results.violations[5].rule, Rules.mako_js_string_missing_quotes) - self.assertEqual(results.violations[5].expression['expression'], "${x-missing-quotes | n, js_escaped_string}") def test_check_mako_expressions_in_require_js(self): """ @@ -185,11 +286,9 @@ class TestMakoTemplateLinter(TestCase): linter._check_mako_file_is_safe(mako_template, results) - self.assertEqual(len(results.violations), 2) + self.assertEqual(len(results.violations), 1) self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter) self.assertEqual(results.violations[0].expression['expression'], "${x}") - self.assertEqual(results.violations[1].rule, Rules.mako_js_string_missing_quotes) - self.assertEqual(results.violations[1].expression['expression'], "${x | n, js_escaped_string}") @data( {'media_type': 'text/javascript', 'expected_violations': 0}, @@ -335,3 +434,150 @@ class TestMakoTemplateLinter(TestCase): end_index = expression['end_index'] self.assertEqual(template_string[start_index:end_index + 1], expected_expression) self.assertEqual(expression['expression'], expected_expression) + + +@ddt +class TestUnderscoreTemplateLinter(TestCase): + """ + Test UnderscoreTemplateLinter + """ + + def test_check_underscore_file_is_safe(self): + """ + Test _check_underscore_file_is_safe with safe template + """ + linter = UnderscoreTemplateLinter() + results = FileResults('') + + template = textwrap.dedent(""" + <%- gettext('Single Line') %> + + <%- + gettext('Multiple Lines') + %> + """) + + linter._check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 0) + + def test_check_underscore_file_is_not_safe(self): + """ + Test _check_underscore_file_is_safe with unsafe template + """ + linter = UnderscoreTemplateLinter() + results = FileResults('') + + template = textwrap.dedent(""" + <%= gettext('Single Line') %> + + <%= + gettext('Multiple Lines') + %> + """) + + linter._check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].rule, Rules.underscore_not_escaped) + self.assertEqual(results.violations[1].rule, Rules.underscore_not_escaped) + + @data( + { + 'template': + '<% // safe-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // safe-lint: disable=another-rule,underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True], + }, + { + 'template': + '<% // safe-lint: disable=another-rule %>\n' + '<%= message %>', + 'is_disabled': [False], + }, + { + 'template': + '<% // safe-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '// This test does not use proper Underscore.js Template syntax\n' + '// But, it is just testing that a maximum of 5 non-whitespace\n' + '// are used to designate start of line for disabling the next line.\n' + ' 1 2 3 4 5 safe-lint: disable=underscore-not-escaped %>\n' + '<%= message %>\n' + ' 1 2 3 4 5 6 safe-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %><% // safe-lint: disable=underscore-not-escaped %>\n' + '<%= message %>', + 'is_disabled': [True, False], + }, + { + 'template': + '<%= message %>\n' + '<% // safe-lint: disable=underscore-not-escaped %>', + 'is_disabled': [False], + }, + ) + def test_check_underscore_file_disable_rule(self, data): + """ + Test _check_underscore_file_is_safe with various disabled pragmas + """ + linter = UnderscoreTemplateLinter() + results = FileResults('') + + linter._check_underscore_file_is_safe(data['template'], results) + + violation_count = len(data['is_disabled']) + self.assertEqual(len(results.violations), violation_count) + for index in range(0, violation_count - 1): + self.assertEqual(results.violations[index].is_disabled, data['is_disabled'][index]) + + def test_check_underscore_file_disables_one_violation(self): + """ + Test _check_underscore_file_is_safe with disabled before a line only + disables for the violation following + """ + linter = UnderscoreTemplateLinter() + results = FileResults('') + + template = textwrap.dedent(""" + <% // safe-lint: disable=underscore-not-escaped %> + <%= message %> + <%= message %> + """) + + linter._check_underscore_file_is_safe(template, results) + + self.assertEqual(len(results.violations), 2) + self.assertEqual(results.violations[0].is_disabled, True) + self.assertEqual(results.violations[1].is_disabled, False) + + @data( + {'template': '<%= HtmlUtils.ensureHtml(message) %>'}, + {'template': '<%= _.escape(message) %>'}, + ) + def test_check_underscore_no_escape_allowed(self, data): + """ + Test _check_underscore_file_is_safe with expressions that are allowed + without escaping because the internal calls properly escape. + """ + linter = UnderscoreTemplateLinter() + results = FileResults('') + + linter._check_underscore_file_is_safe(data['template'], results) + + self.assertEqual(len(results.violations), 0)