diff --git a/scripts/safe_template_linter.py b/scripts/safe_template_linter.py index d85433baff..23cf001a1d 100755 --- a/scripts/safe_template_linter.py +++ b/scripts/safe_template_linter.py @@ -40,7 +40,7 @@ def _is_skip_dir(skip_dirs, directory): return False -def _load_file(self, file_full_path): +def _load_file(file_full_path): """ Loads a file into a string. @@ -56,6 +56,77 @@ def _load_file(self, file_full_path): return file_contents.decode(encoding='utf-8') +def _find_closing_char_index(start_delim, open_char, close_char, template, start_index, num_open_chars=0, strings=None): + """ + Finds the index of the closing char that matches the opening char. + + For example, this could be used to find the end of a Mako expression, + where the open and close characters would be '{' and '}'. + + Arguments: + start_delim: If provided (e.g. '${' for Mako expressions), the + closing character must be found before the next start_delim. + open_char: The opening character to be matched (e.g '{') + close_char: The closing character to be matched (e.g '}') + template: The template to be searched. + start_index: The start index of the last open char. + num_open_chars: The current number of open chars. + strings: A list of ParseStrings already parsed + + Returns: + A dict containing the following, or None if unparseable: + close_char_index: The index of the closing character + strings: a list of ParseStrings + + """ + strings = [] if strings is None else strings + close_char_index = template.find(close_char, start_index) + if close_char_index < 0: + # if we can't find an end_char, let's just quit + return None + open_char_index = template.find(open_char, start_index, close_char_index) + parse_string = ParseString(template, start_index, close_char_index) + + valid_index_list = [close_char_index] + if 0 <= open_char_index: + valid_index_list.append(open_char_index) + if parse_string.start_index is not None: + valid_index_list.append(parse_string.start_index) + min_valid_index = min(valid_index_list) + + if parse_string.start_index == min_valid_index: + strings.append(parse_string) + if parse_string.end_index is None: + return None + else: + return _find_closing_char_index( + start_delim, open_char, close_char, template, start_index=parse_string.end_index, + num_open_chars=num_open_chars, strings=strings + ) + + if open_char_index == min_valid_index: + if start_delim is not None: + # if we find another starting delim, consider this unparseable + start_delim_index = template.find(start_delim, start_index, close_char_index) + if 0 <= start_delim_index < open_char_index: + return None + return _find_closing_char_index( + start_delim, open_char, close_char, template, start_index=open_char_index + 1, + num_open_chars=num_open_chars + 1, strings=strings + ) + + if num_open_chars == 0: + return { + 'close_char_index': close_char_index, + 'strings': strings, + } + else: + return _find_closing_char_index( + start_delim, open_char, close_char, template, start_index=close_char_index + 1, + num_open_chars=num_open_chars - 1, strings=strings + ) + + class StringLines(object): """ StringLines provides utility methods to work with a string in terms of @@ -72,7 +143,10 @@ class StringLines(object): """ self._string = string - self._line_breaks = self._process_line_breaks(string) + self._line_start_indexes = self._process_line_breaks(string) + # this is an exclusive index used in the case that the template doesn't + # end with a new line + self.eof_index = len(string) def _process_line_breaks(self, string): """ @@ -83,19 +157,18 @@ class StringLines(object): string: The string in which to find line breaks. Returns: - A list of indices into the string at which each line break can be - found. + A list of indices into the string at which each line begins. """ - line_breaks = [0] + line_start_indexes = [0] index = 0 while True: index = string.find('\n', index) if index < 0: break index += 1 - line_breaks.append(index) - return line_breaks + line_start_indexes.append(index) + return line_start_indexes def get_string(self): """ @@ -116,7 +189,7 @@ class StringLines(object): """ current_line_number = 0 - for line_break_index in self._line_breaks: + for line_break_index in self._line_start_indexes: if line_break_index <= index: current_line_number += 1 else: @@ -154,6 +227,20 @@ class StringLines(object): line_number = self.index_to_line_number(index) return self.line_number_to_start_index(line_number) + def index_to_line_end_index(self, index): + """ + Gets the index of the end of the line of the given index. + + Arguments: + index: The index into the original string. + + Returns: + The index of the end of the line of the given index. + + """ + line_number = self.index_to_line_number(index) + return self.line_number_to_end_index(line_number) + def line_number_to_start_index(self, line_number): """ Gets the starting index for the provided line number. @@ -166,7 +253,26 @@ class StringLines(object): The starting index for the provided line number. """ - return self._line_breaks[line_number - 1] + return self._line_start_indexes[line_number - 1] + + def line_number_to_end_index(self, line_number): + """ + Gets the ending index for the provided line number. + + Arguments: + line_number: The line number of the line for which we want to find + the end index. + + Returns: + The ending index for the provided line number. + + """ + if line_number < len(self._line_start_indexes): + return self._line_start_indexes[line_number] + else: + # an exclusive index in the case that the file didn't end with a + # newline. + return self.eof_index def line_number_to_line(self, line_number): """ @@ -179,11 +285,11 @@ class StringLines(object): The line of text designated by the provided line number. """ - start_index = self._line_breaks[line_number - 1] - if len(self._line_breaks) == line_number: + start_index = self._line_start_indexes[line_number - 1] + if len(self._line_start_indexes) == line_number: line = self._string[start_index:] else: - end_index = self._line_breaks[line_number] + end_index = self._line_start_indexes[line_number] line = self._string[start_index:end_index - 1] return line @@ -191,7 +297,7 @@ class StringLines(object): """ Gets the number of lines in the string. """ - return len(self._line_breaks) + return len(self._line_start_indexes) class Rules(Enum): @@ -222,6 +328,14 @@ class Rules(Enum): 'mako-invalid-js-filter', 'The expression is using an invalid filter in a JavaScript context.' ) + mako_js_missing_quotes = ( + 'mako-js-missing-quotes', + 'An expression using js_escaped_string must be wrapped in quotes.' + ) + mako_js_html_string = ( + 'mako-js-html-string', + 'A JavaScript string containing HTML should not have an embedded Mako expression.' + ) mako_deprecated_display_name = ( 'mako-deprecated-display-name', 'Replace deprecated display_name_with_default_escaped with display_name_with_default.' @@ -254,12 +368,95 @@ class Rules(Enum): 'underscore-not-escaped', 'Expressions should be escaped using <%- expression %>.' ) + javascript_jquery_append = ( + 'javascript-jquery-append', + 'Use HtmlUtils.append() or .append(HtmlUtils.xxx().toString()).' + ) + javascript_jquery_prepend = ( + 'javascript-jquery-prepend', + 'Use HtmlUtils.prepend() or .prepend(HtmlUtils.xxx().toString()).' + ) + javascript_jquery_insertion = ( + 'javascript-jquery-insertion', + 'JQuery DOM insertion calls that take content must use HtmlUtils (e.g. $el.after(HtmlUtils.xxx().toString()).' + ) + javascript_jquery_insert_into_target = ( + 'javascript-jquery-insert-into-target', + 'JQuery DOM insertion calls that take a target can only be called from elements (e.g. .$el.appendTo()).' + ) + javascript_jquery_html = ( + 'javascript-jquery-html', + "Use HtmlUtils.setHtml(), .html(HtmlUtils.xxx().toString()), or JQuery's text() function." + ) + javascript_concat_html = ( + 'javascript-concat-html', + 'Use HtmlUtils functions rather than concatenating strings with HTML.' + ) + javascript_escape = ( + 'javascript-escape', + "Avoid calls to escape(), especially in Backbone. Use templates, HtmlUtils, or JQuery's text() function." + ) + javascript_interpolate = ( + 'javascript-interpolate', + 'Use StringUtils.interpolate() or HtmlUtils.interpolateHtml() as appropriate.' + ) def __init__(self, rule_id, rule_summary): self.rule_id = rule_id self.rule_summary = rule_summary +class Expression(object): + """ + Represents an arbitrary expression. + + An expression can be any type of code snippet. It will sometimes have a + starting and ending delimiter, but not always. + + Here are some example expressions:: + + ${x | n, decode.utf8} + <%= x %> + function(x) + "
" + message + "
" + + Other details of note: + - Only a start_index is required for a valid expression. + - If end_index is None, it means we couldn't parse the rest of the + expression. + - All other details of the expression are optional, and are only added if + and when supplied and needed for additional checks. They are not necessary + for the final results output. + + """ + + def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): + """ + Init method. + + Arguments: + start_index: the starting index of the expression + end_index: the index immediately following the expression, or None + if the expression was unparseable + template: optional template code in which the expression was found + start_delim: optional starting delimiter of the expression + end_delim: optional ending delimeter of the expression + strings: optional list of ParseStrings + + """ + self.start_index = start_index + self.end_index = end_index + self.start_delim = start_delim + self.end_delim = end_delim + self.strings = strings + if template is not None and self.end_index is not None: + self.expression = template[start_index:end_index] + self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() + else: + self.expression = None + self.expression_inner = None + + class RuleViolation(object): """ Base class representing a rule violation which can be used for reporting. @@ -312,6 +509,18 @@ class RuleViolation(object): self.is_disabled = True return + def sort_key(self): + """ + Returns a key that can be sorted on + """ + return 0 + + def first_line(self): + """ + Since a file level rule has no first line, returns empty string. + """ + return '' + def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. @@ -349,7 +558,7 @@ class ExpressionRuleViolation(RuleViolation): Arguments: rule: The Rule which was violated. - expression: The expression that was in violation. + expression: The Expression that was in violation. """ super(ExpressionRuleViolation, self).__init__(rule) @@ -400,6 +609,18 @@ class ExpressionRuleViolation(RuleViolation): line_to_check = string_lines.line_number_to_line(self.start_line) self._mark_disabled(line_to_check, scope_start_string=False) + def sort_key(self): + """ + Returns a key that can be sorted on + """ + return (self.start_line, self.start_column) + + def first_line(self): + """ + Returns the initial line of code of the violation. + """ + return self.lines[0] + def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. @@ -411,11 +632,11 @@ class ExpressionRuleViolation(RuleViolation): """ self.full_path = full_path - start_index = self.expression['start_index'] + start_index = self.expression.start_index self.start_line = string_lines.index_to_line_number(start_index) self.start_column = string_lines.index_to_column_number(start_index) - end_index = self.expression['end_index'] - if end_index > 0: + end_index = self.expression.end_index + if end_index is not None: self.end_line = string_lines.index_to_line_number(end_index) self.end_column = string_lines.index_to_column_number(end_index) else: @@ -467,17 +688,21 @@ class FileResults(object): self.is_file = os.path.isfile(full_path) self.violations = [] - def prepare_results(self, file_string): + def prepare_results(self, file_string, line_comment_delim=None): """ Prepares the results for output for this file. Arguments: file_string: The string of content for this file. + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. """ string_lines = StringLines(file_string) for violation in self.violations: violation.prepare_results(self.full_path, string_lines) + if line_comment_delim is not None: + self._filter_commented_code(line_comment_delim) def print_results(self, options, out): """ @@ -489,14 +714,49 @@ class FileResults(object): all violations. out: output file + Returns: + The number of violations. When using --quiet, returns number of + files with violations. """ + num_violations = 0 if options['is_quiet']: - print(self.full_path, file=out) + if self.violations is not None and 0 < len(self.violations): + num_violations += 1 + print(self.full_path, file=out) else: + self.violations.sort(key=lambda violation: violation.sort_key()) for violation in self.violations: if not violation.is_disabled: + num_violations += 1 violation.print_results(out) + return num_violations + + def _filter_commented_code(self, line_comment_delim): + """ + Remove any violations that were found in commented out code. + + Arguments: + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. + + """ + self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)] + + def _is_commented(self, violation, line_comment_delim): + """ + Checks if violation line is commented out. + + Arguments: + violation: The violation to check + line_comment_delim: A string representing the start of a line + comment. For example "##" for Mako and "//" for JavaScript. + + Returns: + True if the first line of the violation is actually commented out, + False otherwise. + """ + return violation.first_line().lstrip().startswith(line_comment_delim) class ParseString(object): @@ -504,8 +764,8 @@ class ParseString(object): ParseString is the result of parsing a string out of a template. A ParseString has the following attributes: - start_index: The index of the first quote, or -1 if none found - end_index: The index following the closing quote, or -1 if + start_index: The index of the first quote, or None if none found + end_index: The index following the closing quote, or None if unparseable quote_length: The length of the quote. Could be 3 for a Python triple quote. Or None if none found. @@ -525,12 +785,12 @@ class ParseString(object): end_index: The end index to search before. """ - self.end_index = -1 + self.end_index = None self.quote_length = None self.string = None self.string_inner = None self.start_index = self._find_string_start(template, start_index, end_index) - if 0 <= self.start_index: + if self.start_index is not None: result = self._parse_string(template, self.start_index) if result is not None: self.end_index = result['end_index'] @@ -549,13 +809,13 @@ class ParseString(object): end_index: The end index to search before. Returns: - The start index of the first single or double quote, or -1 if - no quote was found. + The start index of the first single or double quote, or None if no + quote was found. """ quote_regex = re.compile(r"""['"]""") start_match = quote_regex.search(template, start_index, end_index) if start_match is None: - return -1 + return None else: return start_match.start() @@ -603,12 +863,540 @@ class ParseString(object): } +class UnderscoreTemplateLinter(object): + """ + The linter for Underscore.js template files. + """ + + _skip_underscore_dirs = _skip_dirs + ('test',) + + def process_file(self, directory, file_name): + """ + Process file to determine if it is an Underscore template file and + if it is safe. + + Arguments: + directory (string): The directory of the file to be checked + file_name (string): A filename for a potential underscore file + + Returns: + The file results containing any violations. + + """ + full_path = os.path.normpath(directory + '/' + file_name) + results = FileResults(full_path) + + if not self._is_valid_directory(directory): + return results + + if not file_name.lower().endswith('.underscore'): + return results + + return self._load_and_check_underscore_file_is_safe(full_path, results) + + def _is_valid_directory(self, directory): + """ + Determines if the provided directory is a directory that could contain + Underscore.js template files that need to be linted. + + Arguments: + directory: The directory to be linted. + + Returns: + True if this directory should be linted for Underscore.js template + violations and False otherwise. + """ + if _is_skip_dir(self._skip_underscore_dirs, directory): + return False + + return True + + def _load_and_check_underscore_file_is_safe(self, file_full_path, results): + """ + Loads the Underscore.js template file and checks if it is in violation. + + Arguments: + file_full_path: The file to be loaded and linted + + Returns: + The file results containing any violations. + + """ + underscore_template = _load_file(file_full_path) + self.check_underscore_file_is_safe(underscore_template, results) + return results + + def check_underscore_file_is_safe(self, underscore_template, results): + """ + Checks for violations in an Underscore.js template. + + Arguments: + underscore_template: The contents of the Underscore.js template. + results: A file results objects to which violations will be added. + + """ + self._check_underscore_expressions(underscore_template, results) + results.prepare_results(underscore_template) + + def _check_underscore_expressions(self, underscore_template, results): + """ + Searches for Underscore.js expressions that contain violations. + + Arguments: + underscore_template: The contents of the Underscore.js template. + results: A list of results into which violations will be added. + + """ + expressions = self._find_unescaped_expressions(underscore_template) + for expression in expressions: + if not self._is_safe_unescaped_expression(expression): + results.violations.append(ExpressionRuleViolation( + Rules.underscore_not_escaped, expression + )) + + def _is_safe_unescaped_expression(self, expression): + """ + Determines whether an expression is safely escaped, even though it is + using the expression syntax that doesn't itself escape (i.e. <%= ). + + In some cases it is ok to not use the Underscore.js template escape + (i.e. <%- ) because the escaping is happening inside the expression. + + Safe examples:: + + <%= HtmlUtils.ensureHtml(message) %> + <%= _.escape(message) %> + + Arguments: + expression: The Expression being checked. + + Returns: + True if the Expression has been safely escaped, and False otherwise. + + """ + if expression.expression_inner.startswith('HtmlUtils.'): + return True + if expression.expression_inner.startswith('_.escape('): + return True + return False + + def _find_unescaped_expressions(self, underscore_template): + """ + Returns a list of unsafe expressions. + + At this time all expressions that are unescaped are considered unsafe. + + Arguments: + underscore_template: The contents of the Underscore.js template. + + Returns: + A list of Expressions. + """ + unescaped_expression_regex = re.compile("<%=.*?%>", re.DOTALL) + + expressions = [] + for match in unescaped_expression_regex.finditer(underscore_template): + expression = Expression( + match.start(), match.end(), template=underscore_template, start_delim="<%=", end_delim="%>" + ) + expressions.append(expression) + return expressions + + +class JavaScriptLinter(object): + """ + The linter for JavaScript and CoffeeScript files. + """ + + _skip_javascript_dirs = _skip_dirs + ('i18n', 'static/coffee') + _skip_coffeescript_dirs = _skip_dirs + underScoreLinter = UnderscoreTemplateLinter() + + def process_file(self, directory, file_name): + """ + Process file to determine if it is a JavaScript file and + if it is safe. + + Arguments: + directory (string): The directory of the file to be checked + file_name (string): A filename for a potential JavaScript file + + Returns: + The file results containing any violations. + + """ + file_full_path = os.path.normpath(directory + '/' + file_name) + results = FileResults(file_full_path) + + if not results.is_file: + return results + + if file_name.lower().endswith('.js') and not file_name.lower().endswith('.min.js'): + skip_dirs = self._skip_javascript_dirs + elif file_name.lower().endswith('.coffee'): + skip_dirs = self._skip_coffeescript_dirs + else: + return results + + if not self._is_valid_directory(skip_dirs, directory): + return results + + return self._load_and_check_javascript_file_is_safe(file_full_path, results) + + def _is_valid_directory(self, skip_dirs, directory): + """ + Determines if the provided directory is a directory that could contain + a JavaScript file that needs to be linted. + + Arguments: + skip_dirs: The directories to be skipped. + directory: The directory to be linted. + + Returns: + True if this directory should be linted for JavaScript violations + and False otherwise. + """ + if _is_skip_dir(skip_dirs, directory): + return False + + return True + + def _load_and_check_javascript_file_is_safe(self, file_full_path, results): + """ + Loads the JavaScript file and checks if it is in violation. + + Arguments: + file_full_path: The file to be loaded and linted. + + Returns: + The file results containing any violations. + + """ + file_contents = _load_file(file_full_path) + self.check_javascript_file_is_safe(file_contents, results) + return results + + def check_javascript_file_is_safe(self, file_contents, results): + """ + Checks for violations in a JavaScript file. + + Arguments: + file_contents: The contents of the JavaScript file. + results: A file results objects to which violations will be added. + + """ + no_caller_check = None + no_argument_check = None + self._check_jquery_function( + file_contents, "append", Rules.javascript_jquery_append, no_caller_check, + self._is_jquery_argument_safe, results + ) + self._check_jquery_function( + file_contents, "prepend", Rules.javascript_jquery_prepend, no_caller_check, + self._is_jquery_argument_safe, results + ) + self._check_jquery_function( + file_contents, "unwrap|wrap|wrapAll|wrapInner|after|before|replaceAll|replaceWith", + Rules.javascript_jquery_insertion, no_caller_check, self._is_jquery_argument_safe, results + ) + self._check_jquery_function( + file_contents, "appendTo|prependTo|insertAfter|insertBefore", + Rules.javascript_jquery_insert_into_target, self._is_jquery_insert_caller_safe, no_argument_check, results + ) + self._check_jquery_function( + file_contents, "html", Rules.javascript_jquery_html, no_caller_check, + self._is_jquery_html_argument_safe, results + ) + self._check_javascript_interpolate(file_contents, results) + self._check_javascript_escape(file_contents, results) + self._check_concat_with_html(file_contents, results) + self.underScoreLinter.check_underscore_file_is_safe(file_contents, results) + results.prepare_results(file_contents, line_comment_delim='//') + + def _get_expression_for_function(self, file_contents, function_start_match): + """ + Returns an expression that matches the function call opened with + function_start_match. + + Arguments: + file_contents: The contents of the JavaScript file. + function_start_match: A regex match representing the start of the function + call (e.g. ".escape("). + + Returns: + An Expression that best matches the function. + + """ + start_index = function_start_match.start() + inner_start_index = function_start_match.end() + result = _find_closing_char_index( + None, "(", ")", file_contents, start_index=inner_start_index + ) + if result is not None: + end_index = result['close_char_index'] + 1 + expression = Expression( + start_index, end_index, template=file_contents, start_delim=function_start_match.group(), end_delim=")" + ) + else: + expression = Expression(start_index) + return expression + + def _check_javascript_interpolate(self, file_contents, results): + """ + Checks that interpolate() calls are safe. + + Only use of StringUtils.interpolate() or HtmlUtils.interpolateText() + are safe. + + Arguments: + file_contents: The contents of the JavaScript file. + results: A file results objects to which violations will be added. + + """ + # Ignores calls starting with "StringUtils.", because those are safe + regex = re.compile(r"(?'))" + or ".append($('" + message + "
"', 'rule': Rules.javascript_concat_html}, + {'template': ' // var m = "" + commentedOutMessage + "
"', 'rule': None}, + {'template': 'var m = "" + message + "
"', 'rule': Rules.javascript_concat_html}, + {'template': 'var m = "" + message + " broken string', 'rule': Rules.javascript_concat_html}, + ) + def test_concat_with_html(self, data): + """ + Test check_javascript_file_is_safe with concatenating strings and HTML + """ + linter = JavaScriptLinter() + results = FileResults('') + + linter.check_javascript_file_is_safe(data['template'], results) + self._validate_data_rule(data, results) + + @data( + {'template': 'test.append( test.render().el )', 'rule': None}, + {'template': 'test.append(test.render().el)', 'rule': None}, + {'template': 'test.append(test.render().$el)', 'rule': None}, + {'template': 'test.append(testEl)', 'rule': None}, + {'template': 'test.append($test)', 'rule': None}, + # plain text is ok because any & will be escaped, and it stops false + # negatives on some other objects with an append() method + {'template': 'test.append("plain text")', 'rule': None}, + {'template': 'test.append("
")', 'rule': Rules.javascript_jquery_append}, + {'template': 'graph.svg.append("g")', 'rule': None}, + {'template': 'test.append( $( "