#!/usr/bin/env python """ A linting tool to check if templates are safe """ from __future__ import print_function import argparse import ast from enum import Enum import os import re import sys import textwrap class StringLines(object): """ StringLines provides utility methods to work with a string in terms of lines. As an example, it can convert an index into a line number or column number (i.e. index into the line). """ def __init__(self, string): """ Init method. Arguments: string: The string to work with. """ self._string = string self._line_start_indexes = self._process_line_breaks(string) # this is an exclusive index used in the case that the template doesn't # end with a new line self.eof_index = len(string) def _process_line_breaks(self, string): """ Creates a list, where each entry represents the index into the string where the next line break was found. Arguments: string: The string in which to find line breaks. Returns: A list of indices into the string at which each line begins. """ line_start_indexes = [0] index = 0 while True: index = string.find('\n', index) if index < 0: break index += 1 line_start_indexes.append(index) return line_start_indexes def get_string(self): """ Get the original string. """ return self._string def index_to_line_number(self, index): """ Given an index, determines the line of the index. Arguments: index: The index into the original string for which we want to know the line number Returns: The line number of the provided index. """ current_line_number = 0 for line_break_index in self._line_start_indexes: if line_break_index <= index: current_line_number += 1 else: break return current_line_number def index_to_column_number(self, index): """ Gets the column (i.e. index into the line) for the given index into the original string. Arguments: index: The index into the original string. Returns: The column (i.e. index into the line) for the given index into the original string. 
""" start_index = self.index_to_line_start_index(index) column = index - start_index + 1 return column def index_to_line_start_index(self, index): """ Gets the index of the start of the line of the given index. Arguments: index: The index into the original string. Returns: The index of the start of the line of the given index. """ line_number = self.index_to_line_number(index) return self.line_number_to_start_index(line_number) def index_to_line_end_index(self, index): """ Gets the index of the end of the line of the given index. Arguments: index: The index into the original string. Returns: The index of the end of the line of the given index. """ line_number = self.index_to_line_number(index) return self.line_number_to_end_index(line_number) def line_number_to_start_index(self, line_number): """ Gets the starting index for the provided line number. Arguments: line_number: The line number of the line for which we want to find the start index. Returns: The starting index for the provided line number. """ return self._line_start_indexes[line_number - 1] def line_number_to_end_index(self, line_number): """ Gets the ending index for the provided line number. Arguments: line_number: The line number of the line for which we want to find the end index. Returns: The ending index for the provided line number. """ if line_number < len(self._line_start_indexes): return self._line_start_indexes[line_number] else: # an exclusive index in the case that the file didn't end with a # newline. return self.eof_index def line_number_to_line(self, line_number): """ Gets the line of text designated by the provided line number. Arguments: line_number: The line number of the line we want to find. Returns: The line of text designated by the provided line number. 
""" start_index = self._line_start_indexes[line_number - 1] if len(self._line_start_indexes) == line_number: line = self._string[start_index:] else: end_index = self._line_start_indexes[line_number] line = self._string[start_index:end_index - 1] return line def line_count(self): """ Gets the number of lines in the string. """ return len(self._line_start_indexes) class Rules(Enum): """ An Enum of each rule which the linter will check. """ # IMPORTANT: Do not edit without also updating the docs: # - http://edx.readthedocs.io/projects/edx-developer-guide/en/latest/conventions/safe_templates.html#safe-template-linter mako_missing_default = 'mako-missing-default' mako_multiple_page_tags = 'mako-multiple-page-tags' mako_unparseable_expression = 'mako-unparseable-expression' mako_unwanted_html_filter = 'mako-unwanted-html-filter' mako_invalid_html_filter = 'mako-invalid-html-filter' mako_invalid_js_filter = 'mako-invalid-js-filter' mako_js_missing_quotes = 'mako-js-missing-quotes' mako_js_html_string = 'mako-js-html-string' mako_html_entities = 'mako-html-entities' mako_unknown_context = 'mako-unknown-context' underscore_not_escaped = 'underscore-not-escaped' javascript_jquery_append = 'javascript-jquery-append' javascript_jquery_prepend = 'javascript-jquery-prepend' javascript_jquery_insertion = 'javascript-jquery-insertion' javascript_jquery_insert_into_target = 'javascript-jquery-insert-into-target' javascript_jquery_html = 'javascript-jquery-html' javascript_concat_html = 'javascript-concat-html' javascript_escape = 'javascript-escape' javascript_interpolate = 'javascript-interpolate' python_concat_html = 'python-concat-html' python_custom_escape = 'python-custom-escape' python_deprecated_display_name = 'python-deprecated-display-name' python_requires_html_or_text = 'python-requires-html-or-text' python_close_before_format = 'python-close-before-format' python_wrap_html = 'python-wrap-html' python_interpolate_html = 'python-interpolate-html' python_parse_error = 
'python-parse-error' def __init__(self, rule_id): self.rule_id = rule_id class Expression(object): """ Represents an arbitrary expression. An expression can be any type of code snippet. It will sometimes have a starting and ending delimiter, but not always. Here are some example expressions:: ${x | n, decode.utf8} <%= x %> function(x) "
" + message + "
" Other details of note: - Only a start_index is required for a valid expression. - If end_index is None, it means we couldn't parse the rest of the expression. - All other details of the expression are optional, and are only added if and when supplied and needed for additional checks. They are not necessary for the final results output. """ def __init__(self, start_index, end_index=None, template=None, start_delim="", end_delim="", strings=None): """ Init method. Arguments: start_index: the starting index of the expression end_index: the index immediately following the expression, or None if the expression was unparseable template: optional template code in which the expression was found start_delim: optional starting delimiter of the expression end_delim: optional ending delimeter of the expression strings: optional list of ParseStrings """ self.start_index = start_index self.end_index = end_index self.start_delim = start_delim self.end_delim = end_delim self.strings = strings if template is not None and self.end_index is not None: self.expression = template[start_index:end_index] self.expression_inner = self.expression[len(start_delim):-len(end_delim)].strip() else: self.expression = None self.expression_inner = None class RuleViolation(object): """ Base class representing a rule violation which can be used for reporting. """ def __init__(self, rule): """ Init method. Arguments: rule: The Rule which was violated. """ self.rule = rule self.full_path = '' self.is_disabled = False def _mark_disabled(self, string, scope_start_string=False): """ Performs the disable pragma search and marks the rule as disabled if a matching pragma is found. Pragma format:: safe-lint: disable=violation-name,other-violation-name Arguments: string: The string of code in which to search for the pragma. scope_start_string: True if the pragma must be at the start of the string, False otherwise. 
The pragma is considered at the start of the string if it has a maximum of 5 non-whitespace characters preceding it. Side Effect: Sets self.is_disabled as appropriate based on whether the pragma is found. """ pragma_match = re.search(r'safe-lint:\s*disable=([a-zA-Z,-]+)', string) if pragma_match is None: return if scope_start_string: spaces_count = string.count(' ', 0, pragma_match.start()) non_space_count = pragma_match.start() - spaces_count if non_space_count > 5: return for disabled_rule in pragma_match.group(1).split(','): if disabled_rule == self.rule.rule_id: self.is_disabled = True return def sort_key(self): """ Returns a key that can be sorted on """ return (0, 0, self.rule.rule_id) def first_line(self): """ Since a file level rule has no first line, returns empty string. """ return '' def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. Arguments: full_path: Path of the file in violation. string_lines: A StringLines containing the contents of the file in violation. """ self.full_path = full_path self._mark_disabled(string_lines.get_string()) def print_results(self, _options, out): """ Prints the results represented by this rule violation. Arguments: _options: ignored out: output file """ print("{}: {}".format(self.full_path, self.rule.rule_id), file=out) class ExpressionRuleViolation(RuleViolation): """ A class representing a particular rule violation for expressions which contain more specific details of the location of the violation for reporting purposes. """ def __init__(self, rule, expression): """ Init method. Arguments: rule: The Rule which was violated. expression: The Expression that was in violation. 
""" super(ExpressionRuleViolation, self).__init__(rule) self.expression = expression self.start_line = 0 self.start_column = 0 self.end_line = 0 self.end_column = 0 self.lines = [] self.is_disabled = False def _mark_expression_disabled(self, string_lines): """ Marks the expression violation as disabled if it finds the disable pragma anywhere on the first line of the violation, or at the start of the line preceding the violation. Pragma format:: safe-lint: disable=violation-name,other-violation-name Examples:: <% // safe-lint: disable=underscore-not-escaped %> <%= gettext('Single Line') %> <%= gettext('Single Line') %><% // safe-lint: disable=underscore-not-escaped %> Arguments: string_lines: A StringLines containing the contents of the file in violation. Side Effect: Sets self.is_disabled as appropriate based on whether the pragma is found. """ # disable pragma can be at the start of the preceding line has_previous_line = self.start_line > 1 if has_previous_line: line_to_check = string_lines.line_number_to_line(self.start_line - 1) self._mark_disabled(line_to_check, scope_start_string=True) if self.is_disabled: return # TODO: this should work at end of any line of the violation # disable pragma can be anywhere on the first line of the violation line_to_check = string_lines.line_number_to_line(self.start_line) self._mark_disabled(line_to_check, scope_start_string=False) def sort_key(self): """ Returns a key that can be sorted on """ return (self.start_line, self.start_column, self.rule.rule_id) def first_line(self): """ Returns the initial line of code of the violation. """ return self.lines[0] def prepare_results(self, full_path, string_lines): """ Preps this instance for results reporting. Arguments: full_path: Path of the file in violation. string_lines: A StringLines containing the contents of the file in violation. 
""" self.full_path = full_path start_index = self.expression.start_index self.start_line = string_lines.index_to_line_number(start_index) self.start_column = string_lines.index_to_column_number(start_index) end_index = self.expression.end_index if end_index is not None: self.end_line = string_lines.index_to_line_number(end_index) self.end_column = string_lines.index_to_column_number(end_index) else: self.end_line = self.start_line self.end_column = '?' for line_number in range(self.start_line, self.end_line + 1): self.lines.append(string_lines.line_number_to_line(line_number)) self._mark_expression_disabled(string_lines) def print_results(self, options, out): """ Prints the results represented by this rule violation. Arguments: options: A list of the following options: list_files: True to print only file names, and False to print all violations. verbose: True for multiple lines of context, False single line. out: output file """ if options['verbose']: end_line = self.end_line + 1 else: end_line = self.start_line + 1 for line_number in range(self.start_line, end_line): if line_number == self.start_line: column = self.start_column rule_id = self.rule.rule_id + ":" else: column = 1 rule_id = " " * (len(self.rule.rule_id) + 1) line = self.lines[line_number - self.start_line].encode(encoding='utf-8') print("{}: {}:{}: {} {}".format( self.full_path, line_number, column, rule_id, line ), file=out) class SummaryResults(object): """ Contains the summary results for all violations. """ def __init__(self): """ Init method. """ self.total_violations = 0 self.totals_by_rule = dict.fromkeys( [rule.rule_id for rule in Rules.__members__.values()], 0 ) def add_violation(self, violation): """ Adds a violation to the summary details. Arguments: violation: The violation to add to the summary. """ self.total_violations += 1 self.totals_by_rule[violation.rule.rule_id] += 1 def print_results(self, options, out): """ Prints the results (i.e. violations) in this file. 
Arguments: options: A list of the following options: list_files: True to print only file names, and False to print all violations. rule_totals: If True include totals by rule. out: output file """ if options['list_files'] is False: if options['rule_totals']: max_rule_id_len = max(len(rule_id) for rule_id in self.totals_by_rule) print("", file=out) for rule_id in sorted(self.totals_by_rule.keys()): padding = " " * (max_rule_id_len - len(rule_id)) print("{}: {}{} violations".format(rule_id, padding, self.totals_by_rule[rule_id]), file=out) print("", file=out) # matches output of jshint for simplicity print("", file=out) print("{} violations total".format(self.total_violations), file=out) class FileResults(object): """ Contains the results, or violations, for a file. """ def __init__(self, full_path): """ Init method. Arguments: full_path: The full path for this file. """ self.full_path = full_path self.directory = os.path.dirname(full_path) self.is_file = os.path.isfile(full_path) self.violations = [] def prepare_results(self, file_string, line_comment_delim=None): """ Prepares the results for output for this file. Arguments: file_string: The string of content for this file. line_comment_delim: A string representing the start of a line comment. For example "##" for Mako and "//" for JavaScript. """ string_lines = StringLines(file_string) for violation in self.violations: violation.prepare_results(self.full_path, string_lines) if line_comment_delim is not None: self._filter_commented_code(line_comment_delim) def print_results(self, options, summary_results, out): """ Prints the results (i.e. violations) in this file. Arguments: options: A list of the following options: list_files: True to print only file names, and False to print all violations. summary_results: A SummaryResults with a summary of the violations. verbose: True for multiple lines of context, False single line. out: output file Side effect: Updates the passed SummaryResults. 
""" if options['list_files']: if self.violations is not None and 0 < len(self.violations): print(self.full_path, file=out) else: self.violations.sort(key=lambda violation: violation.sort_key()) for violation in self.violations: if not violation.is_disabled: violation.print_results(options, out) summary_results.add_violation(violation) def _filter_commented_code(self, line_comment_delim): """ Remove any violations that were found in commented out code. Arguments: line_comment_delim: A string representing the start of a line comment. For example "##" for Mako and "//" for JavaScript. """ self.violations = [v for v in self.violations if not self._is_commented(v, line_comment_delim)] def _is_commented(self, violation, line_comment_delim): """ Checks if violation line is commented out. Arguments: violation: The violation to check line_comment_delim: A string representing the start of a line comment. For example "##" for Mako and "//" for JavaScript. Returns: True if the first line of the violation is actually commented out, False otherwise. """ if 'parse' in violation.rule.rule_id: # For parse rules, don't filter them because the comment could be a # part of the parse issue to begin with. return False else: return violation.first_line().lstrip().startswith(line_comment_delim) class ParseString(object): """ ParseString is the result of parsing a string out of a template. A ParseString has the following attributes: start_index: The index of the first quote, or None if none found end_index: The index following the closing quote, or None if unparseable quote_length: The length of the quote. Could be 3 for a Python triple quote. Or None if none found. string: the text of the parsed string, or None if none found. string_inner: the text inside the quotes of the parsed string, or None if none found. """ def __init__(self, template, start_index, end_index): """ Init method. Arguments: template: The template to be searched. start_index: The start index to search. 
end_index: The end index to search before. """ self.end_index = None self.quote_length = None self.string = None self.string_inner = None self.start_index = self._find_string_start(template, start_index, end_index) if self.start_index is not None: result = self._parse_string(template, self.start_index) if result is not None: self.end_index = result['end_index'] self.quote_length = result['quote_length'] self.string = result['string'] self.string_inner = result['string_inner'] def _find_string_start(self, template, start_index, end_index): """ Finds the index of the end of start of a string. In other words, the first single or double quote. Arguments: template: The template to be searched. start_index: The start index to search. end_index: The end index to search before. Returns: The start index of the first single or double quote, or None if no quote was found. """ quote_regex = re.compile(r"""['"]""") start_match = quote_regex.search(template, start_index, end_index) if start_match is None: return None else: return start_match.start() def _parse_string(self, template, start_index): """ Finds the indices of a string inside a template. Arguments: template: The template to be searched. start_index: The start index of the open quote. Returns: A dict containing the following, or None if not parseable: end_index: The index following the closing quote quote_length: The length of the quote. Could be 3 for a Python triple quote. 
string: the text of the parsed string string_inner: the text inside the quotes of the parsed string """ quote = template[start_index] if quote not in ["'", '"']: raise ValueError("start_index must refer to a single or double quote.") triple_quote = quote * 3 if template.startswith(triple_quote, start_index): quote = triple_quote next_start_index = start_index + len(quote) while True: quote_end_index = template.find(quote, next_start_index) backslash_index = template.find("\\", next_start_index) if quote_end_index < 0: return None if 0 <= backslash_index < quote_end_index: next_start_index = backslash_index + 2 else: end_index = quote_end_index + len(quote) quote_length = len(quote) string = template[start_index:end_index] return { 'end_index': end_index, 'quote_length': quote_length, 'string': string, 'string_inner': string[quote_length:-quote_length], } class BaseLinter(object): """ BaseLinter provides some helper functions that are used by multiple linters. """ LINE_COMMENT_DELIM = None def _is_valid_directory(self, skip_dirs, directory): """ Determines if the provided directory is a directory that could contain a file that needs to be linted. Arguments: skip_dirs: The directories to be skipped. directory: The directory to be linted. Returns: True if this directory should be linted for violations and False otherwise. """ if is_skip_dir(skip_dirs, directory): return False return True def _load_file(self, file_full_path): """ Loads a file into a string. Arguments: file_full_path: The full path of the file to be loaded. Returns: A string containing the files contents. """ with open(file_full_path, 'r') as input_file: file_contents = input_file.read() return file_contents.decode(encoding='utf-8') def _load_and_check_file_is_safe(self, file_full_path, lint_function, results): """ Loads the Python file and checks if it is in violation. Arguments: file_full_path: The file to be loaded and linted. lint_function: A function that will lint for violations. 
It must take two arguments: 1) string contents of the file 2) results object results: A FileResults to be used for this file Returns: The file results containing any violations. """ file_contents = self._load_file(file_full_path) lint_function(file_contents, results) return results def _find_closing_char_index( self, start_delim, open_char, close_char, template, start_index, num_open_chars=0, strings=None ): """ Finds the index of the closing char that matches the opening char. For example, this could be used to find the end of a Mako expression, where the open and close characters would be '{' and '}'. Arguments: start_delim: If provided (e.g. '${' for Mako expressions), the closing character must be found before the next start_delim. open_char: The opening character to be matched (e.g '{') close_char: The closing character to be matched (e.g '}') template: The template to be searched. start_index: The start index of the last open char. num_open_chars: The current number of open chars. strings: A list of ParseStrings already parsed Returns: A dict containing the following, or None if unparseable: close_char_index: The index of the closing character strings: a list of ParseStrings """ strings = [] if strings is None else strings # Find start index of an uncommented line. start_index = self._uncommented_start_index(template, start_index) # loop until we found something useful on an uncommented out line while start_index is not None: close_char_index = template.find(close_char, start_index) if close_char_index < 0: # If we can't find a close char, let's just quit. 
return None open_char_index = template.find(open_char, start_index, close_char_index) parse_string = ParseString(template, start_index, close_char_index) valid_index_list = [close_char_index] if 0 <= open_char_index: valid_index_list.append(open_char_index) if parse_string.start_index is not None: valid_index_list.append(parse_string.start_index) min_valid_index = min(valid_index_list) start_index = self._uncommented_start_index(template, min_valid_index) if start_index == min_valid_index: break if start_index is None: # No uncommented code to search. return None if parse_string.start_index == min_valid_index: strings.append(parse_string) if parse_string.end_index is None: return None else: return self._find_closing_char_index( start_delim, open_char, close_char, template, start_index=parse_string.end_index, num_open_chars=num_open_chars, strings=strings ) if open_char_index == min_valid_index: if start_delim is not None: # if we find another starting delim, consider this unparseable start_delim_index = template.find(start_delim, start_index, close_char_index) if 0 <= start_delim_index < open_char_index: return None return self._find_closing_char_index( start_delim, open_char, close_char, template, start_index=open_char_index + 1, num_open_chars=num_open_chars + 1, strings=strings ) if num_open_chars == 0: return { 'close_char_index': close_char_index, 'strings': strings, } else: return self._find_closing_char_index( start_delim, open_char, close_char, template, start_index=close_char_index + 1, num_open_chars=num_open_chars - 1, strings=strings ) def _uncommented_start_index(self, template, start_index): """ Finds the first start_index that is on an uncommented line. Arguments: template: The template to be searched. start_index: The start index of the last open char. Returns: If start_index is on an uncommented out line, returns start_index. Otherwise, returns the start_index of the first line that is uncommented, if there is one. Otherwise, returns None. 
""" if self.LINE_COMMENT_DELIM is not None: line_start_index = StringLines(template).index_to_line_start_index(start_index) uncommented_line_start_index_regex = re.compile("^(?!\s*{})".format(self.LINE_COMMENT_DELIM), re.MULTILINE) # Finds the line start index of the first uncommented line, including the current line. match = uncommented_line_start_index_regex.search(template, line_start_index) if match is None: # No uncommented lines. return None elif match.start() < start_index: # Current line is uncommented, so return original start_index. return start_index else: # Return start of first uncommented line. return match.start() else: # No line comment delimeter, so this acts as a no-op. return start_index class UnderscoreTemplateLinter(BaseLinter): """ The linter for Underscore.js template files. """ def __init__(self): """ Init method. """ super(UnderscoreTemplateLinter, self).__init__() self._skip_underscore_dirs = SKIP_DIRS + ('test',) def process_file(self, directory, file_name): """ Process file to determine if it is an Underscore template file and if it is safe. Arguments: directory (string): The directory of the file to be checked file_name (string): A filename for a potential underscore file Returns: The file results containing any violations. """ full_path = os.path.normpath(directory + '/' + file_name) results = FileResults(full_path) if not self._is_valid_directory(self._skip_underscore_dirs, directory): return results if not file_name.lower().endswith('.underscore'): return results return self._load_and_check_file_is_safe(full_path, self.check_underscore_file_is_safe, results) def check_underscore_file_is_safe(self, underscore_template, results): """ Checks for violations in an Underscore.js template. Arguments: underscore_template: The contents of the Underscore.js template. results: A file results objects to which violations will be added. 
""" self._check_underscore_expressions(underscore_template, results) results.prepare_results(underscore_template) def _check_underscore_expressions(self, underscore_template, results): """ Searches for Underscore.js expressions that contain violations. Arguments: underscore_template: The contents of the Underscore.js template. results: A list of results into which violations will be added. """ expressions = self._find_unescaped_expressions(underscore_template) for expression in expressions: if not self._is_safe_unescaped_expression(expression): results.violations.append(ExpressionRuleViolation( Rules.underscore_not_escaped, expression )) def _is_safe_unescaped_expression(self, expression): """ Determines whether an expression is safely escaped, even though it is using the expression syntax that doesn't itself escape (i.e. <%= ). In some cases it is ok to not use the Underscore.js template escape (i.e. <%- ) because the escaping is happening inside the expression. Safe examples:: <%= HtmlUtils.ensureHtml(message) %> <%= _.escape(message) %> Arguments: expression: The Expression being checked. Returns: True if the Expression has been safely escaped, and False otherwise. """ if expression.expression_inner.startswith('HtmlUtils.'): return True if expression.expression_inner.startswith('_.escape('): return True return False def _find_unescaped_expressions(self, underscore_template): """ Returns a list of unsafe expressions. At this time all expressions that are unescaped are considered unsafe. Arguments: underscore_template: The contents of the Underscore.js template. Returns: A list of Expressions. 
""" unescaped_expression_regex = re.compile("<%=.*?%>", re.DOTALL) expressions = [] for match in unescaped_expression_regex.finditer(underscore_template): expression = Expression( match.start(), match.end(), template=underscore_template, start_delim="<%=", end_delim="%>" ) expressions.append(expression) return expressions class JavaScriptLinter(BaseLinter): """ The linter for JavaScript and CoffeeScript files. """ LINE_COMMENT_DELIM = "//" def __init__(self): """ Init method. """ super(JavaScriptLinter, self).__init__() self._skip_javascript_dirs = SKIP_DIRS + ('i18n', 'static/coffee') self._skip_coffeescript_dirs = SKIP_DIRS self.underscore_linter = UnderscoreTemplateLinter() def process_file(self, directory, file_name): """ Process file to determine if it is a JavaScript file and if it is safe. Arguments: directory (string): The directory of the file to be checked file_name (string): A filename for a potential JavaScript file Returns: The file results containing any violations. """ file_full_path = os.path.normpath(directory + '/' + file_name) results = FileResults(file_full_path) if not results.is_file: return results if file_name.lower().endswith('.js') and not file_name.lower().endswith('.min.js'): skip_dirs = self._skip_javascript_dirs elif file_name.lower().endswith('.coffee'): skip_dirs = self._skip_coffeescript_dirs else: return results if not self._is_valid_directory(skip_dirs, directory): return results return self._load_and_check_file_is_safe(file_full_path, self.check_javascript_file_is_safe, results) def check_javascript_file_is_safe(self, file_contents, results): """ Checks for violations in a JavaScript file. Arguments: file_contents: The contents of the JavaScript file. results: A file results objects to which violations will be added. 
""" no_caller_check = None no_argument_check = None self._check_jquery_function( file_contents, "append", Rules.javascript_jquery_append, no_caller_check, self._is_jquery_argument_safe, results ) self._check_jquery_function( file_contents, "prepend", Rules.javascript_jquery_prepend, no_caller_check, self._is_jquery_argument_safe, results ) self._check_jquery_function( file_contents, "unwrap|wrap|wrapAll|wrapInner|after|before|replaceAll|replaceWith", Rules.javascript_jquery_insertion, no_caller_check, self._is_jquery_argument_safe, results ) self._check_jquery_function( file_contents, "appendTo|prependTo|insertAfter|insertBefore", Rules.javascript_jquery_insert_into_target, self._is_jquery_insert_caller_safe, no_argument_check, results ) self._check_jquery_function( file_contents, "html", Rules.javascript_jquery_html, no_caller_check, self._is_jquery_html_argument_safe, results ) self._check_javascript_interpolate(file_contents, results) self._check_javascript_escape(file_contents, results) self._check_concat_with_html(file_contents, Rules.javascript_concat_html, results) self.underscore_linter.check_underscore_file_is_safe(file_contents, results) results.prepare_results(file_contents, line_comment_delim=self.LINE_COMMENT_DELIM) def _get_expression_for_function(self, file_contents, function_start_match): """ Returns an expression that matches the function call opened with function_start_match. Arguments: file_contents: The contents of the JavaScript file. function_start_match: A regex match representing the start of the function call (e.g. ".escape("). Returns: An Expression that best matches the function. 
""" start_index = function_start_match.start() inner_start_index = function_start_match.end() result = self._find_closing_char_index( None, "(", ")", file_contents, start_index=inner_start_index ) if result is not None: end_index = result['close_char_index'] + 1 expression = Expression( start_index, end_index, template=file_contents, start_delim=function_start_match.group(), end_delim=")" ) else: expression = Expression(start_index) return expression def _check_javascript_interpolate(self, file_contents, results): """ Checks that interpolate() calls are safe. Only use of StringUtils.interpolate() or HtmlUtils.interpolateText() are safe. Arguments: file_contents: The contents of the JavaScript file. results: A file results objects to which violations will be added. """ # Ignores calls starting with "StringUtils.", because those are safe regex = re.compile(r"(?'))" or ".append($('.*?)%>', re.DOTALL)
for python_block_match in python_block_regex.finditer(mako_template):
self._check_expression_python(
python_code=python_block_match.group('code'),
start_offset=(python_block_match.start() + len('<% ')),
has_page_default=has_page_default,
results=results
)
def _check_expression_python(self, python_code, start_offset, has_page_default, results):
    """
    Lint the Python inside a single Python expression in a Mako template.

    The code is dedented before parsing, then checked with the general
    Python linter and with the Mako-specific HTML string rules. All
    violations are collected in a temporary FileResults and finally copied
    into results, shifted by start_offset.

    Arguments:
        python_code: The Python contents of an expression.
        start_offset: The offset of the Python content inside the original
            Mako template.
        has_page_default: True if the page is marked as default, False
            otherwise.
        results: A list of results into which violations will be added.

    Side effect:
        Adds Python violations to results.

    """
    python_results = FileResults("")

    # Dedent expression internals so it is parseable.
    # Note that the final columns reported could be off somewhat.
    adjusted_python_code = textwrap.dedent(python_code)
    # Raw strings keep "\w" a regex escape rather than an (invalid) string
    # escape sequence.
    first_letter_match = re.search(r'\w', python_code)
    adjusted_first_letter_match = re.search(r'\w', adjusted_python_code)
    if first_letter_match is not None and adjusted_first_letter_match is not None:
        # Compensate for the characters removed in front of the first word
        # character, so reported indexes still line up with the template.
        start_offset += (first_letter_match.start() - adjusted_first_letter_match.start())
    python_code = adjusted_python_code

    root_node = self.python_linter.parse_python_code(python_code, python_results)
    self.python_linter.check_python_code_is_safe(python_code, root_node, python_results)

    # Check mako expression specific Python rules.
    if root_node is not None:
        visitor = HtmlStringVisitor(python_code, python_results, True)
        visitor.visit(root_node)
        for unsafe_html_string_node in visitor.unsafe_html_string_nodes:
            python_results.violations.append(ExpressionRuleViolation(
                Rules.python_wrap_html, visitor.node_to_expression(unsafe_html_string_node)
            ))
        # Over-escaped entity strings are only reported when the page is
        # marked as default.
        if has_page_default:
            for over_escaped_entity_string_node in visitor.over_escaped_entity_string_nodes:
                python_results.violations.append(ExpressionRuleViolation(
                    Rules.mako_html_entities, visitor.node_to_expression(over_escaped_entity_string_node)
                ))

    python_results.prepare_results(python_code, line_comment_delim=self.LINE_COMMENT_DELIM)
    self._shift_and_add_violations(python_results, start_offset, results)
def _shift_and_add_violations(self, other_linter_results, start_offset, results):
    """
    Copies the violations found by another linter into the Mako results,
    moving each expression so its indexes refer to the original Mako
    template rather than to the linted fragment.

    Arguments:
        other_linter_results: Results from another linter.
        start_offset: The offset of the linted code, a part of the template,
            inside the original Mako template.
        results: A list of results into which violations will be added.

    Side effect:
        Adds violations to results. The expression objects held by
        other_linter_results are shifted in place.

    """
    for found in other_linter_results.violations:
        shifted = found.expression
        # relocate the expression to its position in the full template
        shifted.start_index = shifted.start_index + start_offset
        if shifted.end_index is not None:
            shifted.end_index = shifted.end_index + start_offset
        relocated_violation = ExpressionRuleViolation(found.rule, shifted)
        results.violations.append(relocated_violation)
def _check_expression_and_filters(self, mako_template, expression, context, has_page_default, results):
    """
    Checks that the filters used in the given Mako expression are valid
    for the given context. Adds violation to results if there is a problem.

    The Python code inside the expression is linted as well, unless the
    context is unknown, in which case only the unknown-context violation
    is reported.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.
        context: The context of the page in which the expression was found
            (e.g. javascript, html).
        has_page_default: True if the page is marked as default, False
            otherwise.
        results: A list of results into which violations will be added.

    """
    if context == 'unknown':
        results.violations.append(ExpressionRuleViolation(
            Rules.mako_unknown_context, expression
        ))
        return

    # Example: finds "| n, h}" when given "${x | n, h}"
    filters_regex = re.compile(r'\|([.,\w\s]*)\}')
    filters_match = filters_regex.search(expression.expression)

    # Check Python code inside expression.
    if filters_match is None:
        # no filters: drop the leading "${" and the trailing "}"
        python_code = expression.expression[2:-1]
    else:
        # drop the leading "${" and everything from the "|" onward
        python_code = expression.expression[2:filters_match.start()]
    self._check_expression_python(python_code, expression.start_index + 2, has_page_default, results)

    # Check filters.
    if filters_match is None:
        if context == 'javascript':
            results.violations.append(ExpressionRuleViolation(
                Rules.mako_invalid_js_filter, expression
            ))
        return

    # The regex above accepts any whitespace (\s) around the filter names,
    # so strip all of it -- not only spaces -- before comparing. Otherwise a
    # filter list split across lines or containing tabs is reported as
    # invalid.
    filters = re.sub(r'\s', '', filters_match.group(1)).split(",")
    if filters == ['n', 'decode.utf8']:
        # {x | n, decode.utf8} is valid in any context
        pass
    elif context == 'html':
        if filters == ['h']:
            if has_page_default:
                # suppress this violation if the page default hasn't been set,
                # otherwise the template might get less safe
                results.violations.append(ExpressionRuleViolation(
                    Rules.mako_unwanted_html_filter, expression
                ))
        else:
            results.violations.append(ExpressionRuleViolation(
                Rules.mako_invalid_html_filter, expression
            ))
    elif context == 'javascript':
        self._check_js_expression_not_with_html(mako_template, expression, results)
        if filters == ['n', 'dump_js_escaped_json']:
            # {x | n, dump_js_escaped_json} is valid
            pass
        elif filters == ['n', 'js_escaped_string']:
            # {x | n, js_escaped_string} is valid, if surrounded by quotes
            self._check_js_string_expression_in_quotes(mako_template, expression, results)
        else:
            results.violations.append(ExpressionRuleViolation(
                Rules.mako_invalid_js_filter, expression
            ))
def _check_js_string_expression_in_quotes(self, mako_template, expression, results):
    """
    Verifies that a Mako expression filtered with js_escaped_string sits
    inside a quoted string, and records a violation when it does not.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.
        results: A list of results into which violations will be added.

    """
    wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
    if wrapping_string is not None:
        # the expression is wrapped by a string, nothing to report
        return

    missing_quotes = ExpressionRuleViolation(Rules.mako_js_missing_quotes, expression)
    results.violations.append(missing_quotes)
def _check_js_expression_not_with_html(self, mako_template, expression, results):
    """
    Verifies that a Mako expression in a JavaScript context is not placed
    inside a string that also contains HTML (detected by the presence of
    "<" or ">"), and records a violation when it is.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.
        results: A list of results into which violations will be added.

    """
    wrapping_string = self._find_string_wrapping_expression(mako_template, expression)
    if wrapping_string is None:
        # not inside a string at all
        return
    if re.search('[<>]', wrapping_string.string) is None:
        # the surrounding string holds no angle brackets
        return

    html_in_string = ExpressionRuleViolation(Rules.mako_js_html_string, expression)
    results.violations.append(html_in_string)
def _find_string_wrapping_expression(self, mako_template, expression):
    """
    Looks for a string that surrounds the given Mako expression, searching
    only the line(s) of the template on which the expression appears.

    Arguments:
        mako_template: The contents of the Mako template.
        expression: A Mako Expression.

    Returns:
        ParseString representing a scrubbed version of the wrapped string,
        where the Mako expression was replaced with "${...}", if a wrapped
        string was found. Otherwise, returns None if none found (including
        when the expression has no end index).

    """
    template_lines = StringLines(mako_template)
    line_start = template_lines.index_to_line_start_index(expression.start_index)
    if expression.end_index is None:
        return None
    line_end = template_lines.index_to_line_end_index(expression.end_index)

    # Swap the real expression for a placeholder so that code inside the
    # expression can't interfere with rules applied to the surrounding
    # code (i.e. checking JavaScript).
    before_expression = mako_template[line_start:expression.start_index]
    after_expression = mako_template[expression.end_index:line_end]
    scrubbed_lines = "".join((before_expression, "${...}", after_expression))

    # position of the placeholder relative to the scrubbed text
    expression_offset = expression.start_index - line_start

    candidate = ParseString(scrubbed_lines, 0, len(scrubbed_lines))
    # keep going for as long as a validly parsed string is found
    while 0 <= candidate.start_index < candidate.end_index:
        if candidate.start_index < expression_offset < candidate.end_index:
            # the expression lies inside this string
            return candidate
        # otherwise resume the search right after this string
        candidate = ParseString(scrubbed_lines, candidate.end_index, len(scrubbed_lines))
    return None
def _get_contexts(self, mako_template):
"""
Returns a data structure that represents the indices at which the
template changes from HTML context to JavaScript and back.
Return:
A list of dicts where each dict contains:
- index: the index of the context.
- type: the context type (e.g. 'html' or 'javascript').
"""
contexts_re = re.compile(
r"""
| # script tag start
| # script tag end
<%static:require_module(_async)?.*?> | # require js script tag start (optionally the _async version)
%static:require_module(_async)?> | # require js script tag end (optionally the _async version)
<%block[ ]*name=['"]requirejs['"]\w*> | # require js tag start
%block> # require js tag end
""",
re.VERBOSE | re.IGNORECASE
)
media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE)
contexts = [{'index': 0, 'type': 'html'}]
javascript_types = [
'text/javascript', 'text/ecmascript', 'application/ecmascript', 'application/javascript',
'text/x-mathjax-config', 'json/xblock-args'
]
html_types = ['text/template']
for context in contexts_re.finditer(mako_template):
match_string = context.group().lower()
if match_string.startswith("