Merge pull request #11800 from edx/robrap/safe-template-linter
TNL-4214: Add safe template linter
This commit is contained in:
0
scripts/__init__.py
Normal file
0
scripts/__init__.py
Normal file
852
scripts/safe_template_linter.py
Executable file
852
scripts/safe_template_linter.py
Executable file
@@ -0,0 +1,852 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
A linting tool to check if templates are safe
|
||||
"""
|
||||
from enum import Enum
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
_skip_dirs = (
|
||||
'/node_modules',
|
||||
'/vendor',
|
||||
'/spec',
|
||||
'/.pycharm_helpers',
|
||||
'/test_root',
|
||||
'/reports/diff_quality',
|
||||
'/common/static/xmodule/modules',
|
||||
)
|
||||
_skip_mako_dirs = _skip_dirs
|
||||
_skip_underscore_dirs = _skip_dirs + ('/test',)
|
||||
|
||||
|
||||
def _is_skip_dir(skip_dirs, directory):
|
||||
"""
|
||||
Determines whether a directory should be skipped or linted.
|
||||
|
||||
Arguments:
|
||||
skip_dirs: The configured directories to be skipped.
|
||||
directory: The current directory to be tested.
|
||||
|
||||
Returns:
|
||||
True if the directory should be skipped, and False otherwise.
|
||||
|
||||
"""
|
||||
for skip_dir in skip_dirs:
|
||||
dir_contains_skip_dir = (directory.find(skip_dir + '/') >= 0)
|
||||
if dir_contains_skip_dir or directory.endswith(skip_dir):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _load_file(self, file_full_path):
|
||||
"""
|
||||
Loads a file into a string.
|
||||
|
||||
Arguments:
|
||||
file_full_path: The full path of the file to be loaded.
|
||||
|
||||
Returns:
|
||||
A string containing the files contents.
|
||||
"""
|
||||
with open(file_full_path, 'r') as input_file:
|
||||
file_contents = input_file.read()
|
||||
return file_contents.decode(encoding='utf-8')
|
||||
|
||||
|
||||
def _get_line_breaks(self, string):
|
||||
"""
|
||||
Creates a list, where each entry represents the index into the string where
|
||||
the next line break was found.
|
||||
|
||||
Arguments:
|
||||
string: The string in which to find line breaks.
|
||||
|
||||
Returns:
|
||||
A list of indices into the string at which each line break can be
|
||||
found.
|
||||
|
||||
"""
|
||||
line_breaks = [0]
|
||||
index = 0
|
||||
while True:
|
||||
index = string.find('\n', index)
|
||||
if index < 0:
|
||||
break
|
||||
index += 1
|
||||
line_breaks.append(index)
|
||||
return line_breaks
|
||||
|
||||
|
||||
def _get_line_number(self, line_breaks, index):
|
||||
"""
|
||||
Given the list of line break indices, and an index, determines the line of
|
||||
the index.
|
||||
|
||||
Arguments:
|
||||
line_breaks: A list of indices into a string at which each line break
|
||||
was found.
|
||||
index: The index into the original string for which we want to know the
|
||||
line number
|
||||
|
||||
Returns:
|
||||
The line number of the provided index.
|
||||
|
||||
"""
|
||||
current_line_number = 0
|
||||
for line_break_index in line_breaks:
|
||||
if line_break_index <= index:
|
||||
current_line_number += 1
|
||||
else:
|
||||
break
|
||||
return current_line_number
|
||||
|
||||
|
||||
def _get_line(self, string, line_breaks, line_number):
|
||||
"""
|
||||
Gets the line of text designated by the provided line number.
|
||||
|
||||
Arguments:
|
||||
string: The string of content with line breaks.
|
||||
line_breaks: A list of indices into a string at which each line break
|
||||
was found.
|
||||
line_number: The line number of the line we want to find.
|
||||
|
||||
Returns:
|
||||
The line of text designated by the provided line number.
|
||||
|
||||
"""
|
||||
start_index = line_breaks[line_number - 1]
|
||||
if len(line_breaks) == line_number:
|
||||
line = string[start_index:]
|
||||
else:
|
||||
end_index = line_breaks[line_number]
|
||||
line = string[start_index:end_index - 1]
|
||||
return line.encode(encoding='utf-8')
|
||||
|
||||
|
||||
def _get_column_number(self, line_breaks, line_number, index):
|
||||
"""
|
||||
Gets the column (i.e. index into the line) for the given index into the
|
||||
original string.
|
||||
|
||||
Arguments:
|
||||
line_breaks: A list of indices into a string at which each line break
|
||||
was found.
|
||||
line_number: The line number of the line we want to find.
|
||||
index: The index into the original string.
|
||||
|
||||
Returns:
|
||||
The column (i.e. index into the line) for the given index into the
|
||||
original string.
|
||||
|
||||
"""
|
||||
start_index = line_breaks[line_number - 1]
|
||||
column = index - start_index + 1
|
||||
return column
|
||||
|
||||
|
||||
class Rules(Enum):
|
||||
"""
|
||||
An Enum of each rule which the linter will check.
|
||||
"""
|
||||
mako_missing_default = ('mako-missing-default', 'The default page directive with h filter is missing.')
|
||||
mako_unparsable_expression = ('mako-unparsable-expression', 'The expression could not be properly parsed.')
|
||||
mako_unwanted_html_filter = ('mako-unwanted-html-filter', 'Remove explicit h filters when it is provided by the page directive.')
|
||||
mako_invalid_html_filter = ('mako-invalid-html-filter', 'The expression is using an invalid filter in an HTML context.')
|
||||
mako_invalid_js_filter = ('mako-invalid-js-filter', 'The expression is using an invalid filter in a JavaScript context.')
|
||||
mako_js_string_missing_quotes = ('mako-js-string-missing-quotes', 'An expression using the js_escape_string filter must have surrounding quotes.')
|
||||
|
||||
underscore_not_escaped = ('underscore-not-escaped', 'Expressions should be escaped using <%- expression %>.')
|
||||
|
||||
def __init__(self, rule_id, rule_summary):
|
||||
self.rule_id = rule_id
|
||||
self.rule_summary = rule_summary
|
||||
|
||||
|
||||
class RuleViolation(object):
|
||||
"""
|
||||
Base class representing a rule violation which can be used for reporting.
|
||||
"""
|
||||
|
||||
def __init__(self, rule):
|
||||
"""
|
||||
Init method.
|
||||
|
||||
Arguments:
|
||||
rule: The Rule which was violated.
|
||||
|
||||
"""
|
||||
self.rule = rule
|
||||
self.full_path = ''
|
||||
|
||||
def prepare_results(self, full_path, file_string, line_breaks):
|
||||
"""
|
||||
Preps this instance for results reporting.
|
||||
|
||||
Arguments:
|
||||
full_path: Path of the file in violation.
|
||||
file_string: The contents of the file in violation.
|
||||
line_breaks: A list of indices into file_string at which each line
|
||||
break was found.
|
||||
|
||||
"""
|
||||
self.full_path = full_path
|
||||
|
||||
def print_results(self):
|
||||
"""
|
||||
Prints the results represented by this rule violation.
|
||||
"""
|
||||
print "{}: {}".format(self.full_path, self.rule.rule_id)
|
||||
|
||||
|
||||
class ExpressionRuleViolation(RuleViolation):
|
||||
"""
|
||||
A class representing a particular rule violation for expressions which
|
||||
contain more specific details of the location of the violation for reporting
|
||||
purposes.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, rule, expression):
|
||||
"""
|
||||
Init method.
|
||||
|
||||
Arguments:
|
||||
rule: The Rule which was violated.
|
||||
expression: The expression that was in violation.
|
||||
|
||||
"""
|
||||
super(ExpressionRuleViolation, self).__init__(rule)
|
||||
self.expression = expression
|
||||
self.start_line = 0
|
||||
self.start_column = 0
|
||||
self.end_line = 0
|
||||
self.end_column = 0
|
||||
self.lines = []
|
||||
|
||||
def prepare_results(self, full_path, file_string, line_breaks):
|
||||
"""
|
||||
Preps this instance for results reporting.
|
||||
|
||||
Arguments:
|
||||
full_path: Path of the file in violation.
|
||||
file_string: The contents of the file in violation.
|
||||
line_breaks: A list of indices into file_string at which each line
|
||||
break was found.
|
||||
|
||||
"""
|
||||
self.full_path = full_path
|
||||
start_index = self.expression['start_index']
|
||||
self.start_line = _get_line_number(self, line_breaks, start_index)
|
||||
self.start_column = _get_column_number(self, line_breaks, self.start_line, start_index)
|
||||
end_index = self.expression['end_index']
|
||||
if end_index > 0:
|
||||
self.end_line = _get_line_number(self, line_breaks, end_index)
|
||||
self.end_column = _get_column_number(self, line_breaks, self.end_line, end_index)
|
||||
else:
|
||||
self.end_line = self.start_line
|
||||
self.end_column = '?'
|
||||
for line_number in range(self.start_line, self.end_line + 1):
|
||||
self.lines.append(_get_line(self, file_string, line_breaks, line_number))
|
||||
|
||||
def print_results(self):
|
||||
"""
|
||||
Prints the results represented by this rule violation.
|
||||
"""
|
||||
for line_number in range(self.start_line, self.end_line + 1):
|
||||
if (line_number == self.start_line):
|
||||
column = self.start_column
|
||||
rule_id = self.rule.rule_id + ":"
|
||||
else:
|
||||
column = 1
|
||||
rule_id = " " * (len(self.rule.rule_id) + 1)
|
||||
print "{}: {}:{}: {} {}".format(
|
||||
self.full_path,
|
||||
line_number,
|
||||
column,
|
||||
rule_id,
|
||||
self.lines[line_number - self.start_line - 1]
|
||||
)
|
||||
|
||||
|
||||
class FileResults(object):
|
||||
"""
|
||||
Contains the results, or violations, for a file.
|
||||
"""
|
||||
|
||||
def __init__(self, full_path):
|
||||
"""
|
||||
Init method.
|
||||
|
||||
Arguments:
|
||||
full_path: The full path for this file.
|
||||
|
||||
"""
|
||||
self.full_path = full_path
|
||||
self.violations = []
|
||||
|
||||
def prepare_results(self, file_string):
|
||||
"""
|
||||
Prepares the results for output for this file.
|
||||
|
||||
Arguments:
|
||||
file_string: The string of content for this file.
|
||||
|
||||
"""
|
||||
line_breaks = _get_line_breaks(self, file_string)
|
||||
for violation in self.violations:
|
||||
violation.prepare_results(self.full_path, file_string, line_breaks)
|
||||
|
||||
def print_results(self, options):
|
||||
"""
|
||||
Prints the results (i.e. violations) in this file.
|
||||
|
||||
Arguments:
|
||||
options: A list of the following options:
|
||||
is_quiet: True to print only file names, and False to print
|
||||
all violations.
|
||||
|
||||
"""
|
||||
if options['is_quiet']:
|
||||
print self.full_path
|
||||
else:
|
||||
for violation in self.violations:
|
||||
violation.print_results()
|
||||
|
||||
|
||||
class MakoTemplateLinter(object):
|
||||
"""
|
||||
The linter for Mako template files.
|
||||
"""
|
||||
|
||||
_skip_mako_dirs = _skip_dirs
|
||||
|
||||
def process_file(self, directory, file_name):
|
||||
"""
|
||||
Process file to determine if it is a Mako template file and
|
||||
if it is safe.
|
||||
|
||||
Arguments:
|
||||
directory (string): The directory of the file to be checked
|
||||
file_name (string): A filename for a potential Mako file
|
||||
|
||||
Returns:
|
||||
The file results containing any violations, or None if the file is
|
||||
never checked.
|
||||
|
||||
"""
|
||||
if not self._is_mako_directory(directory):
|
||||
return None
|
||||
|
||||
# TODO: When safe-by-default is turned on at the platform level, will we:
|
||||
# 1. Turn it on for .html only, or
|
||||
# 2. Turn it on for all files, and have different rulesets that have
|
||||
# different rules of .xml, .html, .js, .txt Mako templates (e.g. use
|
||||
# the n filter to turn off h for some of these)?
|
||||
# For now, we only check .html and .xml files
|
||||
if not (file_name.lower().endswith('.html') or file_name.lower().endswith('.xml')):
|
||||
return None
|
||||
|
||||
return self._load_and_check_mako_file_is_safe(directory + '/' + file_name)
|
||||
|
||||
def _is_mako_directory(self, directory):
|
||||
"""
|
||||
Determines if the provided directory is a directory that could contain
|
||||
Mako template files that need to be linted.
|
||||
|
||||
Arguments:
|
||||
directory: The directory to be linted.
|
||||
|
||||
Returns:
|
||||
True if this directory should be linted for Mako template violations
|
||||
and False otherwise.
|
||||
"""
|
||||
if _is_skip_dir(self._skip_mako_dirs, directory):
|
||||
return False
|
||||
|
||||
if (directory.find('/templates/') >= 0) or directory.endswith('/templates'):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _load_and_check_mako_file_is_safe(self, mako_file_full_path):
|
||||
"""
|
||||
Loads the Mako template file and checks if it is in violation.
|
||||
|
||||
Arguments:
|
||||
mako_file_full_path: The file to be loaded and linted.
|
||||
|
||||
Returns:
|
||||
The file results containing any violations, or None if none found.
|
||||
|
||||
"""
|
||||
mako_template = _load_file(self, mako_file_full_path)
|
||||
results = FileResults(mako_file_full_path)
|
||||
self._check_mako_file_is_safe(mako_template, results)
|
||||
if len(results.violations) > 0:
|
||||
return results
|
||||
else:
|
||||
return None
|
||||
|
||||
def _check_mako_file_is_safe(self, mako_template, results):
|
||||
"""
|
||||
Checks for violations in a Mako template.
|
||||
|
||||
Arguments:
|
||||
mako_template: The contents of the Mako template.
|
||||
results: A file results objects to which violations will be added.
|
||||
|
||||
"""
|
||||
has_page_default = self._has_page_default(mako_template, results)
|
||||
if not has_page_default:
|
||||
results.violations.append(RuleViolation(Rules.mako_missing_default))
|
||||
self._check_mako_expressions(mako_template, has_page_default, results)
|
||||
results.prepare_results(mako_template)
|
||||
|
||||
def _has_page_default(self, mako_template, results):
|
||||
"""
|
||||
Checks if the Mako template contains the page expression marking it as
|
||||
safe by default.
|
||||
|
||||
Arguments:
|
||||
mako_template: The contents of the Mako template.
|
||||
results: A list of results into which violations will be added.
|
||||
|
||||
"""
|
||||
page_h_filter_regex = re.compile('<%page expression_filter=(?:"h"|\'h\')\s*/>')
|
||||
page_match = page_h_filter_regex.search(mako_template)
|
||||
return page_match
|
||||
|
||||
def _check_mako_expressions(self, mako_template, has_page_default, results):
|
||||
"""
|
||||
Searches for Mako expressions and then checks if they contain
|
||||
violations.
|
||||
|
||||
Arguments:
|
||||
mako_template: The contents of the Mako template.
|
||||
has_page_default: True if the page is marked as default, False
|
||||
otherwise.
|
||||
results: A list of results into which violations will be added.
|
||||
|
||||
"""
|
||||
expressions = self._find_mako_expressions(mako_template)
|
||||
contexts = self._get_contexts(mako_template)
|
||||
for expression in expressions:
|
||||
if expression['expression'] is None:
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_unparsable_expression, expression
|
||||
))
|
||||
continue
|
||||
|
||||
context = self._get_context(contexts, expression['start_index'])
|
||||
self._check_filters(mako_template, expression, context, has_page_default, results)
|
||||
|
||||
def _check_filters(self, mako_template, expression, context, has_page_default, results):
|
||||
"""
|
||||
Checks that the filters used in the given Mako expression are valid
|
||||
for the given context.
|
||||
|
||||
Arguments:
|
||||
mako_template: The contents of the Mako template.
|
||||
expression: A dict containing the start_index, end_index, and
|
||||
expression (text) of the expression.
|
||||
context: The context of the page in which the expression was found
|
||||
(e.g. javascript, html).
|
||||
has_page_default: True if the page is marked as default, False
|
||||
otherwise.
|
||||
results: A list of results into which violations will be added.
|
||||
|
||||
"""
|
||||
# finds "| n, h}" when given "${x | n, h}"
|
||||
filters_regex = re.compile('\|[a-zA-Z_,\s]*\}')
|
||||
filters_match = filters_regex.search(expression['expression'])
|
||||
if filters_match is None:
|
||||
if context == 'javascript':
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_invalid_js_filter, expression
|
||||
))
|
||||
return
|
||||
|
||||
filters = filters_match.group()[1:-1].replace(" ", "").split(",")
|
||||
if context == 'html':
|
||||
if (len(filters) == 1) and (filters[0] == 'h'):
|
||||
if has_page_default:
|
||||
# suppress this violation if the page default hasn't been set,
|
||||
# otherwise the template might get less safe
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_unwanted_html_filter, expression
|
||||
))
|
||||
elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'dump_html_escaped_json'):
|
||||
# {x | n, dump_html_escaped_json} is valid
|
||||
pass
|
||||
else:
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_invalid_html_filter, expression
|
||||
))
|
||||
|
||||
else:
|
||||
if (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'dump_js_escaped_json'):
|
||||
# {x | n, dump_js_escaped_json} is valid
|
||||
pass
|
||||
elif (len(filters) == 2) and (filters[0] == 'n') and (filters[1] == 'js_escaped_string'):
|
||||
# {x | n, js_escaped_string} is valid, if surrounded by quotes
|
||||
prior_character = mako_template[expression['start_index'] - 1]
|
||||
next_character = mako_template[expression['end_index'] + 1]
|
||||
has_surrounding_quotes = (prior_character == '\'' and next_character == '\'') or \
|
||||
(prior_character == '"' and next_character == '"')
|
||||
if not has_surrounding_quotes:
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_js_string_missing_quotes, expression
|
||||
))
|
||||
else:
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.mako_invalid_js_filter, expression
|
||||
))
|
||||
|
||||
def _get_contexts(self, mako_template):
|
||||
"""
|
||||
Returns a data structure that represents the indices at which the
|
||||
template changes from HTML context to JavaScript and back.
|
||||
|
||||
Return:
|
||||
A list of dicts where each dict contains the 'index' of the context
|
||||
and the context 'type' (e.g. 'html' or 'javascript').
|
||||
"""
|
||||
contexts_re = re.compile(r"""
|
||||
<script.*?>| # script tag start
|
||||
</script>| # script tag end
|
||||
<%static:require_module.*?>| # require js script tag start
|
||||
</%static:require_module> # require js script tag end""", re.VERBOSE + re.IGNORECASE)
|
||||
media_type_re = re.compile(r"""type=['"].*?['"]""", re.IGNORECASE)
|
||||
|
||||
contexts = [{'index': 0, 'type': 'html'}]
|
||||
for context in contexts_re.finditer(mako_template):
|
||||
match_string = context.group().lower()
|
||||
if match_string.startswith("<script"):
|
||||
match_type = media_type_re.search(match_string)
|
||||
context_type = 'javascript'
|
||||
if match_type is not None:
|
||||
# get media type (e.g. get text/javascript from
|
||||
# type="text/javascript")
|
||||
match_type = match_type.group()[6:-1].lower()
|
||||
if match_type not in [
|
||||
'text/javascript',
|
||||
'text/ecmascript',
|
||||
'application/ecmascript',
|
||||
'application/javascript',
|
||||
]:
|
||||
#TODO: What are other types found, and are these really
|
||||
# html? Or do we need to properly handle unknown
|
||||
# contexts?
|
||||
context_type = 'html'
|
||||
contexts.append({'index': context.end(), 'type': context_type})
|
||||
elif match_string.startswith("</script"):
|
||||
contexts.append({'index': context.start(), 'type': 'html'})
|
||||
elif match_string.startswith("<%static:require_module"):
|
||||
contexts.append({'index': context.end(), 'type': 'javascript'})
|
||||
else:
|
||||
contexts.append({'index': context.start(), 'type': 'html'})
|
||||
|
||||
return contexts
|
||||
|
||||
def _get_context(self, contexts, index):
|
||||
"""
|
||||
Gets the context (e.g. javascript, html) of the template at the given
|
||||
index.
|
||||
|
||||
Arguments:
|
||||
contexts: A list of dicts where each dict contains the 'index' of the context
|
||||
and the context 'type' (e.g. 'html' or 'javascript').
|
||||
index: The index for which we want the context.
|
||||
|
||||
Returns:
|
||||
The context (e.g. javascript or html) for the given index.
|
||||
"""
|
||||
current_context = contexts[0]['type']
|
||||
for context in contexts:
|
||||
if context['index'] <= index:
|
||||
current_context = context['type']
|
||||
else:
|
||||
break
|
||||
return current_context
|
||||
|
||||
def _find_mako_expressions(self, mako_template):
|
||||
"""
|
||||
Finds all the Mako expressions in a Mako template and creates a list
|
||||
of dicts for each expression.
|
||||
|
||||
Arguments:
|
||||
mako_template: The content of the Mako template.
|
||||
|
||||
Returns:
|
||||
A list of dicts for each expression, where the dict contains the
|
||||
following:
|
||||
|
||||
start_index: The index of the start of the expression.
|
||||
end_index: The index of the end of the expression, or -1 if
|
||||
unparseable.
|
||||
expression: The text of the expression.
|
||||
"""
|
||||
start_delim = '${'
|
||||
start_index = 0
|
||||
expressions = []
|
||||
|
||||
while True:
|
||||
start_index = mako_template.find(start_delim, start_index)
|
||||
if (start_index < 0):
|
||||
break
|
||||
end_index = self._find_balanced_end_curly(mako_template, start_index + len(start_delim), 0)
|
||||
|
||||
if end_index < 0:
|
||||
expression = None
|
||||
else:
|
||||
expression = mako_template[start_index:end_index + 1]
|
||||
|
||||
expression = {
|
||||
'start_index': start_index,
|
||||
'end_index': end_index,
|
||||
'expression': expression
|
||||
}
|
||||
expressions.append(expression)
|
||||
|
||||
# end_index of -1 represents a parsing error and we may find others
|
||||
start_index = max(start_index + len(start_delim), end_index)
|
||||
|
||||
return expressions
|
||||
|
||||
def _find_balanced_end_curly(self, mako_template, start_index, num_open_curlies):
|
||||
"""
|
||||
Finds the end index of the Mako expression's ending curly brace. Skips
|
||||
any additional open/closed braces that are balanced inside. Does not
|
||||
take into consideration strings.
|
||||
|
||||
Arguments:
|
||||
mako_template: The template text.
|
||||
start_index: The start index of the Mako expression.
|
||||
num_open_curlies: The current number of open expressions.
|
||||
|
||||
Returns:
|
||||
The end index of the expression, or -1 if unparseable.
|
||||
"""
|
||||
end_curly_index = mako_template.find('}', start_index)
|
||||
if end_curly_index < 0:
|
||||
# if we can't find an end_curly, let's just quit
|
||||
return end_curly_index
|
||||
|
||||
open_curly_index = mako_template.find('{', start_index, end_curly_index)
|
||||
|
||||
if (open_curly_index >= 0) and (open_curly_index < end_curly_index):
|
||||
if mako_template[open_curly_index - 1] == '$':
|
||||
# assume if we find "${" it is the start of the next expression
|
||||
# and we have a parse error
|
||||
return -1
|
||||
else:
|
||||
return self._find_balanced_end_curly(mako_template, open_curly_index + 1, num_open_curlies + 1)
|
||||
|
||||
if num_open_curlies == 0:
|
||||
return end_curly_index
|
||||
else:
|
||||
return self._find_balanced_end_curly(mako_template, end_curly_index + 1, num_open_curlies - 1)
|
||||
|
||||
|
||||
class UnderscoreTemplateLinter(object):
|
||||
"""
|
||||
The linter for Underscore.js template files.
|
||||
"""
|
||||
|
||||
_skip_underscore_dirs = _skip_dirs
|
||||
|
||||
def process_file(self, directory, file_name):
|
||||
"""
|
||||
Process file to determine if it is an Underscore template file and
|
||||
if it is safe.
|
||||
|
||||
Arguments:
|
||||
directory (string): The directory of the file to be checked
|
||||
file_name (string): A filename for a potential underscore file
|
||||
|
||||
Returns:
|
||||
The file results containing any violations, or None if the file is
|
||||
never checked.
|
||||
|
||||
"""
|
||||
if not self._is_underscore_directory(directory):
|
||||
return
|
||||
|
||||
if not file_name.lower().endswith('.underscore'):
|
||||
return
|
||||
|
||||
full_path = directory + '/' + file_name
|
||||
return self._load_and_check_underscore_file_is_safe(full_path)
|
||||
|
||||
def print_results(self, options):
|
||||
"""
|
||||
Prints all results (i.e. violations) for all files that failed this
|
||||
linter.
|
||||
|
||||
Arguments:
|
||||
options: A list of the options.
|
||||
|
||||
"""
|
||||
for result in self._results:
|
||||
result.print_results(options)
|
||||
|
||||
def _is_underscore_directory(self, directory):
|
||||
"""
|
||||
Determines if the provided directory is a directory that could contain
|
||||
Underscore.js template files that need to be linted.
|
||||
|
||||
Arguments:
|
||||
directory: The directory to be linted.
|
||||
|
||||
Returns:
|
||||
True if this directory should be linted for Underscore.js template
|
||||
violations and False otherwise.
|
||||
"""
|
||||
if _is_skip_dir(self._skip_underscore_dirs, directory):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _load_and_check_underscore_file_is_safe(self, file_full_path):
|
||||
"""
|
||||
Loads the Underscore.js template file and checks if it is in violation.
|
||||
|
||||
Arguments:
|
||||
file_full_path: The file to be loaded and linted
|
||||
|
||||
Returns:
|
||||
The file results containing any violations, or None if the file is
|
||||
never checked.
|
||||
|
||||
"""
|
||||
underscore_template = _load_file(self, file_full_path)
|
||||
results = FileResults(file_full_path)
|
||||
self._check_underscore_file_is_safe(underscore_template, results)
|
||||
if len(results.violations) > 0:
|
||||
return results
|
||||
else:
|
||||
return None
|
||||
|
||||
def _check_underscore_file_is_safe(self, underscore_template, results):
|
||||
"""
|
||||
Checks for violations in an Underscore.js template.
|
||||
|
||||
Arguments:
|
||||
underscore_template: The contents of the Underscore.js template.
|
||||
results: A file results objects to which violations will be added.
|
||||
|
||||
"""
|
||||
self._check_underscore_expressions(underscore_template, results)
|
||||
results.prepare_results(underscore_template)
|
||||
|
||||
def _check_underscore_expressions(self, underscore_template, results):
|
||||
"""
|
||||
Searches for Underscore.js expressions that contain violations.
|
||||
|
||||
Arguments:
|
||||
underscore_template: The contents of the Underscore.js template.
|
||||
results: A list of results into which violations will be added.
|
||||
|
||||
"""
|
||||
expressions = self._find_unescaped_expressions(underscore_template)
|
||||
for expression in expressions:
|
||||
results.violations.append(ExpressionRuleViolation(
|
||||
Rules.underscore_not_escaped, expression
|
||||
))
|
||||
|
||||
def _find_unescaped_expressions(self, underscore_template):
|
||||
"""
|
||||
Returns a list of unsafe expressions.
|
||||
|
||||
At this time all expressions that are unescaped are considered unsafe.
|
||||
|
||||
Arguments:
|
||||
underscore_template: The contents of the Underscore.js template.
|
||||
|
||||
Returns:
|
||||
A list of dicts for each expression, where the dict contains the
|
||||
following:
|
||||
|
||||
start_index: The index of the start of the expression.
|
||||
end_index: The index of the end of the expression.
|
||||
expression: The text of the expression.
|
||||
"""
|
||||
unescaped_expression_regex = re.compile("<%=.*?%>")
|
||||
|
||||
expressions = []
|
||||
for match in unescaped_expression_regex.finditer(underscore_template):
|
||||
expression = {
|
||||
'start_index': match.start(),
|
||||
'end_index': match.end(),
|
||||
'expression': match.group(),
|
||||
}
|
||||
expressions.append(expression)
|
||||
|
||||
return expressions
|
||||
|
||||
|
||||
def _process_current_walk(current_walk, template_linters, options):
|
||||
"""
|
||||
For each linter, lints all the files in the current os walk. This means
|
||||
finding and printing violations.
|
||||
|
||||
Arguments:
|
||||
current_walk: A walk returned by os.walk().
|
||||
template_linters: A list of linting objects.
|
||||
options: A list of the options.
|
||||
|
||||
"""
|
||||
walk_directory = current_walk[0]
|
||||
walk_files = current_walk[2]
|
||||
for walk_file in walk_files:
|
||||
for template_linter in template_linters:
|
||||
results = template_linter.process_file(walk_directory, walk_file)
|
||||
if results is not None:
|
||||
results.print_results(options)
|
||||
|
||||
|
||||
def _process_os_walk(starting_dir, template_linters, options):
|
||||
"""
|
||||
For each linter, lints all the directories in the starting directory.
|
||||
|
||||
Arguments:
|
||||
starting_dir: The initial directory to begin the walk.
|
||||
template_linters: A list of linting objects.
|
||||
options: A list of the options.
|
||||
|
||||
"""
|
||||
for current_walk in os.walk(starting_dir):
|
||||
_process_current_walk(current_walk, template_linters, options)
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Used to execute the linter. Use --help option for help.
|
||||
|
||||
Prints all of the violations.
|
||||
"""
|
||||
#TODO: Use click
|
||||
if '--help' in sys.argv:
|
||||
print "Check that templates are safe."
|
||||
print "Options:"
|
||||
print " --quiet Just display the filenames with violations."
|
||||
print
|
||||
print "Rules:"
|
||||
for rule in Rules.__members__.values():
|
||||
print " {0[0]}: {0[1]}".format(rule.value)
|
||||
return
|
||||
|
||||
is_quiet = '--quiet' in sys.argv
|
||||
|
||||
options = {
|
||||
'is_quiet': is_quiet,
|
||||
}
|
||||
|
||||
template_linters = [MakoTemplateLinter(), UnderscoreTemplateLinter()]
|
||||
_process_os_walk('.', template_linters, options)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
scripts/tests/__init__.py
Normal file
0
scripts/tests/__init__.py
Normal file
315
scripts/tests/test_safe_template_linter.py
Normal file
315
scripts/tests/test_safe_template_linter.py
Normal file
@@ -0,0 +1,315 @@
|
||||
"""
|
||||
Tests for safe_template_linter.py
|
||||
"""
|
||||
from ddt import ddt, data
|
||||
import textwrap
|
||||
from unittest import TestCase
|
||||
|
||||
from ..safe_template_linter import (
|
||||
FileResults, MakoTemplateLinter, Rules
|
||||
)
|
||||
|
||||
|
||||
@ddt
|
||||
class TestMakoTemplateLinter(TestCase):
|
||||
"""
|
||||
Test MakoTemplateLinter
|
||||
"""
|
||||
|
||||
@data(
|
||||
{'directory': 'lms/templates', 'expected': True},
|
||||
{'directory': 'lms/templates/support', 'expected': True},
|
||||
{'directory': 'lms/templates/support', 'expected': True},
|
||||
{'directory': './test_root/staticfiles/templates', 'expected': False},
|
||||
{'directory': './some/random/path', 'expected': False},
|
||||
)
|
||||
def test_is_mako_directory(self, data):
|
||||
"""
|
||||
Test _is_mako_directory correctly determines mako directories
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
|
||||
self.assertEqual(linter._is_mako_directory(data['directory']), data['expected'])
|
||||
|
||||
def test_check_page_default_with_default_provided(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe with default causes no violation
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
mako_template = """
|
||||
<%page expression_filter="h"/>
|
||||
"""
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 0)
|
||||
|
||||
def test_check_page_default_with_no_default_provided(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe with no default causes violation
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
mako_template = ""
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 1)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_missing_default)
|
||||
|
||||
def test_check_mako_expressions_in_html(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in html context provides appropriate violations
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
<%page expression_filter="h"/>
|
||||
${x}
|
||||
${'{{unbalanced-nested'}
|
||||
${x | n}
|
||||
${x | h}
|
||||
${x | n, dump_html_escaped_json}
|
||||
${x | n, dump_js_escaped_json}
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 4)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_unparsable_expression)
|
||||
start_index = results.violations[0].expression['start_index']
|
||||
self.assertEqual(mako_template[start_index:start_index + 24], "${'{{unbalanced-nested'}")
|
||||
self.assertEqual(results.violations[1].rule, Rules.mako_invalid_html_filter)
|
||||
self.assertEqual(results.violations[1].expression['expression'], "${x | n}")
|
||||
self.assertEqual(results.violations[2].rule, Rules.mako_unwanted_html_filter)
|
||||
self.assertEqual(results.violations[2].expression['expression'], "${x | h}")
|
||||
self.assertEqual(results.violations[3].rule, Rules.mako_invalid_html_filter)
|
||||
self.assertEqual(results.violations[3].expression['expression'], "${x | n, dump_js_escaped_json}")
|
||||
|
||||
def test_check_mako_expressions_in_html_without_default(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in html context without the page level
|
||||
default h filter suppresses expression level violation
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
${x | h}
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 1)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_missing_default)
|
||||
|
||||
def test_check_mako_expressions_in_javascript(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in JavaScript script context provides
|
||||
appropriate violations
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
<%page expression_filter="h"/>
|
||||
<script>
|
||||
${x}
|
||||
${'{{unbalanced-nested'}
|
||||
${x | n}
|
||||
${x | h}
|
||||
${x | n, dump_html_escaped_json}
|
||||
${x | n, dump_js_escaped_json}
|
||||
"${x-with-quotes | n, js_escaped_string}"
|
||||
'${x-with-quotes | n, js_escaped_string}'
|
||||
${x-missing-quotes | n, js_escaped_string}
|
||||
</script>
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 6)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[0].expression['expression'], "${x}")
|
||||
self.assertEqual(results.violations[1].rule, Rules.mako_unparsable_expression)
|
||||
start_index = results.violations[1].expression['start_index']
|
||||
self.assertEqual(mako_template[start_index:start_index + 24], "${'{{unbalanced-nested'}")
|
||||
self.assertEqual(results.violations[2].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[2].expression['expression'], "${x | n}")
|
||||
self.assertEqual(results.violations[3].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[3].expression['expression'], "${x | h}")
|
||||
self.assertEqual(results.violations[4].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[4].expression['expression'], "${x | n, dump_html_escaped_json}")
|
||||
self.assertEqual(results.violations[5].rule, Rules.mako_js_string_missing_quotes)
|
||||
self.assertEqual(results.violations[5].expression['expression'], "${x-missing-quotes | n, js_escaped_string}")
|
||||
|
||||
def test_check_mako_expressions_in_require_js(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in JavaScript require context provides
|
||||
appropriate violations
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
<%page expression_filter="h"/>
|
||||
<%static:require_module module_name="${x}" class_name="TestFactory">
|
||||
${x}
|
||||
${x | n, js_escaped_string}
|
||||
</%static:require_module>
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 2)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[0].expression['expression'], "${x}")
|
||||
self.assertEqual(results.violations[1].rule, Rules.mako_js_string_missing_quotes)
|
||||
self.assertEqual(results.violations[1].expression['expression'], "${x | n, js_escaped_string}")
|
||||
|
||||
@data(
|
||||
{'media_type': 'text/javascript', 'expected_violations': 0},
|
||||
{'media_type': 'text/ecmascript', 'expected_violations': 0},
|
||||
{'media_type': 'application/ecmascript', 'expected_violations': 0},
|
||||
{'media_type': 'application/javascript', 'expected_violations': 0},
|
||||
{'media_type': 'text/template', 'expected_violations': 1},
|
||||
{'media_type': 'unknown/type', 'expected_violations': 1},
|
||||
)
|
||||
def test_check_mako_expressions_in_script_type(self, data):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in script tag with different media types
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
<%page expression_filter="h"/>
|
||||
<script type="{}">
|
||||
${{x | n, dump_js_escaped_json}}
|
||||
</script>
|
||||
""").format(data['media_type'])
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), data['expected_violations'])
|
||||
|
||||
def test_check_mako_expressions_in_mixed_contexts(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe in mixed contexts provides
|
||||
appropriate violations
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
<%page expression_filter="h"/>
|
||||
${x | h}
|
||||
<script type="text/javascript">
|
||||
${x | h}
|
||||
</script>
|
||||
${x | h}
|
||||
<%static:require_module module_name="${x}" class_name="TestFactory">
|
||||
${x | h}
|
||||
</%static:require_module>
|
||||
${x | h}
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 5)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_unwanted_html_filter)
|
||||
self.assertEqual(results.violations[1].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[2].rule, Rules.mako_unwanted_html_filter)
|
||||
self.assertEqual(results.violations[3].rule, Rules.mako_invalid_js_filter)
|
||||
self.assertEqual(results.violations[4].rule, Rules.mako_unwanted_html_filter)
|
||||
|
||||
def test_expression_detailed_results(self):
|
||||
"""
|
||||
Test _check_mako_file_is_safe provides detailed results, including line
|
||||
numbers, columns, and line
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
results = FileResults('')
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
${x | n}
|
||||
<div>${(
|
||||
'tabbed-multi-line-expression'
|
||||
) | n}</div>
|
||||
${'{{unbalanced-nested' | n}
|
||||
""")
|
||||
|
||||
linter._check_mako_file_is_safe(mako_template, results)
|
||||
|
||||
self.assertEqual(len(results.violations), 4)
|
||||
self.assertEqual(results.violations[0].rule, Rules.mako_missing_default)
|
||||
|
||||
self.assertEqual(results.violations[1].start_line, 2)
|
||||
self.assertEqual(results.violations[1].start_column, 1)
|
||||
self.assertEqual(results.violations[1].end_line, 2)
|
||||
self.assertEqual(results.violations[1].end_column, 8)
|
||||
self.assertEqual(len(results.violations[1].lines), 1)
|
||||
self.assertEqual(results.violations[1].lines[0], "${x | n}")
|
||||
|
||||
self.assertEqual(results.violations[2].start_line, 3)
|
||||
self.assertEqual(results.violations[2].start_column, 10)
|
||||
self.assertEqual(results.violations[2].end_line, 5)
|
||||
self.assertEqual(results.violations[2].end_column, 10)
|
||||
self.assertEqual(len(results.violations[2].lines), 3)
|
||||
self.assertEqual(results.violations[2].lines[0], " <div>${(")
|
||||
self.assertEqual(
|
||||
results.violations[2].lines[1],
|
||||
" 'tabbed-multi-line-expression'"
|
||||
)
|
||||
self.assertEqual(results.violations[2].lines[2], " ) | n}</div>")
|
||||
|
||||
self.assertEqual(results.violations[3].start_line, 6)
|
||||
self.assertEqual(results.violations[3].start_column, 1)
|
||||
self.assertEqual(results.violations[3].end_line, 6)
|
||||
self.assertEqual(results.violations[3].end_column, "?")
|
||||
self.assertEqual(len(results.violations[3].lines), 1)
|
||||
self.assertEqual(
|
||||
results.violations[3].lines[0],
|
||||
"${'{{unbalanced-nested' | n}"
|
||||
)
|
||||
|
||||
def test_find_mako_expressions(self):
|
||||
"""
|
||||
Test _find_mako_expressions finds appropriate expressions
|
||||
"""
|
||||
linter = MakoTemplateLinter()
|
||||
|
||||
mako_template = textwrap.dedent("""
|
||||
${x}
|
||||
${tabbed-x}
|
||||
${(
|
||||
'tabbed-multi-line-expression'
|
||||
)}
|
||||
${'{{unbalanced-nested'}
|
||||
${'{{nested}}'}
|
||||
<div>no expression</div>
|
||||
""")
|
||||
|
||||
expressions = linter._find_mako_expressions(mako_template)
|
||||
|
||||
self.assertEqual(len(expressions), 5)
|
||||
self._validate_expression(mako_template, expressions[0], '${x}')
|
||||
self._validate_expression(mako_template, expressions[1], '${tabbed-x}')
|
||||
self._validate_expression(mako_template, expressions[2], "${(\n 'tabbed-multi-line-expression'\n )}")
|
||||
|
||||
# won't parse unbalanced nested {}'s
|
||||
unbalanced_expression = "${'{{unbalanced-nested'}"
|
||||
self.assertEqual(expressions[3]['end_index'], -1)
|
||||
start_index = expressions[3]['start_index']
|
||||
self.assertEqual(mako_template[start_index:start_index + len(unbalanced_expression)], unbalanced_expression)
|
||||
self.assertEqual(expressions[3]['expression'], None)
|
||||
|
||||
self._validate_expression(mako_template, expressions[4], "${'{{nested}}'}")
|
||||
|
||||
def _validate_expression(self, template_string, expression, expected_expression):
|
||||
start_index = expression['start_index']
|
||||
end_index = expression['end_index']
|
||||
self.assertEqual(template_string[start_index:end_index + 1], expected_expression)
|
||||
self.assertEqual(expression['expression'], expected_expression)
|
||||
Reference in New Issue
Block a user