Move MakoLinter.

This commit is contained in:
Robert Raposa
2016-05-02 05:04:51 -04:00
parent a6b9ba7d21
commit fb13dc6499

View File

@@ -1407,6 +1407,296 @@ class JavaScriptLinter(BaseLinter):
return False
class PythonLinter(BaseLinter):
    """
    The linter for Python files.

    The current implementation of the linter does naive Python parsing. It does
    not use the parser. One known issue is that parsing errors found inside a
    docstring need to be disabled, rather than being automatically skipped.
    Skipping docstrings is an enhancement that could be added.
    """

    # Finds the opening of .format(), Text() and HTML() calls. The lookbehind
    # rejects HTML( / Text( when directly preceded by a word character, so a
    # longer identifier that merely ends in "HTML" or "Text" is not matched.
    _REGEX_FUNCTION_OPEN = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")

    # Finds a quoted span (on one line, since '.' does not match a newline)
    # that contains both a '<' and a %s or {name} placeholder.
    _PATTERN_INTERPOLATE_HTML_INNER = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
    _REGEX_INTERPOLATE_HTML = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
        _PATTERN_INTERPOLATE_HTML_INNER, _PATTERN_INTERPOLATE_HTML_INNER
    ))

    def __init__(self):
        """
        Init method.

        Extends the skip directories already set on the instance
        (self._skip_dirs) with the test directories that this linter ignores.
        """
        super(PythonLinter, self).__init__()
        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')

    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Python file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Python file

        Returns:
            The file results containing any violations.

        """
        # os.path.join (rather than gluing with '/') keeps a relative file
        # relative when directory is the empty string.
        file_full_path = os.path.normpath(os.path.join(directory, file_name))
        results = FileResults(file_full_path)

        if not results.is_file:
            return results

        if not file_name.lower().endswith('.py'):
            return results

        # skip this linter code (i.e. safe_template_linter.py)
        if file_name == os.path.basename(__file__):
            return results

        if not self._is_valid_directory(self._skip_python_dirs, directory):
            return results

        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)

    def check_python_file_is_safe(self, file_contents, results):
        """
        Checks for violations in a Python file.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
        self._check_deprecated_display_name(file_contents, results)
        self._check_custom_escape(file_contents, results)
        self._check_html(file_contents, results)
        # runs last, after every check above has appended its violations
        results.prepare_results(file_contents, line_comment_delim='#')

    def _check_deprecated_display_name(self, file_contents, results):
        """
        Checks that the deprecated display_name_with_default_escaped is not
        used. Adds violation to results if there is a problem.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_deprecated_display_name, expression
            ))

    def _check_custom_escape(self, file_contents, results):
        """
        Checks for custom escaping calls, rather than using a standard escaping
        method.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        # a '<' and a literal '&lt;' on the same line, in either order
        for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_custom_escape, expression
            ))

    def _check_html(self, file_contents, results):
        """
        Checks many rules related to HTML in a Python file.

        Alternates between checking one outermost HTML()/Text()/format() call
        and checking the text that precedes it for interpolation with HTML.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        # Text() Expressions keyed by its end index
        text_calls_by_end_index = {}
        # HTML() Expressions keyed by its end index
        html_calls_by_end_index = {}
        start_index = 0

        while True:
            # check HTML(), Text() and format() calls
            result = self._check_html_text_format(
                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
            )
            next_start_index = result['next_start_index']
            interpolate_end_index = result['interpolate_end_index']

            # check for interpolation including HTML outside of function calls
            self._check_interpolate_with_html(
                file_contents, start_index, interpolate_end_index, results
            )

            # advance the search
            start_index = next_start_index

            # end if there is nothing left to search
            if interpolate_end_index is None:
                break

    def _check_html_text_format(
            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
    ):
        """
        Checks for HTML(), Text() and format() calls, and various rules related
        to these calls.

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index at which to begin searching for a function
                call.
            text_calls_by_end_index: Text() Expressions keyed by its end index.
                Keys are the end index converted with str(). Updated in place.
            html_calls_by_end_index: HTML() Expressions keyed by its end index.
                Keys are the end index converted with str(). Updated in place.
            results: A file results object to which violations will be added.

        Returns:
            A dict with the following keys:
                'next_start_index': The start index of the next search for a
                    function call.
                'interpolate_end_index': The end index of the next search
                    for interpolation with html, or None if the end of file
                    should be used.

        """
        regex_function_open = self._REGEX_FUNCTION_OPEN
        interpolate_end_index = None
        # end of the outermost call; once known, the search stays inside it
        end_index = None
        strings = None
        html_calls = []

        while True:
            # first search for HTML(), Text(), or .format()
            if end_index is None:
                function_match = regex_function_open.search(file_contents, start_index)
            else:
                function_match = regex_function_open.search(file_contents, start_index, end_index)

            if function_match is None:
                break

            if interpolate_end_index is None:
                interpolate_end_index = function_match.start()

            function_close_result = self._find_closing_char_index(
                None, '(', ')', file_contents, start_index=function_match.end(),
            )
            if function_close_result is None:
                # no closing paren was found for this call
                results.violations.append(ExpressionRuleViolation(
                    Rules.python_parse_error, Expression(function_match.start())
                ))
            else:
                expression = Expression(
                    function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
                    start_delim=function_match.group(), end_delim=")"
                )
                # if this an outer most Text(), HTML(), or format() call
                if end_index is None:
                    end_index = expression.end_index
                    interpolate_end_index = expression.start_index
                    strings = function_close_result['strings']

                if function_match.group() == '.format(':
                    if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
                        # the .format( must start exactly where a previously
                        # seen Text() or HTML() call ended
                        match_start_key = str(function_match.start())
                        is_wrapped_with_text = match_start_key in text_calls_by_end_index
                        is_wrapped_with_html = match_start_key in html_calls_by_end_index
                        if not is_wrapped_with_text and not is_wrapped_with_html:
                            results.violations.append(ExpressionRuleViolation(
                                Rules.python_requires_html_or_text, expression
                            ))
                else:  # expression is 'HTML(' or 'Text('
                    # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
                    # Generally, format() would be the issue if there is one.
                    if regex_function_open.search(expression.expression_inner) is not None:
                        results.violations.append(ExpressionRuleViolation(
                            Rules.python_close_before_format, expression
                        ))

                    if function_match.group() == 'Text(':
                        text_calls_by_end_index[str(expression.end_index)] = expression
                    else:  # function_match.group() == 'HTML(':
                        html_calls_by_end_index[str(expression.end_index)] = expression
                        html_calls.append(expression)

            # continue just past the opening of this call, so that calls
            # nested inside it are examined as well
            start_index = function_match.end()

        # checks strings in the outer most call to ensure they are properly
        # wrapped with HTML()
        self._check_format_html_strings_wrapped(strings, html_calls, results)

        # compute where to continue the search: past the outermost call when
        # one was parsed, otherwise wherever the scan stopped
        if end_index is None:
            next_start_index = start_index
        else:
            next_start_index = end_index

        return {
            'next_start_index': next_start_index,
            'interpolate_end_index': interpolate_end_index,
        }

    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
        """
        Checks that any string inside a format call that seems to contain HTML
        is wrapped with a call to HTML().

        Arguments:
            strings: A list of ParseStrings for each string inside the format()
                call, or None if there are no strings to check.
            html_calls: A list of Expressions representing all of the HTML()
                calls inside the format() call.
            results: A file results object to which violations will be added.

        """
        html_strings = []
        html_wrapped_strings = []
        if strings is not None:
            # find all strings that contain HTML
            for string in strings:
                if '<' not in string.string:
                    continue
                html_strings.append(string)
                # check if HTML string is appropriately wrapped
                is_wrapped = any(
                    html_call.start_index < string.start_index < string.end_index < html_call.end_index
                    for html_call in html_calls
                )
                if is_wrapped:
                    html_wrapped_strings.append(string)

        # loop through all unwrapped strings, ordered by position so that
        # violations are appended in a repeatable order
        unwrapped_strings = sorted(
            set(html_strings) - set(html_wrapped_strings),
            key=lambda unwrapped: unwrapped.start_index
        )
        for unsafe_string in unwrapped_strings:
            unsafe_string_expression = Expression(unsafe_string.start_index)
            results.violations.append(ExpressionRuleViolation(
                Rules.python_wrap_html, unsafe_string_expression
            ))

    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
        """
        Find interpolations with html that fall outside of any calls to HTML(),
        Text(), and .format().

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index to start the search, or None if nothing to
                search
            end_index: The index to end the search, or None if the end of file
                should be used.
            results: A file results object to which violations will be added.

        """
        if start_index is None:
            # nothing to search
            return

        # used to find interpolation with HTML
        regex_interpolate_html = self._REGEX_INTERPOLATE_HTML
        if end_index is None:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
        else:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)

        for match_html_string in interpolate_string_iter:
            expression = Expression(match_html_string.start(), match_html_string.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_interpolate_html, expression
            ))
class MakoTemplateLinter(BaseLinter):
"""
The linter for Mako template files.
@@ -1968,296 +2258,6 @@ class MakoTemplateLinter(BaseLinter):
return expressions
class PythonLinter(BaseLinter):
    """
    The linter for Python files.

    The current implementation of the linter does naive Python parsing. It does
    not use the parser. One known issue is that parsing errors found inside a
    docstring need to be disabled, rather than being automatically skipped.
    Skipping docstrings is an enhancement that could be added.
    """

    # Finds the opening of .format(), Text() and HTML() calls. The lookbehind
    # rejects HTML( / Text( when directly preceded by a word character, so a
    # longer identifier that merely ends in "HTML" or "Text" is not matched.
    _REGEX_FUNCTION_OPEN = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")

    # Finds a quoted span (on one line, since '.' does not match a newline)
    # that contains both a '<' and a %s or {name} placeholder.
    _PATTERN_INTERPOLATE_HTML_INNER = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
    _REGEX_INTERPOLATE_HTML = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
        _PATTERN_INTERPOLATE_HTML_INNER, _PATTERN_INTERPOLATE_HTML_INNER
    ))

    def __init__(self):
        """
        Init method.

        Extends the skip directories already set on the instance
        (self._skip_dirs) with the test directories that this linter ignores.
        """
        super(PythonLinter, self).__init__()
        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')

    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Python file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Python file

        Returns:
            The file results containing any violations.

        """
        # os.path.join (rather than gluing with '/') keeps a relative file
        # relative when directory is the empty string.
        file_full_path = os.path.normpath(os.path.join(directory, file_name))
        results = FileResults(file_full_path)

        if not results.is_file:
            return results

        if not file_name.lower().endswith('.py'):
            return results

        # skip this linter code (i.e. safe_template_linter.py)
        if file_name == os.path.basename(__file__):
            return results

        if not self._is_valid_directory(self._skip_python_dirs, directory):
            return results

        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)

    def check_python_file_is_safe(self, file_contents, results):
        """
        Checks for violations in a Python file.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
        self._check_deprecated_display_name(file_contents, results)
        self._check_custom_escape(file_contents, results)
        self._check_html(file_contents, results)
        # runs last, after every check above has appended its violations
        results.prepare_results(file_contents, line_comment_delim='#')

    def _check_deprecated_display_name(self, file_contents, results):
        """
        Checks that the deprecated display_name_with_default_escaped is not
        used. Adds violation to results if there is a problem.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_deprecated_display_name, expression
            ))

    def _check_custom_escape(self, file_contents, results):
        """
        Checks for custom escaping calls, rather than using a standard escaping
        method.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        # a '<' and a literal '&lt;' on the same line, in either order
        for match in re.finditer("(<.*&lt;|&lt;.*<)", file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_custom_escape, expression
            ))

    def _check_html(self, file_contents, results):
        """
        Checks many rules related to HTML in a Python file.

        Alternates between checking one outermost HTML()/Text()/format() call
        and checking the text that precedes it for interpolation with HTML.

        Arguments:
            file_contents: The contents of the Python file
            results: A file results object to which violations will be added.

        """
        # Text() Expressions keyed by its end index
        text_calls_by_end_index = {}
        # HTML() Expressions keyed by its end index
        html_calls_by_end_index = {}
        start_index = 0

        while True:
            # check HTML(), Text() and format() calls
            result = self._check_html_text_format(
                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
            )
            next_start_index = result['next_start_index']
            interpolate_end_index = result['interpolate_end_index']

            # check for interpolation including HTML outside of function calls
            self._check_interpolate_with_html(
                file_contents, start_index, interpolate_end_index, results
            )

            # advance the search
            start_index = next_start_index

            # end if there is nothing left to search
            if interpolate_end_index is None:
                break

    def _check_html_text_format(
            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
    ):
        """
        Checks for HTML(), Text() and format() calls, and various rules related
        to these calls.

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index at which to begin searching for a function
                call.
            text_calls_by_end_index: Text() Expressions keyed by its end index.
                Keys are the end index converted with str(). Updated in place.
            html_calls_by_end_index: HTML() Expressions keyed by its end index.
                Keys are the end index converted with str(). Updated in place.
            results: A file results object to which violations will be added.

        Returns:
            A dict with the following keys:
                'next_start_index': The start index of the next search for a
                    function call.
                'interpolate_end_index': The end index of the next search
                    for interpolation with html, or None if the end of file
                    should be used.

        """
        regex_function_open = self._REGEX_FUNCTION_OPEN
        interpolate_end_index = None
        # end of the outermost call; once known, the search stays inside it
        end_index = None
        strings = None
        html_calls = []

        while True:
            # first search for HTML(), Text(), or .format()
            if end_index is None:
                function_match = regex_function_open.search(file_contents, start_index)
            else:
                function_match = regex_function_open.search(file_contents, start_index, end_index)

            if function_match is None:
                break

            if interpolate_end_index is None:
                interpolate_end_index = function_match.start()

            function_close_result = self._find_closing_char_index(
                None, '(', ')', file_contents, start_index=function_match.end(),
            )
            if function_close_result is None:
                # no closing paren was found for this call
                results.violations.append(ExpressionRuleViolation(
                    Rules.python_parse_error, Expression(function_match.start())
                ))
            else:
                expression = Expression(
                    function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
                    start_delim=function_match.group(), end_delim=")"
                )
                # if this an outer most Text(), HTML(), or format() call
                if end_index is None:
                    end_index = expression.end_index
                    interpolate_end_index = expression.start_index
                    strings = function_close_result['strings']

                if function_match.group() == '.format(':
                    if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
                        # the .format( must start exactly where a previously
                        # seen Text() or HTML() call ended
                        match_start_key = str(function_match.start())
                        is_wrapped_with_text = match_start_key in text_calls_by_end_index
                        is_wrapped_with_html = match_start_key in html_calls_by_end_index
                        if not is_wrapped_with_text and not is_wrapped_with_html:
                            results.violations.append(ExpressionRuleViolation(
                                Rules.python_requires_html_or_text, expression
                            ))
                else:  # expression is 'HTML(' or 'Text('
                    # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
                    # Generally, format() would be the issue if there is one.
                    if regex_function_open.search(expression.expression_inner) is not None:
                        results.violations.append(ExpressionRuleViolation(
                            Rules.python_close_before_format, expression
                        ))

                    if function_match.group() == 'Text(':
                        text_calls_by_end_index[str(expression.end_index)] = expression
                    else:  # function_match.group() == 'HTML(':
                        html_calls_by_end_index[str(expression.end_index)] = expression
                        html_calls.append(expression)

            # continue just past the opening of this call, so that calls
            # nested inside it are examined as well
            start_index = function_match.end()

        # checks strings in the outer most call to ensure they are properly
        # wrapped with HTML()
        self._check_format_html_strings_wrapped(strings, html_calls, results)

        # compute where to continue the search: past the outermost call when
        # one was parsed, otherwise wherever the scan stopped
        if end_index is None:
            next_start_index = start_index
        else:
            next_start_index = end_index

        return {
            'next_start_index': next_start_index,
            'interpolate_end_index': interpolate_end_index,
        }

    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
        """
        Checks that any string inside a format call that seems to contain HTML
        is wrapped with a call to HTML().

        Arguments:
            strings: A list of ParseStrings for each string inside the format()
                call, or None if there are no strings to check.
            html_calls: A list of Expressions representing all of the HTML()
                calls inside the format() call.
            results: A file results object to which violations will be added.

        """
        html_strings = []
        html_wrapped_strings = []
        if strings is not None:
            # find all strings that contain HTML
            for string in strings:
                if '<' not in string.string:
                    continue
                html_strings.append(string)
                # check if HTML string is appropriately wrapped
                is_wrapped = any(
                    html_call.start_index < string.start_index < string.end_index < html_call.end_index
                    for html_call in html_calls
                )
                if is_wrapped:
                    html_wrapped_strings.append(string)

        # loop through all unwrapped strings, ordered by position so that
        # violations are appended in a repeatable order
        unwrapped_strings = sorted(
            set(html_strings) - set(html_wrapped_strings),
            key=lambda unwrapped: unwrapped.start_index
        )
        for unsafe_string in unwrapped_strings:
            unsafe_string_expression = Expression(unsafe_string.start_index)
            results.violations.append(ExpressionRuleViolation(
                Rules.python_wrap_html, unsafe_string_expression
            ))

    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
        """
        Find interpolations with html that fall outside of any calls to HTML(),
        Text(), and .format().

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index to start the search, or None if nothing to
                search
            end_index: The index to end the search, or None if the end of file
                should be used.
            results: A file results object to which violations will be added.

        """
        if start_index is None:
            # nothing to search
            return

        # used to find interpolation with HTML
        regex_interpolate_html = self._REGEX_INTERPOLATE_HTML
        if end_index is None:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
        else:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)

        for match_html_string in interpolate_string_iter:
            expression = Expression(match_html_string.start(), match_html_string.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_interpolate_html, expression
            ))
def _process_file(full_path, template_linters, options, out):
"""
For each linter, lints the provided file. This means finding and printing