Move MakoLinter.
This commit is contained in:
@@ -1407,6 +1407,296 @@ class JavaScriptLinter(BaseLinter):
|
||||
return False
|
||||
|
||||
|
||||
class PythonLinter(BaseLinter):
    """
    The linter for Python files.

    The current implementation of the linter does naive Python parsing. It does
    not use the parser. One known issue is that parsing errors found inside a
    docstring need to be disabled, rather than being automatically skipped.
    Skipping docstrings is an enhancement that could be added.
    """

    # Matches the opening of a .format(), Text() or HTML() call. The negative
    # lookbehind keeps names that merely end in "HTML"/"Text" from matching.
    # Compiled once here because the search runs repeatedly while a file is
    # being scanned.
    _REGEX_FUNCTION_OPEN = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")

    # Matches a single- or double-quoted string (on one line) that contains
    # both a '<' and an interpolation placeholder (%s or {name}), in either
    # order.
    _PATTERN_INTERPOLATE_HTML_INNER = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
    _REGEX_INTERPOLATE_HTML = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
        _PATTERN_INTERPOLATE_HTML_INNER, _PATTERN_INTERPOLATE_HTML_INNER
    ))

    def __init__(self):
        """
        Init method.

        Extends the skip directories read from self._skip_dirs with the
        test directories that the Python linter does not check.
        """
        super(PythonLinter, self).__init__()
        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')

    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Python file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Python file

        Returns:
            The file results containing any violations. The results are
            returned untouched when the path is not a file, is not a .py
            file, is this linter's own source file, or lives in a skipped
            directory.

        """
        file_full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(file_full_path)

        if not results.is_file:
            return results

        if not file_name.lower().endswith('.py'):
            return results

        # skip this linter code (i.e. safe_template_linter.py)
        if file_name == os.path.basename(__file__):
            return results

        if not self._is_valid_directory(self._skip_python_dirs, directory):
            return results

        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)

    def check_python_file_is_safe(self, file_contents, results):
        """
        Checks for violations in a Python file.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
        self._check_deprecated_display_name(file_contents, results)
        self._check_custom_escape(file_contents, results)
        self._check_html(file_contents, results)
        # Runs last, after every check above has appended its violations.
        results.prepare_results(file_contents, line_comment_delim='#')

    def _check_deprecated_display_name(self, file_contents, results):
        """
        Checks that the deprecated display_name_with_default_escaped is not
        used. Adds violation to results if there is a problem.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_deprecated_display_name, expression
            ))

    def _check_custom_escape(self, file_contents, results):
        """
        Checks for custom escaping calls, rather than using a standard escaping
        method.

        A line is flagged when it contains both a literal '<' and the escaped
        entity '&lt;' (in either order), which suggests hand-rolled escaping
        such as replacing '<' with '&lt;'.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        for match in re.finditer(r"(<.*&lt;|&lt;.*<)", file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_custom_escape, expression
            ))

    def _check_html(self, file_contents, results):
        """
        Checks many rules related to HTML in a Python file.

        Repeatedly scans for the next outermost HTML(), Text() or format()
        call, then checks the text before that call for string interpolation
        with HTML.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        # Text() Expressions keyed by its end index
        text_calls_by_end_index = {}
        # HTML() Expressions keyed by its end index
        html_calls_by_end_index = {}
        start_index = 0
        while True:
            # check HTML(), Text() and format() calls
            result = self._check_html_text_format(
                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
            )
            interpolate_end_index = result['interpolate_end_index']

            # check for interpolation including HTML outside of function calls
            self._check_interpolate_with_html(
                file_contents, start_index, interpolate_end_index, results
            )

            # advance the search
            start_index = result['next_start_index']

            # end if there is nothing left to search; a None end index means
            # the interpolation check above already ran to the end of file
            if interpolate_end_index is None:
                break

    def _check_html_text_format(
            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
    ):
        """
        Checks for HTML(), Text() and format() calls, and various rules related
        to these calls.

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index at which to begin searching for a function
                call.
            text_calls_by_end_index: Text() Expressions keyed by its end index.
                Keys are the end index converted to a string. Updated in place.
            html_calls_by_end_index: HTML() Expressions keyed by its end index.
                Keys are the end index converted to a string. Updated in place.
            results: A list of results into which violations will be added.

        Returns:
            A dict with the following keys:
                'next_start_index': The start index of the next search for a
                    function call.
                'interpolate_end_index': The end index of the next next search
                    for interpolation with html, or None if the end of file
                    should be used.

        """
        regex_function_open = self._REGEX_FUNCTION_OPEN
        interpolate_end_index = None
        # end of the outermost call once one has been found; later searches
        # are limited to the inside of that call
        end_index = None
        strings = None
        html_calls = []
        while True:
            # first search for HTML(), Text(), or .format()
            if end_index is None:
                function_match = regex_function_open.search(file_contents, start_index)
            else:
                function_match = regex_function_open.search(file_contents, start_index, end_index)
            if function_match is None:
                break

            if interpolate_end_index is None:
                interpolate_end_index = function_match.start()
            function_close_result = self._find_closing_char_index(
                None, '(', ')', file_contents, start_index=function_match.end(),
            )
            if function_close_result is None:
                # no closing parenthesis was found for this call
                results.violations.append(ExpressionRuleViolation(
                    Rules.python_parse_error, Expression(function_match.start())
                ))
            else:
                function_open = function_match.group()
                expression = Expression(
                    function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
                    start_delim=function_open, end_delim=")"
                )
                # if this an outer most Text(), HTML(), or format() call
                if end_index is None:
                    end_index = expression.end_index
                    interpolate_end_index = expression.start_index
                    strings = function_close_result['strings']
                if function_open == '.format(':
                    if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
                        # A format() with HTML()/Text() arguments must be
                        # called on a Text() or HTML() call, i.e. one that
                        # ends exactly where this ".format(" begins.
                        format_start_key = str(function_match.start())
                        is_wrapped = (
                            format_start_key in text_calls_by_end_index or
                            format_start_key in html_calls_by_end_index
                        )
                        if not is_wrapped:
                            results.violations.append(ExpressionRuleViolation(
                                Rules.python_requires_html_or_text, expression
                            ))
                else:  # expression is 'HTML(' or 'Text('
                    # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
                    # Generally, format() would be the issue if there is one.
                    if regex_function_open.search(expression.expression_inner) is not None:
                        results.violations.append(ExpressionRuleViolation(
                            Rules.python_close_before_format, expression
                        ))
                    if function_open == 'Text(':
                        text_calls_by_end_index[str(expression.end_index)] = expression
                    else:  # function_open == 'HTML('
                        html_calls_by_end_index[str(expression.end_index)] = expression
                        html_calls.append(expression)

            # continue searching just past this opening, so calls nested
            # inside the outermost call are visited too
            start_index = function_match.end()

        # checks strings in the outer most call to ensure they are properly
        # wrapped with HTML()
        self._check_format_html_strings_wrapped(strings, html_calls, results)

        # compute where to continue the search: past the outermost call if one
        # was found, otherwise wherever this search stopped
        next_start_index = start_index if end_index is None else end_index

        return {
            'next_start_index': next_start_index,
            'interpolate_end_index': interpolate_end_index,
        }

    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
        """
        Checks that any string inside a format call that seems to contain HTML
        is wrapped with a call to HTML().

        Arguments:
            strings: A list of ParseStrings for each string inside the format()
                call, or None when there is nothing to check.
            html_calls: A list of Expressions representing all of the HTML()
                calls inside the format() call.
            results: A list of results into which violations will be added.

        """
        if strings is None:
            return

        html_strings = []
        html_wrapped_strings = []
        # find all strings that contain HTML
        for string in strings:
            if '<' not in string.string:
                continue
            html_strings.append(string)
            # check if HTML string is appropriately wrapped, i.e. it lies
            # strictly inside one of the HTML() calls
            for html_call in html_calls:
                if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
                    html_wrapped_strings.append(string)
                    break

        # loop through all unwrapped strings
        for unsafe_string in set(html_strings) - set(html_wrapped_strings):
            unsafe_string_expression = Expression(unsafe_string.start_index)
            results.violations.append(ExpressionRuleViolation(
                Rules.python_wrap_html, unsafe_string_expression
            ))

    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
        """
        Find interpolations with html that fall outside of any calls to HTML(),
        Text(), and .format().

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index to start the search, or None if nothing to
                search
            end_index: The index to end the search, or None if the end of file
                should be used.
            results: A list of results into which violations will be added.

        """
        regex_interpolate_html = self._REGEX_INTERPOLATE_HTML
        if end_index is None:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
        else:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
        for match_html_string in interpolate_string_iter:
            expression = Expression(match_html_string.start(), match_html_string.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_interpolate_html, expression
            ))
|
||||
|
||||
|
||||
class MakoTemplateLinter(BaseLinter):
|
||||
"""
|
||||
The linter for Mako template files.
|
||||
@@ -1968,296 +2258,6 @@ class MakoTemplateLinter(BaseLinter):
|
||||
return expressions
|
||||
|
||||
|
||||
class PythonLinter(BaseLinter):
    """
    The linter for Python files.

    The current implementation of the linter does naive Python parsing. It does
    not use the parser. One known issue is that parsing errors found inside a
    docstring need to be disabled, rather than being automatically skipped.
    Skipping docstrings is an enhancement that could be added.
    """

    # Matches the opening of a .format(), Text() or HTML() call. The negative
    # lookbehind keeps names that merely end in "HTML"/"Text" from matching.
    # Compiled once here because the search runs repeatedly while a file is
    # being scanned.
    _REGEX_FUNCTION_OPEN = re.compile(r"(\.format\(|(?<!\w)HTML\(|(?<!\w)Text\()")

    # Matches a single- or double-quoted string (on one line) that contains
    # both a '<' and an interpolation placeholder (%s or {name}), in either
    # order.
    _PATTERN_INTERPOLATE_HTML_INNER = r'(<.*%s|%s.*<|<.*{\w*}|{\w*}.*<)'
    _REGEX_INTERPOLATE_HTML = re.compile(r"""(".*{}.*"|'.*{}.*')""".format(
        _PATTERN_INTERPOLATE_HTML_INNER, _PATTERN_INTERPOLATE_HTML_INNER
    ))

    def __init__(self):
        """
        Init method.

        Extends the skip directories read from self._skip_dirs with the
        test directories that the Python linter does not check.
        """
        super(PythonLinter, self).__init__()
        self._skip_python_dirs = self._skip_dirs + ('tests', 'test/acceptance')

    def process_file(self, directory, file_name):
        """
        Process file to determine if it is a Python file and
        if it is safe.

        Arguments:
            directory (string): The directory of the file to be checked
            file_name (string): A filename for a potential Python file

        Returns:
            The file results containing any violations. The results are
            returned untouched when the path is not a file, is not a .py
            file, is this linter's own source file, or lives in a skipped
            directory.

        """
        file_full_path = os.path.normpath(directory + '/' + file_name)
        results = FileResults(file_full_path)

        if not results.is_file:
            return results

        if not file_name.lower().endswith('.py'):
            return results

        # skip this linter code (i.e. safe_template_linter.py)
        if file_name == os.path.basename(__file__):
            return results

        if not self._is_valid_directory(self._skip_python_dirs, directory):
            return results

        return self._load_and_check_file_is_safe(file_full_path, self.check_python_file_is_safe, results)

    def check_python_file_is_safe(self, file_contents, results):
        """
        Checks for violations in a Python file.

        Arguments:
            file_contents: The contents of the Python file.
            results: A file results objects to which violations will be added.

        """
        self._check_concat_with_html(file_contents, Rules.python_concat_html, results)
        self._check_deprecated_display_name(file_contents, results)
        self._check_custom_escape(file_contents, results)
        self._check_html(file_contents, results)
        # Runs last, after every check above has appended its violations.
        results.prepare_results(file_contents, line_comment_delim='#')

    def _check_deprecated_display_name(self, file_contents, results):
        """
        Checks that the deprecated display_name_with_default_escaped is not
        used. Adds violation to results if there is a problem.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        for match in re.finditer(r'\.display_name_with_default_escaped', file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_deprecated_display_name, expression
            ))

    def _check_custom_escape(self, file_contents, results):
        """
        Checks for custom escaping calls, rather than using a standard escaping
        method.

        A line is flagged when it contains both a literal '<' and the escaped
        entity '&lt;' (in either order), which suggests hand-rolled escaping
        such as replacing '<' with '&lt;'.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        for match in re.finditer(r"(<.*&lt;|&lt;.*<)", file_contents):
            expression = Expression(match.start(), match.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_custom_escape, expression
            ))

    def _check_html(self, file_contents, results):
        """
        Checks many rules related to HTML in a Python file.

        Repeatedly scans for the next outermost HTML(), Text() or format()
        call, then checks the text before that call for string interpolation
        with HTML.

        Arguments:
            file_contents: The contents of the Python file
            results: A list of results into which violations will be added.

        """
        # Text() Expressions keyed by its end index
        text_calls_by_end_index = {}
        # HTML() Expressions keyed by its end index
        html_calls_by_end_index = {}
        start_index = 0
        while True:
            # check HTML(), Text() and format() calls
            result = self._check_html_text_format(
                file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
            )
            interpolate_end_index = result['interpolate_end_index']

            # check for interpolation including HTML outside of function calls
            self._check_interpolate_with_html(
                file_contents, start_index, interpolate_end_index, results
            )

            # advance the search
            start_index = result['next_start_index']

            # end if there is nothing left to search; a None end index means
            # the interpolation check above already ran to the end of file
            if interpolate_end_index is None:
                break

    def _check_html_text_format(
            self, file_contents, start_index, text_calls_by_end_index, html_calls_by_end_index, results
    ):
        """
        Checks for HTML(), Text() and format() calls, and various rules related
        to these calls.

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index at which to begin searching for a function
                call.
            text_calls_by_end_index: Text() Expressions keyed by its end index.
                Keys are the end index converted to a string. Updated in place.
            html_calls_by_end_index: HTML() Expressions keyed by its end index.
                Keys are the end index converted to a string. Updated in place.
            results: A list of results into which violations will be added.

        Returns:
            A dict with the following keys:
                'next_start_index': The start index of the next search for a
                    function call.
                'interpolate_end_index': The end index of the next next search
                    for interpolation with html, or None if the end of file
                    should be used.

        """
        regex_function_open = self._REGEX_FUNCTION_OPEN
        interpolate_end_index = None
        # end of the outermost call once one has been found; later searches
        # are limited to the inside of that call
        end_index = None
        strings = None
        html_calls = []
        while True:
            # first search for HTML(), Text(), or .format()
            if end_index is None:
                function_match = regex_function_open.search(file_contents, start_index)
            else:
                function_match = regex_function_open.search(file_contents, start_index, end_index)
            if function_match is None:
                break

            if interpolate_end_index is None:
                interpolate_end_index = function_match.start()
            function_close_result = self._find_closing_char_index(
                None, '(', ')', file_contents, start_index=function_match.end(),
            )
            if function_close_result is None:
                # no closing parenthesis was found for this call
                results.violations.append(ExpressionRuleViolation(
                    Rules.python_parse_error, Expression(function_match.start())
                ))
            else:
                function_open = function_match.group()
                expression = Expression(
                    function_match.start(), function_close_result['close_char_index'] + 1, file_contents,
                    start_delim=function_open, end_delim=")"
                )
                # if this an outer most Text(), HTML(), or format() call
                if end_index is None:
                    end_index = expression.end_index
                    interpolate_end_index = expression.start_index
                    strings = function_close_result['strings']
                if function_open == '.format(':
                    if 'HTML(' in expression.expression_inner or 'Text(' in expression.expression_inner:
                        # A format() with HTML()/Text() arguments must be
                        # called on a Text() or HTML() call, i.e. one that
                        # ends exactly where this ".format(" begins.
                        format_start_key = str(function_match.start())
                        is_wrapped = (
                            format_start_key in text_calls_by_end_index or
                            format_start_key in html_calls_by_end_index
                        )
                        if not is_wrapped:
                            results.violations.append(ExpressionRuleViolation(
                                Rules.python_requires_html_or_text, expression
                            ))
                else:  # expression is 'HTML(' or 'Text('
                    # HTML() and Text() calls cannot contain any inner HTML(), Text(), or format() calls.
                    # Generally, format() would be the issue if there is one.
                    if regex_function_open.search(expression.expression_inner) is not None:
                        results.violations.append(ExpressionRuleViolation(
                            Rules.python_close_before_format, expression
                        ))
                    if function_open == 'Text(':
                        text_calls_by_end_index[str(expression.end_index)] = expression
                    else:  # function_open == 'HTML('
                        html_calls_by_end_index[str(expression.end_index)] = expression
                        html_calls.append(expression)

            # continue searching just past this opening, so calls nested
            # inside the outermost call are visited too
            start_index = function_match.end()

        # checks strings in the outer most call to ensure they are properly
        # wrapped with HTML()
        self._check_format_html_strings_wrapped(strings, html_calls, results)

        # compute where to continue the search: past the outermost call if one
        # was found, otherwise wherever this search stopped
        next_start_index = start_index if end_index is None else end_index

        return {
            'next_start_index': next_start_index,
            'interpolate_end_index': interpolate_end_index,
        }

    def _check_format_html_strings_wrapped(self, strings, html_calls, results):
        """
        Checks that any string inside a format call that seems to contain HTML
        is wrapped with a call to HTML().

        Arguments:
            strings: A list of ParseStrings for each string inside the format()
                call, or None when there is nothing to check.
            html_calls: A list of Expressions representing all of the HTML()
                calls inside the format() call.
            results: A list of results into which violations will be added.

        """
        if strings is None:
            return

        html_strings = []
        html_wrapped_strings = []
        # find all strings that contain HTML
        for string in strings:
            if '<' not in string.string:
                continue
            html_strings.append(string)
            # check if HTML string is appropriately wrapped, i.e. it lies
            # strictly inside one of the HTML() calls
            for html_call in html_calls:
                if html_call.start_index < string.start_index < string.end_index < html_call.end_index:
                    html_wrapped_strings.append(string)
                    break

        # loop through all unwrapped strings
        for unsafe_string in set(html_strings) - set(html_wrapped_strings):
            unsafe_string_expression = Expression(unsafe_string.start_index)
            results.violations.append(ExpressionRuleViolation(
                Rules.python_wrap_html, unsafe_string_expression
            ))

    def _check_interpolate_with_html(self, file_contents, start_index, end_index, results):
        """
        Find interpolations with html that fall outside of any calls to HTML(),
        Text(), and .format().

        Arguments:
            file_contents: The contents of the Python file
            start_index: The index to start the search, or None if nothing to
                search
            end_index: The index to end the search, or None if the end of file
                should be used.
            results: A list of results into which violations will be added.

        """
        regex_interpolate_html = self._REGEX_INTERPOLATE_HTML
        if end_index is None:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index)
        else:
            interpolate_string_iter = regex_interpolate_html.finditer(file_contents, start_index, end_index)
        for match_html_string in interpolate_string_iter:
            expression = Expression(match_html_string.start(), match_html_string.end())
            results.violations.append(ExpressionRuleViolation(
                Rules.python_interpolate_html, expression
            ))
|
||||
|
||||
|
||||
def _process_file(full_path, template_linters, options, out):
|
||||
"""
|
||||
For each linter, lints the provided file. This means finding and printing
|
||||
|
||||
Reference in New Issue
Block a user