From 6c69b6d435e6a9ee8ea8d42d8f62d4b379d0697e Mon Sep 17 00:00:00 2001 From: Manjinder Singh <49171515+jinder1s@users.noreply.github.com> Date: Thu, 2 Jan 2020 10:01:52 -0500 Subject: [PATCH] Adding code to output pytest warnings. (#22570) * Added pytest-json-report plugin - modifying app-opts in setup.cfg - adding hook to all conftest.py files in repo - setting report to be saved to test_root/log/warnings.json - Writing custom logic to save json report to avoid overwrite if pytest called twice This was created to allow us to easily parse through test warnings in jenkins --- cms/conftest.py | 3 + common/lib/conftest.py | 2 + common/test/conftest.py | 1 + conftest.py | 3 + openedx/core/process_warnings.py | 246 ++++++++++++++++++++++++++++++ openedx/core/pytest_hooks.py | 79 ++++++++++ openedx/core/write_to_html.py | 98 ++++++++++++ pavelib/paver_tests/conftest.py | 2 + requirements/constraints.txt | 6 + requirements/edx-sandbox/py35.txt | 2 +- requirements/edx/base.in | 2 +- requirements/edx/base.txt | 10 +- requirements/edx/coverage.in | 1 + requirements/edx/coverage.txt | 4 + requirements/edx/development.txt | 13 +- requirements/edx/testing.in | 1 + requirements/edx/testing.txt | 13 +- scripts/Jenkinsfiles/python | 20 ++- setup.cfg | 2 +- 19 files changed, 488 insertions(+), 20 deletions(-) create mode 100644 openedx/core/process_warnings.py create mode 100644 openedx/core/pytest_hooks.py create mode 100644 openedx/core/write_to_html.py diff --git a/cms/conftest.py b/cms/conftest.py index 03a0490d7b..92a5c06163 100644 --- a/cms/conftest.py +++ b/cms/conftest.py @@ -12,6 +12,9 @@ import os import contracts import pytest +from openedx.core.pytest_hooks import pytest_json_modifyreport # pylint: disable=unused-import +from openedx.core.pytest_hooks import pytest_sessionfinish # pylint: disable=unused-import + # Patch the xml libs before anything else. from safe_lxml import defuse_xml_libs diff --git a/common/lib/conftest.py b/common/lib/conftest.py index 0c19cc206a..7bd2d497ca 100644 --- a/common/lib/conftest.py +++ b/common/lib/conftest.py @@ -7,6 +7,8 @@ import pytest from safe_lxml import defuse_xml_libs +from openedx.core.pytest_hooks import pytest_configure # pylint: disable=unused-import + defuse_xml_libs() diff --git a/common/test/conftest.py b/common/test/conftest.py index a23d052351..055da4ea8a 100644 --- a/common/test/conftest.py +++ b/common/test/conftest.py @@ -3,6 +3,7 @@ # Patch the xml libs before anything else. +from openedx.core.pytest_hooks import pytest_configure # pylint: disable=unused-import from safe_lxml import defuse_xml_libs defuse_xml_libs() diff --git a/conftest.py b/conftest.py index 8d5b764909..57ccae5296 100644 --- a/conftest.py +++ b/conftest.py @@ -10,6 +10,9 @@ import pytest # avoid duplicating the implementation from cms.conftest import _django_clear_site_cache, pytest_configure # pylint: disable=unused-import +from openedx.core.pytest_hooks import pytest_json_modifyreport # pylint: disable=unused-import +from openedx.core.pytest_hooks import pytest_sessionfinish # pylint: disable=unused-import + # When using self.assertEquals, diffs are truncated. We don't want that, always # show the whole diff. diff --git a/openedx/core/process_warnings.py b/openedx/core/process_warnings.py new file mode 100644 index 0000000000..6c749e76f1 --- /dev/null +++ b/openedx/core/process_warnings.py @@ -0,0 +1,246 @@ +""" +Script to process pytest warnings output by pytest-json-report plugin and output it as a html +""" +from __future__ import absolute_import +from __future__ import print_function +import json +import os +import io +import re +import argparse +from collections import Counter +import pandas as pd + +from write_to_html import ( + HtmlOutlineWriter, +) # noqa pylint: disable=import-error,useless-suppression + +columns = [ + "message", + "category", + "filename", + "lineno", + "high_location", + "label", + "num", + "deprecated", +] +columns_index_dict = {key: index for index, key in enumerate(columns)} + + +def seperate_warnings_by_location(warnings_data): + """ + Warnings originate from multiple locations, this function takes in list of warning objects + and separates them based on their filename location + """ + + # first create regex for each n file location + warnings_locations = { + ".*/python\d\.\d/site-packages/.*\.py": "python", # noqa pylint: disable=W1401 + ".*/edx-platform/lms/.*\.py": "lms", # noqa pylint: disable=W1401 + ".*/edx-platform/openedx/.*\.py": "openedx", # noqa pylint: disable=W1401 + ".*/edx-platform/cms/.*\.py": "cms", # noqa pylint: disable=W1401 + ".*/edx-platform/common/.*\.py": "common", # noqa pylint: disable=W1401 + } + + # separate into locations flow: + # - iterate through each wanring_object, see if its filename matches any regex in warning locations. + # - If so, change high_location index on warnings_object to location name + for warnings_object in warnings_data: + warning_origin_located = False + for key in warnings_locations: + if ( + re.search(key, warnings_object[columns_index_dict["filename"]]) + is not None + ): + warnings_object[ + columns_index_dict["high_location"] + ] = warnings_locations[key] + warning_origin_located = True + break + if not warning_origin_located: + warnings_object[columns_index_dict["high_location"]] = "other" + return warnings_data + + +def convert_warning_dict_to_list(warning_dict): + """ + converts our data dict into our defined list based on columns defined at top of this file + """ + output = [] + for column in columns: + if column in warning_dict: + output.append(warning_dict[column]) + else: + output.append(None) + output[columns_index_dict["num"]] = 1 + return output + + +def read_warning_data(dir_path): + """ + During test runs in jenkins, multiple warning json files are output. This function finds all files + and aggregates the warnings in to one large list + """ + # pdb.set_trace() + dir_path = os.path.expanduser(dir_path) + # find all files that exist in given directory + files_in_dir = [ + f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) + ] + warnings_files = [] + + # TODO(jinder): currently this is hard-coded in, maybe create a constants file with info + # THINK(jinder): but creating file for one constant seems overkill + warnings_file_name_regex = ( + "pytest_warnings_?\d*\.json" # noqa pylint: disable=W1401 + ) + + # iterate through files_in_dir and see if they match our know file name pattern + for temp_file in files_in_dir: + if re.search(warnings_file_name_regex, temp_file) is not None: + warnings_files.append(temp_file) + + # go through each warning file and aggregate warnings into warnings_data + warnings_data = [] + for temp_file in warnings_files: + with io.open(os.path.expanduser(dir_path + "/" + temp_file), "r") as read_file: + json_input = json.load(read_file) + if "warnings" in json_input: + data = [ + convert_warning_dict_to_list(warning_dict) + for warning_dict in json_input["warnings"] + ] + warnings_data.extend(data) + else: + print(temp_file) + return warnings_data + + +def compress_similar_warnings(warnings_data): + """ + find all warnings that are exactly the same, count them, and return set with count added to each warning + """ + tupled_data = [tuple(data) for data in warnings_data] + test_counter = Counter(tupled_data) + output = [list(value) for value in test_counter.keys()] + for data_object in output: + data_object[columns_index_dict["num"]] = test_counter[tuple(data_object)] + return output + + +def process_warnings_json(dir_path): + """ + Master function to process through all warnings and output a dict + + dict structure: + { + location: [{warning text: {file_name: warning object}}] + } + + flow: + - Aggregate data from all warning files + - Separate warnings by deprecated vs non deprecated(has word deprecate in it) + - Further categorize warnings + - Return output + Possible Error/enhancement: there might be better ways to separate deprecates vs + non-deprecated warnings + """ + warnings_data = read_warning_data(dir_path) + for warnings_object in warnings_data: + warnings_object[columns_index_dict["deprecated"]] = bool( + "deprecated" in warnings_object[columns_index_dict["message"]] + ) + warnings_data = seperate_warnings_by_location(warnings_data) + compressed_warnings_data = compress_similar_warnings(warnings_data) + return compressed_warnings_data + + +def group_and_sort_by_sumof(dataframe, group, sort_by): + groups_by = dataframe.groupby(group) + temp_list_to_sort = [(key, value, value[sort_by].sum()) for key, value in groups_by] + # sort by count + return sorted(temp_list_to_sort, key=lambda x: -x[2]) + + +def write_html_report(warnings_dataframe, html_path): + """ + converts from panda dataframe to our html + """ + html_path = os.path.expanduser(html_path) + if "/" in html_path: + location_of_last_dir = html_path.rfind("/") + dir_path = html_path[:location_of_last_dir] + os.makedirs(dir_path, exist_ok=True) + with io.open(html_path, "w") as fout: + html_writer = HtmlOutlineWriter(fout) + category_sorted_by_count = group_and_sort_by_sumof( + warnings_dataframe, "category", "num" + ) + for category, group_in_category, category_count in category_sorted_by_count: + # xss-lint: disable=python-wrap-html + html = u'{category}, count: {count} '.format( + category=category, count=category_count + ) + html_writer.start_section(html, klass=u"category") + locations_sorted_by_count = group_and_sort_by_sumof( + group_in_category, "high_location", "num" + ) + + for ( + location, + group_in_location, + location_count, + ) in locations_sorted_by_count: + # xss-lint: disable=python-wrap-html + html = u'{location}, count: {count} '.format( + location=location, count=location_count + ) + html_writer.start_section(html, klass=u"location") + message_group_sorted_by_count = group_and_sort_by_sumof( + group_in_location, "message", "num" + ) + for ( + message, + message_group, + message_count, + ) in message_group_sorted_by_count: + # xss-lint: disable=python-wrap-html + html = u'{warning_text}, count: {count} '.format( + warning_text=message, count=message_count + ) + html_writer.start_section(html, klass=u"warning_text") + # warnings_object[location][warning_text] is a list + for _, warning in message_group.iterrows(): + # xss-lint: disable=python-wrap-html + html = u'{warning_file_path} '.format( + warning_file_path=warning["filename"] + ) + html_writer.start_section(html, klass=u"warning") + # xss-lint: disable=python-wrap-html + html = u'
lineno: {lineno}
'.format( + lineno=warning["lineno"] + ) + html_writer.write(html) + # xss-lint: disable=python-wrap-html + html = u'num_occur: {num}
'.format( + num=warning["num"] + ) + html_writer.write(html) + + html_writer.end_section() + html_writer.end_section() + html_writer.end_section() + html_writer.end_section() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Process and categorize pytest warnings and output html report." + ) + parser.add_argument("--dir-path", default="test_root/log") + parser.add_argument("--html-path", default="test_html.html") + args = parser.parse_args() + data_output = process_warnings_json(args.dir_path) + data_dataframe = pd.DataFrame(data=data_output, columns=columns) + write_html_report(data_dataframe, args.html_path) diff --git a/openedx/core/pytest_hooks.py b/openedx/core/pytest_hooks.py new file mode 100644 index 0000000000..0ec2e3e996 --- /dev/null +++ b/openedx/core/pytest_hooks.py @@ -0,0 +1,79 @@ +""" +Module to put all pytest hooks that modify pytest behaviour +""" +import os +import io +import json + + +def pytest_json_modifyreport(json_report): + """ + - The function is called by pytest-json-report plugin to only output warnings in json format. + - Everything else is removed due to it already being saved by junitxml + - --json-omit flag in does not allow us to remove everything but the warnings + - (the environment metadata is one example of unremoveable data) + - The json warning outputs are meant to be read by jenkins + """ + warnings_flag = "warnings" + if warnings_flag in json_report: + warnings = json_report[warnings_flag] + json_report.clear() + json_report[warnings_flag] = warnings + else: + json_report = {} + return json_report + + +def create_file_name(dir_path, file_name_postfix, num=0): + """ + Used to create file name with this given + structure: TEST_SUITE + "_" + file_name_postfix + "_ " + num.json + The env variable TEST_SUITE is set in jenkinsfile + + This was necessary cause Pytest is run multiple times and we need to make sure old pytest + warning json files are not being overwritten. + """ + name = dir_path + "/" + if "TEST_SUITE" in os.environ: + name += os.environ["TEST_SUITE"] + "_" + name += file_name_postfix + if num != 0: + name += "_" + str(num) + return name + ".json" + + +def pytest_sessionfinish(session): + """ + Since multiple pytests are running, + this makes sure warnings from different run are not overwritten + """ + dir_path = "test_root/log" + file_name_postfix = "pytest_warnings" + num = 0 + # to make sure this doesn't loop forever, putting a maximum + while ( + os.path.isfile(create_file_name(dir_path, file_name_postfix, num)) and num < 100 + ): + num += 1 + + report = session.config._json_report.report # noqa pylint: disable=protected-access + + with io.open(create_file_name(dir_path, file_name_postfix, num), "w") as outfile: + json.dump(report, outfile) + + +class DeferPlugin(object): + """Simple plugin to defer pytest-xdist hook functions.""" + + def pytest_json_modifyreport(self, json_report): + """standard xdist hook function. + """ + return pytest_json_modifyreport(json_report) + + def pytest_sessionfinish(self, session): + return pytest_sessionfinish(session) + + +def pytest_configure(config): + if config.pluginmanager.hasplugin("json-report"): + config.pluginmanager.register(DeferPlugin()) diff --git a/openedx/core/write_to_html.py b/openedx/core/write_to_html.py new file mode 100644 index 0000000000..108ea179a9 --- /dev/null +++ b/openedx/core/write_to_html.py @@ -0,0 +1,98 @@ +""" +Class used to write pytest warning data into html format +""" +import textwrap +import six + + +class HtmlOutlineWriter(object): + """ + writer to handle html writing + """ + HEAD = textwrap.dedent( + u""" + + + + + + + + """ + ) + + SECTION_START = textwrap.dedent( + u"""\ +