Merge pull request #13225 from singingwolfboy/update-pa11ycrawler

Use updated, simplified version of pa11ycrawler
This commit is contained in:
David Baumgold
2016-08-16 08:51:05 -04:00
committed by GitHub
4 changed files with 118 additions and 61 deletions

View File

@@ -124,7 +124,7 @@ def pa11ycrawler(options, passthrough_options):
not options.get('fasttest')
)
options.pa11ycrawler.course_key = getattr(options, 'course-key', "course-v1:edX+Test101+course")
test_suite = Pa11yCrawler('a11y_crawler', passthrough_options=passthrough_options, **options.pa11ycrawler)
test_suite = Pa11yCrawler('pa11ycrawler', passthrough_options=passthrough_options, **options.pa11ycrawler)
test_suite.run()
if getattr(options, 'with_html', False):

View File

@@ -4,9 +4,10 @@ Run just this test with: paver test_lib -t pavelib/paver_tests/test_paver_bok_ch
"""
import os
import unittest
from textwrap import dedent
import ddt
from mock import patch, call
from mock import patch, call, Mock
from test.test_support import EnvironmentVarGuard
from paver.easy import BuildFailure, call_task, environment
from pavelib.utils.test.suites import BokChoyTestSuite, Pa11yCrawler
@@ -196,28 +197,30 @@ class TestPaverPa11yCrawlerCmd(unittest.TestCase):
# reset the options for all tasks
environment.options.clear()
def _expected_command(self, report_dir, start_urls):
"""
Returns the expected command to run pa11ycrawler.
"""
expected_statement = [
'pa11ycrawler',
'run',
] + start_urls + [
'--pa11ycrawler-allowed-domains=localhost',
'--pa11ycrawler-reports-dir={}'.format(report_dir),
'--pa11ycrawler-deny-url-matcher=logout',
'--pa11y-reporter="1.0-json"',
'--depth-limit=6',
]
return expected_statement
def test_default(self):
    """
    `Pa11yCrawler.cmd` should produce a `scrapy crawl` invocation for the
    configured course. The trailing `data_dir` argument is machine-specific,
    so only its shape is checked.

    Fixes diff residue: the old assertion left an unclosed `assertEqual(`
    call fused with the new test body, which was a syntax error.
    """
    suite = Pa11yCrawler(
        'pa11ycrawler', course_key="course-v1:edX+Test101+course",
    )
    expected_cmd = [
        "scrapy",
        "crawl",
        "edx",
        "-a",
        "port=8003",
        "-a",
        "course_key=course-v1:edX+Test101+course",
        "-a",
        "data_dir=/edx/app/edxapp/edx-platform/reports/pa11ycrawler/data",
    ]
    actual_cmd = suite.cmd
    # verify that the final section of this command is for specifying the
    # data directory
    self.assertEqual(actual_cmd[-2], "-a")
    self.assertTrue(actual_cmd[-1].startswith("data_dir="))
    # chop off the `data_dir` argument when comparing,
    # since it is highly dependent on who is running this paver command,
    # and where it's being run (devstack, jenkins, etc)
    self.assertEqual(actual_cmd[0:-2], expected_cmd[0:-2])
@ddt.data(
(True, True, None),
@@ -245,10 +248,55 @@ class TestPaverPa11yCrawlerCmd(unittest.TestCase):
'tar zxf {dir}demo_course.tar.gz -C {dir}'.format(dir=downloaded_to)),
])
@patch("pavelib.utils.test.suites.bokchoy_suite.path")
def test_scrapy_cfg_exists(self, mocked_path_func):
    """
    If ~/.config/scrapy.cfg already exists, constructing the crawler
    suite must leave the file untouched.
    """
    # Fake out the `path` constructor so no real filesystem is touched.
    fake_cfg = Mock()
    fake_cfg.expand.return_value = fake_cfg
    fake_cfg.isfile.return_value = True
    mocked_path_func.return_value = fake_cfg
    # Constructing the suite triggers the scrapy.cfg check.
    Pa11yCrawler('pa11ycrawler')
    # The existing file was probed, but never written to.
    mocked_path_func.assert_called_with("~/.config/scrapy.cfg")
    self.assertTrue(fake_cfg.isfile.called)
    self.assertFalse(fake_cfg.write_text.called)
@patch("pavelib.utils.test.suites.bokchoy_suite.path")
def test_scrapy_cfg_not_exists(self, mocked_path_func):
    """
    If ~/.config/scrapy.cfg is missing, constructing the crawler suite
    should create the parent directory and write a default config file.
    """
    # Fake out the `path` constructor so no real filesystem is touched.
    fake_cfg = Mock()
    fake_cfg.expand.return_value = fake_cfg
    fake_cfg.isfile.return_value = False
    mocked_path_func.return_value = fake_cfg
    # Constructing the suite triggers the scrapy.cfg check.
    Pa11yCrawler('pa11ycrawler')
    # The missing file was probed, its directory created, and the
    # default scrapy configuration written out.
    mocked_path_func.assert_called_with("~/.config/scrapy.cfg")
    self.assertTrue(fake_cfg.isfile.called)
    self.assertTrue(fake_cfg.parent.makedirs_p.called)
    expected_content = dedent("""
        [settings]
        default = pa11ycrawler.settings
        [deploy]
        project = pa11ycrawler
    """)
    fake_cfg.write_text.assert_called_with(expected_content)
def test_generate_html_reports(self):
    """
    `generate_html_reports` should shell out to `pa11ycrawler-html`,
    pointing it at the suite's data and html report directories.

    Fixes diff residue: a duplicate `suite =` assignment and a stale
    expected call to the removed `pa11ycrawler json-to-html` command,
    which referenced the no-longer-present `pa11y_report_dir` attribute.
    """
    suite = Pa11yCrawler('pa11ycrawler')
    suite.generate_html_reports()
    self._mock_sh.assert_has_calls([
        call([
            'pa11ycrawler-html',
            '--data-dir',
            os.path.join(suite.report_dir, "data"),
            '--output-dir',
            os.path.join(suite.report_dir, "html"),
        ])
    ])

View File

@@ -3,6 +3,7 @@ Class used for defining and running Bok Choy acceptance test suite
"""
from time import sleep
from urllib import urlencode
from textwrap import dedent
from common.test.acceptance.fixtures.course import CourseFixture, FixtureError
@@ -349,51 +350,59 @@ class Pa11yCrawler(BokChoyTestSuite):
def __init__(self, *args, **kwargs):
    """
    Accessibility crawler suite built on top of BokChoyTestSuite.

    Accepts the same arguments as BokChoyTestSuite, plus:
        course_key: the course to crawl (read from kwargs; may be None).
    """
    super(Pa11yCrawler, self).__init__(*args, **kwargs)
    self.course_key = kwargs.get('course_key')
    # NOTE(review): this attribute looks like leftover diff residue from the
    # pre-scrapy implementation -- nothing visible here reads it; confirm
    # against the rest of the file before removing.
    self.pa11y_report_dir = os.path.join(self.report_dir, 'pa11ycrawler_reports')
    # Make sure ~/.config/scrapy.cfg exists before scrapy is invoked.
    self.ensure_scrapy_cfg()
    self.start_urls = []
    # Log in via auto_auth as staff, scoped to the course under test.
    auto_auth_params = {
        "redirect": 'true',
        "staff": 'true',
        "course_id": self.course_key,
    }
    cms_params = urlencode(auto_auth_params)
    # presumably port 8031 is the CMS under test -- TODO confirm in config
    self.start_urls.append("\"http://localhost:8031/auto_auth?{}\"".format(cms_params))
def ensure_scrapy_cfg(self):
    """
    Scrapy requires a few configuration settings in order to run:
    http://doc.scrapy.org/en/1.1/topics/commands.html#configuration-settings
    This method ensures they are correctly written to the filesystem
    in a location where Scrapy knows to look for them.

    Returns True if the file was created, or False if the file already
    exists (in which case it was not modified.)
    """
    cfg_file = path("~/.config/scrapy.cfg").expand()
    if cfg_file.isfile():
        # Never clobber an existing user configuration.
        return False
    cfg_file.parent.makedirs_p()
    content = dedent("""
        [settings]
        default = pa11ycrawler.settings
        [deploy]
        project = pa11ycrawler
    """)
    cfg_file.write_text(content)
    return True
def generate_html_reports(self):
    """
    Runs pa11ycrawler-html to convert the crawled accessibility data
    into human-readable HTML reports under the suite's report directory.

    Fixes diff residue: the old implementation (shelling out to the
    removed `pa11ycrawler json-to-html` command via the no-longer-set
    `pa11y_report_dir` attribute) was left in place ABOVE the new one,
    so both commands would have run.
    """
    command = [
        'pa11ycrawler-html',
        '--data-dir',
        os.path.join(self.report_dir, 'data'),
        '--output-dir',
        os.path.join(self.report_dir, 'html'),
    ]
    sh(command)
@property
def cmd(self):
    """
    Runs pa11ycrawler as staff user against the test course.

    Returns the `scrapy crawl` argv list for the "edx" spider, passing
    the LMS port, the course key, and the report data directory as
    spider arguments.

    Fixes diff residue: the old `pa11ycrawler run` list literal was
    never closed before the new implementation began, which was a
    syntax error; only the new implementation is kept.
    """
    data_dir = os.path.join(self.report_dir, 'data')
    return [
        "scrapy",
        "crawl",
        "edx",
        "-a",
        "port=8003",
        "-a",
        "course_key={key}".format(key=self.course_key),
        "-a",
        "data_dir={dir}".format(dir=data_dir),
    ]

View File

@@ -69,7 +69,7 @@ git+https://github.com/edx/rfc6266.git@v0.0.5-edx#egg=rfc6266==0.0.5-edx
# Used for testing
git+https://github.com/edx/lettuce.git@0.2.20.002#egg=lettuce==0.2.20.002
git+https://github.com/edx/pa11ycrawler.git@0.0.4#egg=pa11ycrawler==0.0.4
git+https://github.com/edx/pa11ycrawler.git@1.0.0#egg=pa11ycrawler==1.0.0
# Our libraries:
git+https://github.com/edx/XBlock.git@xblock-0.4.12#egg=XBlock==0.4.12