Use Beautiful Soup to massage the HTML for easier comparison.

Strip off the data-id attribute for the HTML diff of courseware. Strip data-id attributes so they are not compared in the HTML.
2012-12-04 12:09:50 -05:00
parent 8934f54d6f
commit d65985b445
2 changed files with 51 additions and 3 deletions
--- a/lms/djangoapps/courseware/features/smart-accordion.py
+++ b/lms/djangoapps/courseware/features/smart-accordion.py
@@ -17,6 +17,9 @@ def check_for_errors():
@step(u'I verify all the content of each course')
 def i_verify_all_the_content_of_each_course(step):
    all_possible_courses = get_courses()
+    logger.debug('Courses found:')
+    for c in all_possible_courses:
+        logger.debug(c.id)
    ids = [c.id for c in all_possible_courses]

    # Get a list of all the registered courses
@@ -110,7 +113,7 @@ def browse_course(course_id):
            #logger.debug(msg)

            # Save the HTML to a file for later comparison
-            world.save_the_html()
+            world.save_the_course_content('/tmp/%s' % course_id)

            assert num_tabs == num_rendered_tabs, msg

@@ -146,4 +149,4 @@ def validate_course(current_course, ids):
    try:
        ids.index(current_course)
    except:
-        assert False, "invalid course id"
+        assert False, "invalid course id %s" % current_course
--- a/lms/djangoapps/terrain/common.py
+++ b/lms/djangoapps/terrain/common.py
@@ -6,9 +6,12 @@ from lettuce.django import django_url
 from django.conf import settings
 from django.contrib.auth.models import User
 from student.models import CourseEnrollment
-import time
 from urllib import quote_plus
 from nose.tools import assert_equals
+from bs4 import BeautifulSoup
+import time
+import re
+import os.path

 from logging import getLogger
 logger = getLogger(__name__)
@@ -109,6 +112,48 @@ def save_the_html(path='/tmp'):
    f.write(html)
    f.close

+@world.absorb
+def save_the_course_content(path='/tmp'):
+    """Save the current page's HTML, with data-id values removed, in path."""
+    html = world.browser.html.encode('ascii', 'ignore')
+    soup = BeautifulSoup(html)
+
+    # get rid of the header, we only want to compare the body
+    # soup.head.decompose()
+
+    # for now, remove the data-id attributes, because they are
+    # causing mismatches between cms-master and master
+    for item in soup.find_all(attrs={'data-id': re.compile('.*')}):
+        del item['data-id']
+
+    # we also need to remove them from unrendered problems,
+    # where they are contained in the text of divs instead of
+    # in attributes of tags
+    # Be careful of whether or not it was the last attribute
+    # and needs a trailing space
+    for item in soup.find_all(text=re.compile(' data-id=".*?" ')):
+        s = unicode(item.string)
+        item.string.replace_with(re.sub(' data-id=".*?" ', ' ', s))
+
+    for item in soup.find_all(text=re.compile(' data-id=".*?"')):
+        s = unicode(item.string)
+        item.string.replace_with(re.sub(' data-id=".*?"', ' ', s))
+
+    # prettify the html so it will compare better, with
+    # each HTML tag on its own line
+    output = soup.prettify()
+
+    # use string slicing to grab everything after 'courseware/' in the URL
+    u = world.browser.url
+    section_url = u[u.find('courseware/')+11:]
+
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+    filename = '%s.html' % (quote_plus(section_url))
+    with open('%s/%s' % (path, filename), 'w') as f:
+        f.write(output)
+
 ###########  DEBUGGING ##############
@step(u'I save a screenshot to "(.*)"')
 def save_screenshot_to(step, filename):