Use html rather than xml parsing for course updates

This commit is contained in:
Don Mitchell
2013-01-08 15:13:37 -05:00
parent 7377f4bda3
commit 1708dc4080

View File

@@ -1,7 +1,7 @@
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.modulestore import Location
from xmodule.modulestore.django import modulestore
from lxml import etree
from lxml import html
import re
from django.http import HttpResponseBadRequest
import logging
@@ -24,9 +24,9 @@ def get_course_updates(location):
# Purely to handle free-form updates not made via the editor. This actually discards them, but at least it doesn't break.
try:
course_html_parsed = etree.fromstring(course_updates.definition['data'])
except etree.XMLSyntaxError:
course_html_parsed = etree.fromstring("<ol></ol>")
course_html_parsed = html.fromstring(course_updates.definition['data'])
except:
course_html_parsed = html.fromstring("<ol></ol>")
# Confirm that root is <ol>, iterate over <li>, pull out <h2> subs and then rest of val
course_upd_collection = []
@@ -39,7 +39,7 @@ def get_course_updates(location):
# could enforce that update[0].tag == 'h2'
content = update[0].tail
else:
content = "\n".join([etree.tostring(ele) for ele in update[1:]])
content = "\n".join([html.tostring(ele) for ele in update[1:]])
# Make the id on the client run 1..len, with 1 being the oldest and len being the newest
course_upd_collection.append({"id" : location_base + "/" + str(len(course_html_parsed) - idx),
@@ -61,12 +61,12 @@ def update_course_updates(location, update, passed_id=None):
# Purely to handle free-form updates not made via the editor. This actually discards them, but at least it doesn't break.
try:
course_html_parsed = etree.fromstring(course_updates.definition['data'])
except etree.XMLSyntaxError:
course_html_parsed = etree.fromstring("<ol></ol>")
course_html_parsed = html.fromstring(course_updates.definition['data'])
except:
course_html_parsed = html.fromstring("<ol></ol>")
# No try/except because a failure here generates an error back to the client
new_html_parsed = etree.fromstring('<li><h2>' + update['date'] + '</h2>' + update['content'] + '</li>')
new_html_parsed = html.fromstring('<li><h2>' + update['date'] + '</h2>' + update['content'] + '</li>')
# Confirm that root is <ol>, iterate over <li>, pull out <h2> subs and then rest of val
if course_html_parsed.tag == 'ol':
@@ -82,7 +82,7 @@ def update_course_updates(location, update, passed_id=None):
passed_id = course_updates.location.url() + "/" + str(idx)
# update db record
course_updates.definition['data'] = etree.tostring(course_html_parsed)
course_updates.definition['data'] = html.tostring(course_html_parsed)
modulestore('direct').update_item(location, course_updates.definition['data'])
return {"id" : passed_id,
@@ -105,9 +105,9 @@ def delete_course_update(location, update, passed_id):
# TODO use delete_blank_text parser throughout and cache as a static var in a class
# Purely to handle free-form updates not made via the editor. This actually discards them, but at least it doesn't break.
try:
course_html_parsed = etree.fromstring(course_updates.definition['data'])
except etree.XMLSyntaxError:
course_html_parsed = etree.fromstring("<ol></ol>")
course_html_parsed = html.fromstring(course_updates.definition['data'])
except:
course_html_parsed = html.fromstring("<ol></ol>")
if course_html_parsed.tag == 'ol':
# ??? Should this use the id in the json or in the url or does it matter?
@@ -118,7 +118,7 @@ def delete_course_update(location, update, passed_id):
course_html_parsed.remove(element_to_delete)
# update db record
course_updates.definition['data'] = etree.tostring(course_html_parsed)
course_updates.definition['data'] = html.tostring(course_html_parsed)
store = modulestore('direct')
store.update_item(location, course_updates.definition['data'])