76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
"""
|
|
Utility methods for the Shopping Cart app
|
|
"""
|
|
|
|
from django.conf import settings
|
|
from pdfminer.converter import PDFPageAggregator
|
|
from pdfminer.layout import LAParams, LTFigure, LTTextBox, LTTextLine
|
|
from pdfminer.pdfdocument import PDFDocument
|
|
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
|
|
from pdfminer.pdfpage import PDFPage
|
|
from pdfminer.pdfparser import PDFParser
|
|
|
|
from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers
|
|
|
|
|
|
def is_shopping_cart_enabled():
    """
    Report whether the shopping cart is switched on.

    Two flags are consulted: ENABLE_PAID_COURSE_REGISTRATION and ENABLE_SHOPPING_CART.
    Each one is looked up through the site configuration helpers, with the matching
    entry of settings.FEATURES supplied as the fallback value.

    Returns the result of `and`-ing the two looked-up values, i.e. the first value
    when it is falsy, otherwise the second one.
    """
    def _feature_value(feature_name):
        # Site configuration lookup, defaulting to the platform-wide FEATURES entry.
        return configuration_helpers.get_value(
            feature_name,
            settings.FEATURES.get(feature_name)
        )

    # Both flags are always looked up, in this order, before being combined.
    paid_registration_on = _feature_value('ENABLE_PAID_COURSE_REGISTRATION')
    shopping_cart_on = _feature_value('ENABLE_SHOPPING_CART')

    return paid_registration_on and shopping_cart_on
|
|
|
|
|
|
def parse_pages(pdf_buffer, password):
    """
    Extract the text of every page of a PDF held in a buffer object.

    Arguments:
        pdf_buffer: the object handed to PDFParser as the PDF data source.
        password: the password passed to PDFDocument when it is initialized.

    Returns:
        A list with one entry per page, in page order; each entry is whatever
        parse_lt_objects returns for the layout objects of that page.
    """
    # The document object stores the PDF structure; it is built from a parser
    # bound to the buffer and initialized with the supplied password.
    pdf_document = PDFDocument(PDFParser(pdf_buffer), password)

    # The aggregator device collects the layout of each page the interpreter processes.
    resources = PDFResourceManager()
    aggregator = PDFPageAggregator(resources, laparams=LAParams())
    page_interpreter = PDFPageInterpreter(resources, aggregator)

    def _layout_of(page):
        # Process the page, then fetch the resulting LTPage object from the device.
        page_interpreter.process_page(page)
        return aggregator.get_result()

    # An LTPage may contain child objects like LTTextBox, LTFigure, LTImage, etc.;
    # those children are what gets parsed into text.
    return [
        parse_lt_objects(_layout_of(page)._objs)  # pylint: disable=protected-access
        for page in PDFPage.create_pages(pdf_document)
    ]
|
|
|
|
|
|
def parse_lt_objects(lt_objects):
    """
    Iterate through the LT* objects and capture the text data contained in each one.

    Arguments:
        lt_objects: iterable of pdfminer layout objects (for example the children
            of a page or of a figure).

    Returns:
        A single string: the text of every LTTextBox / LTTextLine encountered, plus the
        recursively gathered text of every LTFigure, joined with newlines. Objects of
        any other type are ignored, and an empty iterable yields ''.
    """
    text_content = []

    for lt_object in lt_objects:
        if isinstance(lt_object, (LTTextBox, LTTextLine)):
            # Keep the text as a string. Encoding it to UTF-8 bytes here would make the
            # '\n'.join() below raise TypeError on Python 3, and would not match the
            # string returned by the recursive call in the other branch.
            text_content.append(lt_object.get_text())
        elif isinstance(lt_object, LTFigure):
            # LTFigure objects are containers for other LT* objects, so recurse through the children
            text_content.append(parse_lt_objects(lt_object._objs))  # pylint: disable=protected-access

    return '\n'.join(text_content)
|