28 lines
705 B
Python
28 lines
705 B
Python
"""Provides a function to convert html to plaintext."""
|
|
import logging
|
|
from subprocess import Popen, PIPE
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
|
|
|
|
|
|
def html_to_text(html_message):
    """
    Convert an html message to plaintext.

    Currently uses lynx in a subprocess; should be refactored to
    use something more pythonic.

    Args:
        html_message: The html document as a text string. It is encoded
            as UTF-8 before being written to lynx's stdin.

    Returns:
        The raw bytes lynx wrote to stdout (lynx is asked for UTF-8
        output via ``-display_charset=UTF-8``).

    Raises:
        OSError: If the ``lynx`` executable cannot be started.
    """
    process = Popen(
        ['lynx', '-stdin', '-display_charset=UTF-8', '-assume_charset=UTF-8', '-dump'],
        stdin=PIPE,
        stdout=PIPE,
        # stderr must be piped too; otherwise communicate() always returns
        # None for it and the diagnostics below can never be logged.
        stderr=PIPE,
    )
    # use lynx to get plaintext
    plaintext, err_from_stderr = process.communicate(
        input=html_message.encode('utf-8')
    )

    if err_from_stderr:
        # Decode leniently so that malformed diagnostic output cannot make
        # the logging call itself fail.
        log.info(err_from_stderr.decode('utf-8', 'replace'))

    return plaintext
|