Update documentation, comments, and docstrings throughout the codebase to reflect the migration from setup.py to pyproject.toml:

- Transformer class docstrings: changed to reference "entry point name in the package configuration" for better future-proofing
- Block structure module docs: updated to reference pyproject.toml
- Test file comments: updated entry point references
- Config files (tox.ini, pytest.ini): updated references
- Documentation (extension_points.rst, course apps ADRs): updated to reference pyproject.toml with inclusive language for external packages
- Requirements documentation (github.in): updated with inclusive language
- edxmako README: modernized install command to use pip install

Historical ADRs and references to external packages that may still use setup.py were intentionally left unchanged or updated with inclusive language acknowledging both pyproject.toml and setup.py.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
225 lines
10 KiB
Python
225 lines
10 KiB
Python
"""
|
|
Effort Estimation Transformer implementation.
|
|
Adds effort estimations for block types it recognizes.
|
|
"""
|
|
|
|
import math
|
|
|
|
import crum
|
|
import lxml.html
|
|
from django.utils.functional import cached_property
|
|
from edxval.api import get_videos_for_course
|
|
|
|
from openedx.core.djangoapps.content.block_structure.transformer import BlockStructureTransformer
|
|
from openedx.core.lib.mobile_utils import is_request_from_mobile_app
|
|
|
|
from .toggles import EFFORT_ESTIMATION_DISABLED_FLAG
|
|
|
|
|
|
class EffortEstimationTransformer(BlockStructureTransformer):
    """
    A transformer that adds effort estimation to the block tree.

    There are two fields added by this transformer:
    - effort_activities: The number of "activities" at this block or lower. Note that verticals count as a single
      activity at most. Activities are basically anything that isn't text or video.
    - effort_time: Our best guess at how long the block and lower will take, in seconds. We use an estimated reading
      speed and video duration to calculate this. Just a rough guide.

    If there is any missing data (like no video duration), we don't provide any estimates at all for the course.
    We'd rather provide no estimate than a misleading estimate.

    This transformer requires data gathered during the collection phase (from a course publish), so it won't work
    on a course until the next publish.
    """
    WRITE_VERSION = 1
    READ_VERSION = 1

    # Public xblock field names
    EFFORT_ACTIVITIES = 'effort_activities'
    EFFORT_TIME = 'effort_time'

    # Private transformer field names
    DISABLE_ESTIMATION = 'disable_estimation'
    HTML_WORD_COUNT = 'html_word_count'
    VIDEO_CLIP_DURATION = 'video_clip_duration'
    VIDEO_DURATION = 'video_duration'

    CACHE_VIDEO_DURATIONS = 'video.durations'  # key used inside the temporary collection cache
    DEFAULT_WPM = 265  # words per minute

    class MissingEstimationData(Exception):
        """Raised by the collection helpers when data needed for an estimate is unavailable."""

    @classmethod
    def name(cls):
        """
        Unique identifier for the transformer's class.
        This must match the entry point name in the package configuration.
        """
        return 'effort_estimation'

    @classmethod
    def collect(cls, block_structure):
        """
        Grabs raw estimates for leaf content.

        Pooling leaf estimates higher up the tree (e.g. in verticals, then sequentials, then chapters) is done by
        transform() below at run time, because which blocks each user sees can be different.

        If any collection helper raises MissingEstimationData, the DISABLE_ESTIMATION transformer flag is set on
        the block structure so that transform() leaves the whole course untouched.
        """
        block_structure.request_xblock_fields('category')
        block_structure.request_xblock_fields('global_speed', 'only_on_web')  # video fields

        collection_cache = {}  # collection methods can stuff some temporary data here

        # Maps a block category to the helper that records its raw data
        collections = {
            'html': cls._collect_html_effort,
            'video': cls._collect_video_effort,
        }

        try:
            for block_key in block_structure.topological_traversal():
                xblock = block_structure.get_xblock(block_key)

                if xblock.category in collections:
                    collections[xblock.category](block_structure, block_key, xblock, collection_cache)

        except cls.MissingEstimationData:
            # Some bit of required data is missing. Likely some duration info is missing from the video pipeline.
            # Rather than attempt to work around it, just set a note for ourselves to not show durations for this
            # course at all. Better no estimate than a misleading estimate.
            block_structure.set_transformer_data(cls, cls.DISABLE_ESTIMATION, True)

    @classmethod
    def _collect_html_effort(cls, block_structure, block_key, xblock, _cache):
        """
        Records a word count for later reading speed calculations.

        The count is the number of whitespace-separated tokens in the text content of xblock.data (zero when the
        block has no data).

        Raises:
            MissingEstimationData: if the block's html cannot be parsed into text.
        """
        try:
            text = lxml.html.fromstring(xblock.data).text_content() if xblock.data else ''
        except Exception as exc:  # pylint: disable=broad-except
            # Any parsing failure means we can't trust our numbers; opt the course out instead of guessing.
            raise cls.MissingEstimationData() from exc

        block_structure.set_transformer_block_field(block_key, cls, cls.HTML_WORD_COUNT, len(text.split()))

    @classmethod
    def _collect_video_effort(cls, block_structure, block_key, xblock, cache):
        """
        Records a duration for later viewing speed calculations.

        Stores the whole-video duration and, when the block's end_time is later than its start_time, a separate
        clip duration.

        Raises:
            MissingEstimationData: if no positive duration is known for the block's edx_video_id.
        """
        # Lookup all course video metadata at once rather than piecemeal, for performance reasons
        if cls.CACHE_VIDEO_DURATIONS not in cache:
            all_videos, _ = get_videos_for_course(str(block_structure.root_block_usage_key.course_key))
            cache[cls.CACHE_VIDEO_DURATIONS] = {v['edx_video_id']: v['duration'] for v in all_videos}

        # Check if we have a duration. If not, raise an exception that will stop this transformer from affecting
        # this course. A None duration is treated the same as a missing one, rather than letting the comparison
        # below blow up with a TypeError.
        duration = cache[cls.CACHE_VIDEO_DURATIONS].get(xblock.edx_video_id, 0)
        if not duration or duration <= 0:
            raise cls.MissingEstimationData()

        block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_DURATION, duration)

        # Some videos will suggest specific start & end times, rather than the whole video. Note that this is only
        # supported in some clients (other clients - like the mobile app - will play the whole video anyway). So we
        # record this duration separately, to use instead of the whole video duration if the client supports it.
        clip_duration = (xblock.end_time - xblock.start_time).total_seconds()
        if clip_duration > 0:
            block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_CLIP_DURATION, clip_duration)

    def transform(self, usage_info, block_structure):
        """
        Overrides the effort_time and effort_activities xblock fields on every block we know how to estimate.

        Does nothing when the per-course opt-out flag is enabled or when the collection phase flagged the course
        as missing required data.

        Arguments:
            usage_info: passed through untouched to the per-category estimation methods.
            block_structure: the block structure to read collected data from and write overrides to.
        """
        # Early exit if our per-course opt-out flag is enabled
        if EFFORT_ESTIMATION_DISABLED_FLAG.is_enabled(block_structure.root_block_usage_key.course_key):
            return

        # Skip any transformation if our collection phase said to
        cls = EffortEstimationTransformer
        if block_structure.get_transformer_data(cls, cls.DISABLE_ESTIMATION, default=False):
            return

        # These estimation methods should return a tuple of (a number in seconds, an activity count)
        estimations = {
            'chapter': self._estimate_children_effort,
            'course': self._estimate_children_effort,
            'html': self._estimate_html_effort,
            'sequential': self._estimate_children_effort,
            'vertical': self._estimate_vertical_effort,
            'video': self._estimate_video_effort,
        }

        # We're good to continue and make user-specific estimates based on collected data.
        # Post-order, so that children carry their estimates before their parents sum them up.
        for block_key in block_structure.post_order_traversal():
            category = block_structure.get_xblock_field(block_key, 'category')
            if category not in estimations:
                continue

            time, activities = estimations[category](usage_info, block_structure, block_key)

            if time is not None:
                # We take the ceiling of the estimate here just for cleanliness. Losing the fractional seconds does
                # technically make our estimate less accurate, especially as we combine these values in parents.
                # But easier to present a simple integer to any consumers, and precise to-the-second accuracy on our
                # estimate is not a primary goal.
                block_structure.override_xblock_field(block_key, self.EFFORT_TIME, math.ceil(time))

            if activities is not None:
                block_structure.override_xblock_field(block_key, self.EFFORT_ACTIVITIES, activities)

    @cached_property
    def _is_on_mobile(self):
        """Returns whether the current request is from our mobile app (always a bool; False without a request)."""
        request = crum.get_current_request()
        return bool(request and is_request_from_mobile_app(request))

    def _gather_child_values(self, block_structure, block_key, field, default=0):
        """
        Collects and sums all child values for field.

        Children that have no value for the field contribute `default` to the sum.
        """
        return sum(
            block_structure.get_xblock_field(child_key, field, default=default)
            for child_key in block_structure.get_children(block_key)
        )

    def _estimate_children_effort(self, _usage_info, block_structure, block_key):
        """
        Collects time and activity counts for children.

        Returns:
            (time, activities): time is the summed child effort_time in seconds, or None when that sum is zero.
            activities is the summed child effort_activities, counting 1 for any child without a value.
        """
        time = self._gather_child_values(block_structure, block_key, self.EFFORT_TIME)
        time = time or None  # avoid claiming anything takes 0 seconds by coercing to None (no estimate) instead

        # Use 1 as the default for activity - any block that we don't know for sure is 0, we should count
        activities = self._gather_child_values(block_structure, block_key, self.EFFORT_ACTIVITIES, default=1)

        return time, activities

    def _estimate_html_effort(self, _usage_info, block_structure, block_key):
        """
        Returns an average expected time to read the contained html.

        Returns:
            (time, 0): time is the collected word count read at DEFAULT_WPM, in seconds, or None when there is no
            (or a zero) word count. Html never counts as an activity.
        """
        cls = EffortEstimationTransformer
        word_count = block_structure.get_transformer_block_field(block_key, cls, self.HTML_WORD_COUNT)
        if not word_count:
            return None, 0

        time = word_count / self.DEFAULT_WPM * 60  # in seconds
        return time, 0

    def _estimate_vertical_effort(self, usage_info, block_structure, block_key):
        """
        A vertical is either an amount of time if we know it, or an activity

        Returns:
            (time, activities): the summed child time (or None), and 1 if any child counted as an activity, else 0.
        """
        time, activities = self._estimate_children_effort(usage_info, block_structure, block_key)

        # Verticals are the basic activity metric - we may have collected all unknown xblocks as activities in the call
        # above, but we reset that count to 1 here.
        return time, 1 if activities else 0

    def _estimate_video_effort(self, _usage_info, block_structure, block_key):
        """
        Returns an expected time to view the video, at the user's preferred speed.

        Returns:
            (time, 0): time is the clip duration (or the whole duration when there is no clip, or on mobile)
            divided by global_speed, or None when no duration is available or the video is web-only and the
            request comes from the mobile app. Video never counts as an activity.
        """
        cls = EffortEstimationTransformer
        clip_duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_CLIP_DURATION)
        duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_DURATION)
        global_speed = block_structure.get_xblock_field(block_key, 'global_speed', default=1)
        only_on_web = block_structure.get_xblock_field(block_key, 'only_on_web', default=False)

        if self._is_on_mobile:
            if only_on_web:
                return None, 0
            clip_duration = None  # mobile can't do clips

        user_duration = clip_duration or duration
        if not user_duration:
            return None, 0

        # A missing or non-positive speed would make the division below fail (or yield a negative estimate), so
        # fall back to normal playback speed in that case.
        if not global_speed or global_speed <= 0:
            global_speed = 1

        # We are intentionally only looking at global_speed, not speed (which is last speed user used on this video)
        # because this estimate is meant to be somewhat static.
        return user_duration / global_speed, 0
|