Files
edx-platform/openedx/features/effort_estimation/block_transformers.py
Feanil Patel 8143796b26 docs: update references from setup.py to pyproject.toml
Update documentation, comments, and docstrings throughout the codebase
to reflect the migration from setup.py to pyproject.toml:

- Transformer class docstrings: changed to reference "entry point name
  in the package configuration" for better future-proofing
- Block structure module docs: updated to reference pyproject.toml
- Test file comments: updated entry point references
- Config files (tox.ini, pytest.ini): updated references
- Documentation (extension_points.rst, course apps ADRs): updated to
  reference pyproject.toml with inclusive language for external packages
- Requirements documentation (github.in): updated with inclusive language
- edxmako README: modernized install command to use pip install

Historical ADRs and references to external packages that may still use
setup.py were intentionally left unchanged or updated with inclusive
language acknowledging both pyproject.toml and setup.py.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-03 10:46:16 -05:00

225 lines
10 KiB
Python

"""
Effort Estimation Transformer implementation.
Adds effort estimations for block types it recognizes.
"""
import math
import crum
import lxml.html
from django.utils.functional import cached_property
from edxval.api import get_videos_for_course
from openedx.core.djangoapps.content.block_structure.transformer import BlockStructureTransformer
from openedx.core.lib.mobile_utils import is_request_from_mobile_app
from .toggles import EFFORT_ESTIMATION_DISABLED_FLAG
class EffortEstimationTransformer(BlockStructureTransformer):
    """
    Block transformer that annotates the block tree with effort estimates.

    Two public fields are written onto blocks by this transformer:

    - effort_activities: How many "activities" sit at this block or beneath it. A vertical contributes
      at most one activity. Roughly speaking, an activity is anything that is not text or video.
    - effort_time: A best-guess duration, in seconds, for this block and everything beneath it. It is derived from
      an assumed reading speed and from video durations, and is only meant as a rough guide.

    When any required data is missing (for example, a video without a duration), no estimates are produced for the
    course at all: no estimate is preferred over a misleading one.

    The estimates depend on data gathered during the collection phase (which runs on course publish), so a course
    gets no estimates until it is next published.
    """

    WRITE_VERSION = 1
    READ_VERSION = 1

    # Public xblock field names
    EFFORT_ACTIVITIES = 'effort_activities'
    EFFORT_TIME = 'effort_time'

    # Private transformer field names
    DISABLE_ESTIMATION = 'disable_estimation'
    HTML_WORD_COUNT = 'html_word_count'
    VIDEO_CLIP_DURATION = 'video_clip_duration'
    VIDEO_DURATION = 'video_duration'

    # Key into the temporary collection cache holding the {edx_video_id: duration} mapping
    CACHE_VIDEO_DURATIONS = 'video.durations'

    DEFAULT_WPM = 265  # words per minute

    class MissingEstimationData(Exception):
        """Raised by a collection method when data needed for an estimate is unavailable."""

    @classmethod
    def name(cls):
        """
        Unique identifier for the transformer's class.

        This must match the entry point name in the package configuration.
        """
        return 'effort_estimation'

    @classmethod
    def collect(cls, block_structure):
        """
        Gathers raw per-leaf estimation data (html word counts, video durations).

        Rolling leaf values up into verticals, sequentials and chapters is left to transform() at run time, because
        the set of blocks visible to each user can differ.

        If any collection method raises MissingEstimationData, traversal stops and the DISABLE_ESTIMATION
        transformer flag is set so that transform() skips the course entirely.
        """
        block_structure.request_xblock_fields('category')
        block_structure.request_xblock_fields('global_speed', 'only_on_web')  # video fields

        # Scratch space shared by the collection methods for the duration of this call
        scratch = {}

        # Block category -> collection method
        collectors = {
            'html': cls._collect_html_effort,
            'video': cls._collect_video_effort,
        }

        try:
            for usage_key in block_structure.topological_traversal():
                xblock = block_structure.get_xblock(usage_key)
                collector = collectors.get(xblock.category)
                if collector is not None:
                    collector(block_structure, usage_key, xblock, scratch)
        except cls.MissingEstimationData:
            # A required piece of data is absent - most likely a duration missing from the video pipeline.
            # Instead of trying to work around the gap, leave ourselves a note to show no durations for this
            # course at all. Better no estimate than a misleading estimate.
            block_structure.set_transformer_data(cls, cls.DISABLE_ESTIMATION, True)

    @classmethod
    def _collect_html_effort(cls, block_structure, block_key, xblock, _cache):
        """
        Stores the block's word count so reading time can be estimated later.

        Any failure while extracting text from the html is re-raised as MissingEstimationData.
        """
        try:
            markup = xblock.data
            if markup:
                text = lxml.html.fromstring(markup).text_content()
            else:
                text = ''
        except Exception as exc:  # pylint: disable=broad-except
            raise cls.MissingEstimationData() from exc

        word_count = len(text.split())
        block_structure.set_transformer_block_field(block_key, cls, cls.HTML_WORD_COUNT, word_count)

    @classmethod
    def _collect_video_effort(cls, block_structure, block_key, xblock, cache):
        """
        Stores the video's duration (and clip duration, if any) so viewing time can be estimated later.

        Raises MissingEstimationData when no positive duration is known for the video.
        """
        # For performance, fetch the metadata of every video in the course in one go and keep it in the cache,
        # rather than looking videos up one at a time.
        if cls.CACHE_VIDEO_DURATIONS not in cache:
            course_id = str(block_structure.root_block_usage_key.course_key)
            course_videos, _ = get_videos_for_course(course_id)
            cache[cls.CACHE_VIDEO_DURATIONS] = {
                video['edx_video_id']: video['duration']
                for video in course_videos
            }
        known_durations = cache[cls.CACHE_VIDEO_DURATIONS]

        # Without a usable duration we bail out with an exception, which stops this transformer from affecting
        # the course.
        duration = known_durations.get(xblock.edx_video_id, 0)
        if duration <= 0:
            raise cls.MissingEstimationData()

        block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_DURATION, duration)

        # A video may suggest explicit start & end times instead of being played in full. Only some clients honor
        # that (others - like the mobile app - play the whole video regardless), so the clip length is stored
        # separately and used in place of the full duration only when the client supports it.
        clip_length = xblock.end_time - xblock.start_time
        clip_duration = clip_length.total_seconds()
        if clip_duration > 0:
            block_structure.set_transformer_block_field(block_key, cls, cls.VIDEO_CLIP_DURATION, clip_duration)

    def transform(self, usage_info, block_structure):
        """
        Writes effort_time / effort_activities onto blocks, using the data stored by collect().

        Does nothing when the per-course opt-out flag is enabled or when collection flagged the course as
        missing data.
        """
        # Per-course opt-out flag: leave the course untouched when it is enabled
        course_key = block_structure.root_block_usage_key.course_key
        if EFFORT_ESTIMATION_DISABLED_FLAG.is_enabled(course_key):
            return

        # The collection phase may have told us to skip this course
        cls = EffortEstimationTransformer
        if block_structure.get_transformer_data(cls, cls.DISABLE_ESTIMATION, default=False):
            return

        # Block category -> estimation method. Each returns a tuple of (a number in seconds, an activity count)
        estimators = {
            'chapter': self._estimate_children_effort,
            'course': self._estimate_children_effort,
            'html': self._estimate_html_effort,
            'sequential': self._estimate_children_effort,
            'vertical': self._estimate_vertical_effort,
            'video': self._estimate_video_effort,
        }

        # Children are visited before their parents, so parents can pool the values written onto their children
        for usage_key in block_structure.post_order_traversal():
            category = block_structure.get_xblock_field(usage_key, 'category')
            estimator = estimators.get(category)
            if estimator is None:
                continue

            seconds, activity_count = estimator(usage_info, block_structure, usage_key)

            if seconds is not None:
                # Round up to a whole number of seconds purely for tidiness. Dropping the fraction does make the
                # estimate slightly less accurate, particularly once values are combined in parents, but a plain
                # integer is simpler for consumers and to-the-second precision is not a primary goal.
                block_structure.override_xblock_field(usage_key, self.EFFORT_TIME, math.ceil(seconds))

            if activity_count is not None:
                block_structure.override_xblock_field(usage_key, self.EFFORT_ACTIVITIES, activity_count)

    @cached_property
    def _is_on_mobile(self):
        """Returns a truthy value when the current request is from our mobile app (falsy if there is no request)."""
        request = crum.get_current_request()
        if not request:
            return request
        return is_request_from_mobile_app(request)

    def _gather_child_values(self, block_structure, block_key, field, default=0):
        """Adds up the value of field across the direct children of block_key, using default where it is unset."""
        total = 0
        for child_key in block_structure.get_children(block_key):
            total += block_structure.get_xblock_field(child_key, field, default=default)
        return total

    def _estimate_children_effort(self, _usage_info, block_structure, block_key):
        """Returns the pooled (time, activities) of the block's children."""
        seconds = self._gather_child_values(block_structure, block_key, self.EFFORT_TIME)
        if not seconds:
            # Never claim something takes 0 seconds; report None (no estimate) instead
            seconds = None

        # Activities default to 1: any child we cannot positively say has 0 activities gets counted
        activity_count = self._gather_child_values(block_structure, block_key, self.EFFORT_ACTIVITIES, default=1)

        return seconds, activity_count

    def _estimate_html_effort(self, _usage_info, block_structure, block_key):
        """Returns (average expected reading time in seconds, 0), or (None, 0) when there is no word count."""
        cls = EffortEstimationTransformer
        words = block_structure.get_transformer_block_field(block_key, cls, self.HTML_WORD_COUNT)
        if not words:
            return None, 0

        seconds = words / self.DEFAULT_WPM * 60  # minutes of reading, converted to seconds
        return seconds, 0

    def _estimate_vertical_effort(self, usage_info, block_structure, block_key):
        """A vertical is either an amount of time if we know it, or an activity."""
        seconds, activity_count = self._estimate_children_effort(usage_info, block_structure, block_key)

        # The vertical is the basic unit of activity: the call above may have tallied every unknown xblock as its
        # own activity, so collapse that tally to a single activity (or none) here.
        return seconds, int(bool(activity_count))

    def _estimate_video_effort(self, _usage_info, block_structure, block_key):
        """Returns (expected viewing time in seconds at the global_speed setting, 0), or (None, 0) if unknown."""
        cls = EffortEstimationTransformer
        clip_duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_CLIP_DURATION)
        duration = block_structure.get_transformer_block_field(block_key, cls, self.VIDEO_DURATION)
        global_speed = block_structure.get_xblock_field(block_key, 'global_speed', default=1)
        only_on_web = block_structure.get_xblock_field(block_key, 'only_on_web', default=False)

        on_mobile = self._is_on_mobile
        if on_mobile and only_on_web:
            return None, 0

        # Mobile can't do clips, so it always falls back to the full duration
        if on_mobile or not clip_duration:
            user_duration = duration
        else:
            user_duration = clip_duration

        if not user_duration:
            return None, 0

        # Deliberately use global_speed rather than speed (the last speed the user picked on this video),
        # because this estimate is meant to be somewhat static.
        return user_duration / global_speed, 0