feat: Add DOI-specific headers for link validation (#37246)
This commit is contained in:
@@ -12,6 +12,7 @@ import tarfile
|
||||
from datetime import datetime, timezone
|
||||
from importlib.metadata import entry_points
|
||||
from tempfile import NamedTemporaryFile, mkdtemp
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiohttp
|
||||
import olxcleaner
|
||||
@@ -56,16 +57,16 @@ from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_pr
|
||||
from cms.djangoapps.contentstore.utils import (
|
||||
IMPORTABLE_FILE_TYPES,
|
||||
contains_previous_course_reference,
|
||||
get_previous_run_course_key,
|
||||
create_course_info_usage_key,
|
||||
create_or_update_xblock_upstream_link,
|
||||
delete_course,
|
||||
get_previous_run_course_key,
|
||||
initialize_permissions,
|
||||
reverse_usage_url,
|
||||
translation_language
|
||||
)
|
||||
from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info
|
||||
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
|
||||
from cms.djangoapps.contentstore.utils import create_course_info_usage_key
|
||||
from common.djangoapps.course_action_state.models import CourseRerunState
|
||||
from common.djangoapps.static_replace import replace_static_urls
|
||||
from common.djangoapps.student.auth import has_course_author_access
|
||||
@@ -116,6 +117,18 @@ DEFAULT_HEADERS = {
|
||||
"Connection": "keep-alive",
|
||||
}
|
||||
|
||||
# DOI-specific headers: reuse the default User-Agent but request CSL JSON via the Accept header
|
||||
DOI_HEADERS = {
|
||||
"User-Agent": DEFAULT_HEADERS["User-Agent"],
|
||||
"Accept": "application/vnd.citationstyles.csl+json",
|
||||
"Connection": "keep-alive",
|
||||
}
|
||||
|
||||
# Domain-specific header mapping, keyed by the URL's lowercased netloc (exact match); other domains use DEFAULT_HEADERS
|
||||
DOMAIN_HEADERS = {
|
||||
"doi.org": DOI_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
class LinkState:
|
||||
"""
|
||||
@@ -1434,7 +1447,7 @@ async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100)
|
||||
|
||||
async def _validate_batch(batch, course_key):
|
||||
"""Validate a batch of URLs"""
|
||||
async with aiohttp.ClientSession(headers=DEFAULT_HEADERS) as session:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = [_validate_url_access(session, url_data, course_key) for url_data in batch]
|
||||
batch_results = await asyncio.gather(*tasks)
|
||||
return batch_results
|
||||
@@ -1462,8 +1475,17 @@ async def _validate_url_access(session, url_data, course_key):
|
||||
url = url.strip() # Trim leading/trailing whitespace
|
||||
result = {'block_id': block_id, 'url': url}
|
||||
standardized_url = _convert_to_standard_url(url, course_key)
|
||||
|
||||
try:
|
||||
async with session.get(standardized_url, timeout=5) as response:
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
headers = DOMAIN_HEADERS.get(domain, DEFAULT_HEADERS)
|
||||
except Exception as e: # lint-amnesty, pylint: disable=broad-except
|
||||
LOGGER.debug(f'[Link Check] Error parsing URL {url}: {str(e)}')
|
||||
headers = DEFAULT_HEADERS
|
||||
|
||||
try:
|
||||
async with session.get(standardized_url, headers=headers, timeout=5) as response:
|
||||
result.update({'status': response.status})
|
||||
except Exception as e: # lint-amnesty, pylint: disable=broad-except
|
||||
result.update({'status': None})
|
||||
|
||||
Reference in New Issue
Block a user