build: Add missing Celery task decorators, and add CI check for it (#33154)

This commit is contained in:
Tim McCormack
2023-09-15 16:03:04 -04:00
committed by GitHub
parent 3db436be0a
commit 25b18e83cd
8 changed files with 290 additions and 0 deletions

45
.github/workflows/semgrep.yml vendored Normal file
View File

@@ -0,0 +1,45 @@
# Finds code problems by structural pattern matching.
#
# New rules can be added to test_root/semgrep/ and they should be picked up
# automatically. See https://semgrep.dev/docs/ for documentation.
name: Semgrep code quality
on:
pull_request:
push:
branches:
- master
jobs:
run_semgrep:
name: Semgrep analysis
runs-on: "${{ matrix.os }}"
strategy:
matrix:
os: [ "ubuntu-20.04" ]
python-version: [ "3.8" ]
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 1
- uses: actions/setup-python@v4
with:
python-version: "${{ matrix.python-version }}"
- name: Install semgrep
run: |
make pre-requirements
pip-sync requirements/edx/semgrep.txt
- name: Run semgrep
env:
# Peg this to some reasonable value so that semgrep's rewrapping
# of messages doesn't break up lines in an unpredictable manner:
# https://github.com/returntocorp/semgrep/issues/8608
COLUMNS: 80
run: |
semgrep scan --config test_root/semgrep/ --error --quiet \
-- lms cms common openedx

View File

@@ -102,6 +102,7 @@ REQ_FILES = \
requirements/edx/testing \
requirements/edx/development \
requirements/edx/assets \
requirements/edx/semgrep \
scripts/xblock/requirements
define COMMON_CONSTRAINTS_TEMP_COMMENT

View File

@@ -6,6 +6,7 @@ import logging
from celery import shared_task
from celery_utils.logged_task import LoggedTask
from edx_django_utils.monitoring import set_code_owner_attribute
from .data import CLIPBOARD_PURPOSE
from .models import StagedContent
@@ -14,6 +15,7 @@ log = logging.getLogger(__name__)
@shared_task(base=LoggedTask)
@set_code_owner_attribute
def delete_expired_clipboards(staged_content_ids: list[int]):
"""
A Celery task to delete StagedContent clipboard entries that are no longer

View File

@@ -9,6 +9,7 @@ from celery import shared_task
from celery_utils.logged_task import LoggedTask
from django.conf import settings
from django.contrib.auth import get_user_model
from edx_django_utils.monitoring import set_code_owner_attribute
from opaque_keys.edx.keys import CourseKey, UsageKey
from openedx_tagging.core.tagging.models import Taxonomy
@@ -73,6 +74,7 @@ def _delete_tags(content_object: CourseKey | UsageKey) -> None:
@shared_task(base=LoggedTask)
@set_code_owner_attribute
def update_course_tags(course_key_str: str) -> bool:
"""
Updates the automatically-managed tags for a course
@@ -98,6 +100,7 @@ def update_course_tags(course_key_str: str) -> bool:
@shared_task(base=LoggedTask)
@set_code_owner_attribute
def delete_course_tags(course_key_str: str) -> bool:
"""
Delete the tags for a Course (when the course itself has been deleted).
@@ -119,6 +122,7 @@ def delete_course_tags(course_key_str: str) -> bool:
@shared_task(base=LoggedTask)
@set_code_owner_attribute
def update_xblock_tags(usage_key_str: str) -> bool:
"""
Updates the automatically-managed tags for a XBlock
@@ -149,6 +153,7 @@ def update_xblock_tags(usage_key_str: str) -> bool:
@shared_task(base=LoggedTask)
@set_code_owner_attribute
def delete_xblock_tags(usage_key_str: str) -> bool:
"""
Delete the tags for a XBlock (when the XBlock itself is deleted).

View File

@@ -0,0 +1,13 @@
# Requirements to run Semgrep code quality checks
#
# DON'T JUST ADD NEW DEPENDENCIES!!!
#
# If you open a pull request that adds a new dependency, you should:
# * verify that the dependency has a license compatible with AGPLv3
# * confirm that it has no system requirements beyond what we already install
# * run "make upgrade" to update the detailed requirements files
#
-c ../constraints.txt
semgrep # Semgrep performs structural code searches

View File

@@ -0,0 +1,99 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# make upgrade
#
attrs==23.1.0
# via
# glom
# jsonschema
# referencing
# semgrep
boltons==21.0.0
# via
# face
# glom
# semgrep
bracex==2.3.post1
# via wcmatch
certifi==2023.7.22
# via requests
charset-normalizer==2.0.12
# via
# -c requirements/edx/../constraints.txt
# requests
click==8.1.6
# via
# -c requirements/edx/../constraints.txt
# click-option-group
# semgrep
click-option-group==0.5.6
# via semgrep
colorama==0.4.6
# via semgrep
defusedxml==0.7.1
# via semgrep
face==22.0.0
# via glom
glom==22.1.0
# via semgrep
idna==3.4
# via requests
importlib-resources==6.0.1
# via
# jsonschema
# jsonschema-specifications
jsonschema==4.19.0
# via semgrep
jsonschema-specifications==2023.7.1
# via jsonschema
markdown-it-py==3.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
packaging==23.1
# via semgrep
peewee==3.16.3
# via semgrep
pkgutil-resolve-name==1.3.10
# via jsonschema
pygments==2.16.1
# via rich
python-lsp-jsonrpc==1.0.0
# via semgrep
referencing==0.30.2
# via
# jsonschema
# jsonschema-specifications
requests==2.31.0
# via semgrep
rich==13.5.2
# via semgrep
rpds-py==0.10.0
# via
# jsonschema
# referencing
ruamel-yaml==0.17.32
# via semgrep
ruamel-yaml-clib==0.2.7
# via ruamel-yaml
semgrep==1.38.0
# via -r requirements/edx/semgrep.in
tomli==2.0.1
# via semgrep
typing-extensions==4.7.1
# via
# rich
# semgrep
ujson==5.8.0
# via python-lsp-jsonrpc
urllib3==1.26.16
# via
# -c requirements/edx/../constraints.txt
# requests
# semgrep
wcmatch==8.5
# via semgrep
zipp==3.16.2
# via importlib-resources

View File

@@ -0,0 +1,21 @@
Semgrep linters
###############
Linting rules for use with `semgrep`_ during CI checks on PRs.
Status
******
This is an experimental approach to developing new linting rules. Semgrep provides by-example structural matching that can be easier to write and maintain than procedural code inspecting ASTs. If the approach works out, we can expand our use of Semgrep; if it becomes a problem for some reason, we can switch to adding pylint rules in edx-lint.
Ignoring failures
*****************
If you need to tell semgrep to ignore a block of code, put a ``# nosemgrep`` comment on or before the first matched line.
Documentation for writing new rules:
- https://semgrep.dev/docs/writing-rules/rule-syntax/
- https://semgrep.dev/docs/writing-rules/pattern-syntax/
.. _semgrep: https://github.com/returntocorp/semgrep

View File

@@ -0,0 +1,104 @@
rules:
- id: celery-missing-code-owner-function
# We can't link directly to the howto doc in question because
# semgrep has a bug around long lines:
# https://github.com/returntocorp/semgrep/issues/8608
#
# Here's the intended URL, for reference:
# https://edx.readthedocs.io/projects/edx-django-utils/en/latest/monitoring/how_tos/add_code_owner_custom_attribute_to_an_ida.html#handling-celery-tasks
message: |
Celery tasks need to be decorated with `@set_code_owner_attribute`
(from the `edx_django_utils.monitoring` module) in order for us
to correctly track code-owners for errors and in other monitoring.
For more information, see the Celery section of "Add Code_Owner
Custom Attributes to an IDA" in the Monitoring How-Tos of
<https://edx.readthedocs.io/projects/edx-django-utils>.
languages:
- python
patterns:
# Find functions with decorators containing the substring "task"
# in their name. This might end up with false positives, but
# there are a lot of variations on how we decorate Celery tasks.
# This pattern should match all decorators, whether or not
# they're called as a function (both `@foo(...)` and `@foo`)
# and whether or not there are other decorators above or below.
- pattern-either:
- pattern: |
@$TASK
def $F(...):
...
- pattern: |
@$TASK(...)
def $F(...):
...
# Restrict the decorators of interest to just ones with "task"
# in the name.
- metavariable-pattern:
metavariable: $TASK
patterns:
- pattern-regex: >-
[^\(]*task(\(|$)
# Filter out all of the properly annotated functions, leaving
# just the ones of interest.
- pattern-not: |
@set_code_owner_attribute
def $F(...):
...
# This is an alternative approach that we have needed in rare cases.
- pattern-not: |
def $F(...):
...
set_code_owner_attribute_from_module(...)
severity: WARNING
# This is like celery-missing-code-owner-function but for the `run`
# method of Task classes.
- id: celery-missing-code-owner-class
message: |
Celery task classes need to decorate their `run` method with
`@set_code_owner_attribute` (imported from `edx_django_utils.monitoring`)
in order for us to correctly track code-owners for errors and in other
monitoring. Alternatively, the `run` method can call
`set_code_owner_attribute_from_module`.
For more information, see the Celery section of "Add Code_Owner
Custom Attributes to an IDA" in the Monitoring How-Tos of
<https://edx.readthedocs.io/projects/edx-django-utils>.
languages:
- python
patterns:
- pattern: |
class $C(..., $SUPER, ...):
def run(...):
...
- metavariable-pattern:
metavariable: $SUPER
patterns:
- pattern-regex: "Task$"
- pattern-not: |
class $C(..., $SUPER, ...):
@set_code_owner_attribute
def run(...):
...
- pattern-not: |
class $C(..., $SUPER, ...):
@set_code_owner_attribute
def run(...):
...
- pattern-not: |
class $C(..., $SUPER, ...):
def run(...):
...
set_code_owner_attribute_from_module(...)
severity: WARNING