From 148b90358f71057c4bebe64264360e46550d5daf Mon Sep 17 00:00:00 2001
From: Cory Lee <cory@edx.org>
Date: Wed, 22 Jul 2020 17:45:23 -0400
Subject: [PATCH] Add Dockerfile (#23088)

* Add Dockerfile.
* Add gunicorn config files for local development.
* Add .dockerignore file.

Co-authored-by: Joseph Mulloy <jmulloy@edx.org>
Co-authored-by: Fred Smith <derf@edx.org>
Co-authored-by: Adam Blackwell <ablackwell@edx.org>
Co-authored-by: Kyle McCormick <kmccormick@edx.org>
Co-authored-by: Nadeem Shahzad <nshahzad@edx.org>
---
 .dockerignore                        | 154 +++++++++++++++++++++++++++
 .github/workflows/docker-publish.yml |  22 ++++
 Dockerfile                           |  92 ++++++++++++++++
 Makefile                             |  19 ++++
 cms/docker_cms_gunicorn.py           |  50 +++++++++
 lms/docker_lms_gunicorn.py           |  50 +++++++++
 6 files changed, 387 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .github/workflows/docker-publish.yml
 create mode 100644 Dockerfile
 create mode 100644 cms/docker_cms_gunicorn.py
 create mode 100644 lms/docker_lms_gunicorn.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000..6804bcfcb2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,154 @@
+# .dockerignore for edx-platform.
+# There's a lot here, please try to keep it organized.
+
+### Files that are not needed in the Docker image
+
+/test_root/
+.git
+Dockerfile
+
+### Files private to developers
+
+# Files that should be git-ignored, but are hand-edited or otherwise valued,
+# and so should not be destroyed by "make clean".
+# start-noclean
+requirements/private.txt
+requirements/edx/private.in
+requirements/edx/private.txt
+lms/envs/private.py
+cms/envs/private.py
+# end-noclean
+
+### Python artifacts
+*.pyc
+
+### Editor and IDE artifacts
+*~
+*.swp
+*.orig
+/nbproject
+.idea/
+.redcar/
+codekit-config.json
+.pycharm_helpers/
+/_mac/*
+/IntelliLang.xml
+/conda_packages.xml
+/databaseSettings.xml
+/diff.xml
+/debugger.xml
+/editor.xml
+/ide.general.xml
+/inspection/Default.xml
+/other.xml
+/packages.xml
+/web-browsers.xml
+
+### NFS artifacts
+.nfs*
+
+### OS X artifacts
+*.DS_Store
+.AppleDouble
+:2e_*
+:2e#
+
+### Internationalization artifacts
+*.mo
+*.po
+*.prob
+*.dup
+!django.po
+!django.mo
+!djangojs.po
+!djangojs.mo
+conf/locale/en/LC_MESSAGES/*.mo
+conf/locale/fake*/LC_MESSAGES/*.po
+conf/locale/fake*/LC_MESSAGES/*.mo
+# this was a mistake in i18n_tools, now fixed.
+conf/locale/messages.mo
+
+### Testing artifacts
+.testids/
+.noseids
+nosetests.xml
+.cache/
+.coverage
+.coverage.*
+coverage.xml
+cover/
+cover_html/
+reports/
+jscover.log
+jscover.log.*
+.pytest_cache/
+pytest_task*.txt
+.tddium*
+common/test/data/test_unicode/static/
+test_root/courses/
+test_root/data/test_bare.git/
+test_root/export_course_repos/
+test_root/paver_logs/
+test_root/uploads/
+django-pyfs
+.tox/
+common/test/db_cache/bok_choy_*.yaml
+common/test/data/badges/*.png
+
+### Installation artifacts
+*.egg-info
+.pip_download_cache/
+.prereqs_cache
+.vagrant/
+node_modules
+bin/
+
+### Static assets pipeline artifacts
+*.scssc
+lms/static/css/
+lms/static/certificates/css/
+cms/static/css/
+common/static/common/js/vendor/
+common/static/common/css/vendor/
+common/static/bundles
+webpack-stats.json
+
+### Styling generated from templates
+lms/static/sass/*.css
+lms/static/sass/*.css.map
+lms/static/certificates/sass/*.css
+lms/static/themed_sass/
+cms/static/css/
+cms/static/sass/*.css
+cms/static/sass/*.css.map
+cms/static/themed_sass/
+themes/**/css
+
+### Logging artifacts
+log/
+logs
+chromedriver.log
+ghostdriver.log
+
+### Celery artifacts ###
+celerybeat-schedule
+
+### Unknown artifacts
+database.sqlite
+courseware/static/js/mathjax/*
+flushdb.sh
+build
+/src/
+\#*\#
+.env/
+openedx/core/djangoapps/django_comment_common/comment_client/python
+autodeploy.properties
+.ws_migrations_complete
+dist
+*.bak
+
+# Visual Studio Code
+.vscode
+
+# Locally generated PII reports
+pii_report
\ No newline at end of file
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 0000000000..e7d9e0c1fa
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,22 @@
+name: Push Docker Images
+
+on:
+  push:
+    branches:
+      - master
+jobs:
+  # Push image to Docker Hub.
+  # See also https://docs.docker.com/docker-hub/builds/
+  push:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push'
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Build and Push docker image
+        env:
+          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+        run : make docker_push
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000..d42c1ca4d7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,92 @@
+FROM ubuntu:xenial as base
+
+# Install system requirements (apt-get rather than apt: apt's CLI is not meant for scripts)
+RUN apt-get update && \
+    # Global requirements
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    build-essential \
+    curl \
+    # If we don't need gcc, we should remove it.
+    g++ \
+    gcc \
+    git \
+    git-core \
+    language-pack-en \
+    libfreetype6-dev \
+    libmysqlclient-dev \
+    libssl-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libxslt1-dev \
+    software-properties-common \
+    swig \
+    # openedx requirements (a few repeat the list above; apt-get accepts duplicates)
+    gettext \
+    gfortran \
+    graphviz \
+    libffi-dev \
+    libfreetype6-dev \
+    libgeos-dev \
+    libgraphviz-dev \
+    libjpeg8-dev \
+    liblapack-dev \
+    libpng-dev \
+    libsqlite3-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libxslt1-dev \
+    ntp \
+    pkg-config \
+    python3-dev \
+    python3-pip \
+    python3.5 \
+    -qy && rm -rf /var/lib/apt/lists/*
+
+RUN locale-gen en_US.UTF-8
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:en
+ENV LC_ALL=en_US.UTF-8
+
+RUN ln -s /usr/bin/pip3 /usr/bin/pip
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+WORKDIR /edx/app/edx-platform/edx-platform
+
+COPY . /edx/app/edx-platform/edx-platform
+
+ENV PATH=/edx/app/edx-platform/nodeenv/bin:${PATH}
+ENV PATH=./node_modules/.bin:${PATH}
+ENV CONFIG_ROOT=/edx/etc/
+ENV PATH=/edx/app/edx-platform/edx-platform/bin:${PATH}
+ENV SETTINGS=production
+
+# TODO: Install requirements before copying in code so code changes do not rebuild these layers.
+RUN pip install setuptools==39.0.1 pip==9.0.3
+RUN pip install -r requirements/edx/base.txt
+
+RUN nodeenv /edx/app/edx-platform/nodeenv --node=8.9.3 --prebuilt
+
+RUN npm set progress=false \
+    && npm install
+
+RUN mkdir -p /edx/etc/
+
+EXPOSE 18000
+
+FROM base AS lms
+ENV SERVICE_VARIANT=lms
+ENV LMS_CFG=/edx/etc/lms.yaml
+CMD gunicorn -c /edx/app/edx-platform/edx-platform/lms/docker_lms_gunicorn.py --name lms --bind=0.0.0.0:18000 --max-requests=1000 --access-logfile - lms.wsgi:application
+
+FROM lms AS lms-newrelic
+RUN pip install newrelic
+CMD newrelic-admin run-program gunicorn -c /edx/app/edx-platform/edx-platform/lms/docker_lms_gunicorn.py --name lms --bind=0.0.0.0:18000 --max-requests=1000 --access-logfile - lms.wsgi:application
+
+FROM base AS cms
+ENV SERVICE_VARIANT=cms
+ENV STUDIO_CFG=/edx/etc/studio.yaml
+CMD gunicorn -c /edx/app/edx-platform/edx-platform/cms/docker_cms_gunicorn.py --name cms --bind=0.0.0.0:8000 --max-requests=1000 --access-logfile - cms.wsgi:application
+
+FROM cms AS cms-newrelic
+RUN pip install newrelic
+CMD newrelic-admin run-program gunicorn -c /edx/app/edx-platform/edx-platform/cms/docker_cms_gunicorn.py --name cms --bind=0.0.0.0:8000 --max-requests=1000 --access-logfile - cms.wsgi:application
diff --git a/Makefile b/Makefile
index d07f2b5fb1..b711b6c59e 100644
--- a/Makefile
+++ b/Makefile
@@ -100,3 +100,22 @@ upgrade: ## update the pip requirements files to use the latest releases satisfy
 	grep -e "^django==" requirements/edx/base.txt > requirements/edx/django.txt
 	sed '/^[dD]jango==/d' requirements/edx/testing.txt > requirements/edx/testing.tmp
 	mv requirements/edx/testing.tmp requirements/edx/testing.txt
+
+# These make targets currently only build, tag, and push LMS images.
+docker_build: ## build the lms and lms-newrelic Docker images
+	docker build . -f Dockerfile --target lms -t openedx/edx-platform
+	docker build . -f Dockerfile --target lms-newrelic -t openedx/edx-platform:latest-newrelic
+
+docker_tag: docker_build ## tag the built images with the GITHUB_SHA value
+	docker tag openedx/edx-platform openedx/edx-platform:${GITHUB_SHA}
+	docker tag openedx/edx-platform:latest-newrelic openedx/edx-platform:${GITHUB_SHA}-newrelic
+
+docker_auth: ## log in to the Docker registry with DOCKERHUB_USERNAME and DOCKERHUB_PASSWORD
+	echo "$$DOCKERHUB_PASSWORD" | docker login -u "$$DOCKERHUB_USERNAME" --password-stdin
+
+docker_push: docker_tag docker_auth ## push to docker hub
+	docker push 'openedx/edx-platform:latest'
+	docker push "openedx/edx-platform:${GITHUB_SHA}"
+	docker push 'openedx/edx-platform:latest-newrelic'
+	docker push "openedx/edx-platform:${GITHUB_SHA}-newrelic"
+
diff --git a/cms/docker_cms_gunicorn.py b/cms/docker_cms_gunicorn.py
new file mode 100644
index 0000000000..c95d06c3a0
--- /dev/null
+++ b/cms/docker_cms_gunicorn.py
@@ -0,0 +1,50 @@
+"""
+gunicorn configuration file: http://docs.gunicorn.org/en/stable/configure.html
+
+This file is created and updated by ansible, edit at your peril
+"""
+
+preload_app = False  # application code is loaded in each worker rather than before forking
+timeout = 300  # seconds a worker may stay silent before gunicorn kills and restarts it
+bind = "127.0.0.1:8010"  # default only: the Dockerfile CMD passes --bind, and the command line wins
+pythonpath = "/edx/app/edxapp/edx-platform"  # NOTE(review): Dockerfile puts the code in /edx/app/edx-platform/edx-platform; confirm
+max_requests = 50  # default only: the Dockerfile CMD passes --max-requests on the command line
+workers = 7  # number of worker processes
+
+
+def pre_request(worker, req):  # gunicorn hook: log the method and path of each incoming request
+    worker.log.info("%s %s", req.method, req.path)  # pass args so the logger formats lazily
+
+
+def close_all_caches():
+    """
+    Close every configured cache, then the global default cache object.
+
+    Newly forked workers must not keep sharing a cache socket with the process
+    they were forked from: a shared socket allows a race in which one worker
+    receives a cache response intended for another worker. The lookup below
+    is chosen so that this is safe on both Django 1.4 and 1.8 installations.
+    """
+    from django.conf import settings
+    from django.core import cache as django_cache
+
+    # Use the ``caches`` mapping when it exists, else fall back to ``get_cache``.
+    lookup = (
+        django_cache.caches.__getitem__ if hasattr(django_cache, 'caches')
+        else django_cache.get_cache  # pylint: disable=no-member
+    )
+
+    def _backends():
+        for alias in settings.CACHES:
+            yield lookup(alias)
+        # On 1.4 the global default is a separate Python object from
+        # get_cache("default"), with its own connection, so close it as well.
+        yield django_cache.cache
+
+    for backend in _backends():
+        if hasattr(backend, 'close'):
+            backend.close()
+
+
+def post_fork(_server, _worker):  # gunicorn hook; the server and worker arguments are unused
+    close_all_caches()  # drop cache connections inherited from the process this worker was forked from
diff --git a/lms/docker_lms_gunicorn.py b/lms/docker_lms_gunicorn.py
new file mode 100644
index 0000000000..aa7a41321f
--- /dev/null
+++ b/lms/docker_lms_gunicorn.py
@@ -0,0 +1,50 @@
+"""
+gunicorn configuration file: http://docs.gunicorn.org/en/stable/configure.html
+
+This file is created and updated by ansible, edit at your peril
+"""
+
+preload_app = False  # application code is loaded in each worker rather than before forking
+timeout = 300  # seconds a worker may stay silent before gunicorn kills and restarts it
+bind = "127.0.0.1:8000"  # default only: the Dockerfile CMD passes --bind, and the command line wins
+pythonpath = "/edx/app/edxapp/edx-platform"  # NOTE(review): Dockerfile puts the code in /edx/app/edx-platform/edx-platform; confirm
+max_requests = 50  # default only: the Dockerfile CMD passes --max-requests on the command line
+workers = 17  # number of worker processes
+
+
+def pre_request(worker, req):  # gunicorn hook: log the method and path of each incoming request
+    worker.log.info("%s %s", req.method, req.path)  # pass args so the logger formats lazily
+
+
+def close_all_caches():
+    """
+    Close every configured cache, then the global default cache object.
+
+    Newly forked workers must not keep sharing a cache socket with the process
+    they were forked from: a shared socket allows a race in which one worker
+    receives a cache response intended for another worker. The lookup below
+    is chosen so that this is safe on both Django 1.4 and 1.8 installations.
+    """
+    from django.conf import settings
+    from django.core import cache as django_cache
+
+    # Use the ``caches`` mapping when it exists, else fall back to ``get_cache``.
+    lookup = (
+        django_cache.caches.__getitem__ if hasattr(django_cache, 'caches')
+        else django_cache.get_cache  # pylint: disable=no-member
+    )
+
+    def _backends():
+        for alias in settings.CACHES:
+            yield lookup(alias)
+        # On 1.4 the global default is a separate Python object from
+        # get_cache("default"), with its own connection, so close it as well.
+        yield django_cache.cache
+
+    for backend in _backends():
+        if hasattr(backend, 'close'):
+            backend.close()
+
+
+def post_fork(_server, _worker):  # gunicorn hook; the server and worker arguments are unused
+    close_all_caches()  # drop cache connections inherited from the process this worker was forked from