diff --git a/common/djangoapps/util/log_sensitive.py b/common/djangoapps/util/log_sensitive.py new file mode 100644 index 0000000000..8874b7f518 --- /dev/null +++ b/common/djangoapps/util/log_sensitive.py @@ -0,0 +1,199 @@ +""" +Utilities for logging sensitive debug information such as authentication tokens. + +Usage: + +1. Generate keys using ``python3 -m common.djangoapps.util.log_sensitive gen-keys`` +2. Follow the instructions it prints out, and pay close attention to the warning + at the end of the output +3. When logging sensitive information, use like so:: + + logger.info( + "Received invalid auth token %s in Authorization header", + encrypt_for_log(token, settings.YOUR_PUBLIC_KEY_SETTING) + ) + + This will log a message like:: + + Received invalid auth token [encrypted: ZXI...fFo=|IYS...1KA==] in Authorization header + +4. If you need to decrypt one of these messages, save the encrypted portion + to file, retrieve the securely held private key, and run + ``python3 -m common.djangoapps.util.log_sensitive decrypt --help`` + for instructions. +""" + +from base64 import b64decode, b64encode + +import click +from nacl.public import Box, PrivateKey, PublicKey + +# Background: +# +# The NaCl "Box" construction provides asymmetric encryption, allowing +# the sender to encrypt something for a recipient without having a +# shared secret. This encryption is authenticated, meaning that the +# recipient verifies that the message matches the sender's public key +# (proof of sender). But it's also *repudiable* authentication; the +# design allows both the sender and the receiver to read (or have +# created!) the encrypted message, so the recipient can't prove to +# anyone *else* that the sender was the author. +# +# Why we use ephemeral sender keys: +# +# The Box is normally an ideal construction to use for +# communications. However, we don't want the logger to be able to read +# the messages it writes, especially not messages from a different +# server instance or from days or weeks ago. 
Only developers (or +# others) in possession of the recipient keypair should be able to +# read it, not anyone who compromises a server at some later +# date. Luckily, we also don't care about authenticating the logged +# messages as truly being from the server! The solution is for each +# server to create a fresh public/private keypair at startup and to +# include a copy of the public key in any encrypted logs it writes. + + +# Generate an ephemeral private key for the logger to use during this +# logging session. +logger_private_key = PrivateKey.generate() + + +def encrypt_for_log(message, reader_public_key_b64): + """ + Encrypt a message so that it can be logged using the given public key, + but only read by someone possessing the matching private key. The + public key is provided in base64. + + A separate keypair should be used for each recipient or purpose. + + Returns a string "SENDER_PUBLIC_KEY|ENCRYPTED_MESSAGE" (each part base64-encoded) wrapped in + some framing text "[encrypted: ...]"; the inner string can be + decrypted with decrypt_log_message. + """ + reader_public_key = PublicKey(b64decode(reader_public_key_b64)) + + encrypted = Box(logger_private_key, reader_public_key).encrypt(message.encode()) + + pubkey = logger_private_key.public_key + combined = b64encode(bytes(pubkey)).decode() + '|' + b64encode(encrypted).decode() + # The goal of this framing text is to make it always clear in log + # messages that the information is encrypted + return f"[encrypted: {combined}]" + + +def decrypt_log_message(encrypted_message, reader_private_key_b64): + """ + Decrypt a message using the private key that has been stored somewhere + secure and *not* on the server. 
+ """ + reader_private_key = PrivateKey(b64decode(reader_private_key_b64)) + sender_public_key_data, encrypted_raw = \ + [b64decode(part) for part in encrypted_message.split('|', 1)] + return Box(reader_private_key, PublicKey(sender_public_key_data)).decrypt(encrypted_raw).decode() + + +def generate_reader_keys(): + """ + Utility method for generating a public/private keypair for use with these + logging functions. + """ + reader_private_key = PrivateKey.generate() + return { + 'public': b64encode(bytes(reader_private_key.public_key)).decode(), + 'private': b64encode(bytes(reader_private_key)).decode(), + } + + +@click.group() +def cli(): + pass + + +@click.command('gen-keys', help="Generate keypair") +def cli_gen_keys(): + """ + Generate and print a keypair for handling sensitive log messages. + """ + reader_keys = generate_reader_keys() + public_64 = reader_keys['public'] + private_64 = reader_keys['private'] + print( + "This is your PUBLIC key, which should be included in the server's " + "configuration. Create a separate setting (and keypair) for each " + "distinct project or team. This value does not need special protection:" + "\n\n" + f" settings. = \"{public_64}\"" + "\n\n" + "This is your PRIVATE key, which must never be present on the server " + "and should instead be kept encrypted in a separate, safe place " + "such as a password manager:" + "\n\n" + f" \"{private_64}\" (private)" + "\n\n" + "WARNING: Before logging anything sensitive, get a legal/compliance review to " + "ensure this is acceptable in your organization. Encryption is not " + "generally a replacement for retention policies or other privacy " + "safeguards; using this utility does not automatically make sensitive " + "information safe to handle." + ) + + +@click.command('decrypt', help="""Decrypt a logged message. + +If possible, use bash process indirection to keep the private key from +touching disk or shell history unencrypted. 
The safest way is to keep +the private key in an encrypted file: + + --private-key-file <(gpg2 --decrypt auth-logging-key.enc) + +Alternatively, you could copy it from a password manager to your +clipboard and use a CLI clipboard tool to retrieve it: + +\b + --private-key-file <(xsel -bo) # Linux + --private-key-file <(pbpaste) # Mac + +Another option is to somehow get the private key into an environment +variable and echo it out: + + --private-key-file <(echo "$PRIVATE_KEY") + +The same techniques can also be used for the encrypted message data, +which is less sensitive but should also be handled with care. +""") +@click.option( + '--private-key-file', type=click.File('r'), required=True, + help="Path to file containing reader's private key in Base64", +) +@click.option( + '--message-file', type=click.File('r'), required=True, + help="Path to file containing encrypted message, or - for stdin", +) +def cli_decrypt(private_key_file, message_file): + """ + Decrypt a message and print it to stdout. + """ + print(decrypt_log_message(message_file.read(), private_key_file.read())) + + +@click.command('encrypt', help="Encrypt a one-off message (for testing)") +@click.option('--public-key', required=True, help="Reader's public key, in Base64") +@click.option( + '--message-file', type=click.File('r'), required=True, + help="Path to file containing message to encrypt, or - for stdin", +) +def cli_encrypt(public_key, message_file): + """ + Encrypt a message to the provided public key and print it to stdout. + + This is just intended for use when testing or experimenting with the decrypt command. 
+ """ + print(encrypt_for_log(message_file.read(), public_key)) + + +cli.add_command(cli_gen_keys) +cli.add_command(cli_decrypt) +cli.add_command(cli_encrypt) + +if __name__ == '__main__': + cli() diff --git a/common/djangoapps/util/tests/test_log_sensitive.py b/common/djangoapps/util/tests/test_log_sensitive.py new file mode 100644 index 0000000000..a13305c09a --- /dev/null +++ b/common/djangoapps/util/tests/test_log_sensitive.py @@ -0,0 +1,28 @@ +""" +Tests for util.log_sensitive +""" + +import re +from common.djangoapps.util.log_sensitive import decrypt_log_message, encrypt_for_log, generate_reader_keys + + +def test_encryption_round_trip(): + reader_keys = generate_reader_keys() + reader_public_64 = reader_keys['public'] + reader_private_64 = reader_keys['private'] + + to_log = encrypt_for_log("Testing testing 1234", reader_public_64) + re_base64 = r'[a-zA-Z0-9/+=]' + assert re.fullmatch(f'\\[encrypted: {re_base64}+\\|{re_base64}+\\]', to_log) + + to_decrypt = to_log.partition('[encrypted: ')[2].rstrip(']') + + decrypted = decrypt_log_message(to_decrypt, reader_private_64) + assert decrypted == "Testing testing 1234" + + # Also check that decryption still works if someone accidentally + # copies in the trailing framing "]" character, just as a + # nice-to-have. (base64 module should handle this already, since + # by default it discards characters outside the base64 alphabet.) 
+ decrypted_again = decrypt_log_message(to_decrypt + ']', reader_private_64) + assert decrypted_again == "Testing testing 1234" diff --git a/requirements/edx/base.in b/requirements/edx/base.in index 9cd76900d8..18eee89990 100644 --- a/requirements/edx/base.in +++ b/requirements/edx/base.in @@ -133,6 +133,7 @@ pyjwkest PyJWT>=1.6.3 pylti1p3 # Required by content_libraries core library to suport LTI 1.3 launches pymongo # MongoDB driver +PyNaCl # User-friendly cryptography (wrapper and bindings for libsodium) pynliner # Inlines CSS styles into HTML for email notifications python-dateutil python-Levenshtein diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 6a4a120b53..78feab62c7 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -109,10 +109,12 @@ certifi==2021.10.8 # py2neo # requests cffi==1.15.0 - # via cryptography + # via + # cryptography + # pynacl chardet==4.0.0 # via pysrt -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via # -r requirements/edx/paver.txt # aiohttp @@ -166,7 +168,7 @@ deprecated==1.2.13 # via # jwcrypto # redis -django==3.2.10 +django==3.2.11 # via # -c requirements/edx/../common_constraints.txt # -c requirements/edx/../constraints.txt @@ -784,6 +786,8 @@ pymongo==3.10.1 # event-tracking # mongodbproxy # mongoengine +pynacl==1.4.0 + # via -r requirements/edx/base.in pynliner==0.8.0 # via -r requirements/edx/base.in pyparsing==3.0.6 @@ -943,6 +947,7 @@ six==1.16.0 # paver # py2neo # pyjwkest + # pynacl # python-dateutil # python-memcached # python-swiftclient diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 5ae5dd20c2..760e45e1ea 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -149,11 +149,12 @@ cffi==1.15.0 # via # -r requirements/edx/testing.txt # cryptography + # pynacl chardet==4.0.0 # via # -r requirements/edx/testing.txt # pysrt -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via # -r 
requirements/edx/testing.txt # aiohttp @@ -244,7 +245,7 @@ distlib==0.3.4 # via # -r requirements/edx/testing.txt # virtualenv -django==3.2.10 +django==3.2.11 # via # -c requirements/edx/../common_constraints.txt # -c requirements/edx/../constraints.txt @@ -1086,6 +1087,8 @@ pymongo==3.10.1 # event-tracking # mongodbproxy # mongoengine +pynacl==1.4.0 + # via -r requirements/edx/testing.txt pynliner==0.8.0 # via -r requirements/edx/testing.txt pyparsing==3.0.6 @@ -1318,6 +1321,7 @@ six==1.16.0 # paver # py2neo # pyjwkest + # pynacl # python-dateutil # python-memcached # python-swiftclient diff --git a/requirements/edx/django.txt b/requirements/edx/django.txt index b0be5a81cf..c93648df5e 100644 --- a/requirements/edx/django.txt +++ b/requirements/edx/django.txt @@ -1 +1 @@ -django==3.2.10 +django==3.2.11 diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index b85d7f1c5b..a898118d04 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -10,7 +10,7 @@ babel==2.9.1 # via sphinx certifi==2021.10.8 # via requests -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via requests click==7.1.2 # via diff --git a/requirements/edx/paver.txt b/requirements/edx/paver.txt index d1c32bf4b6..05218feea4 100644 --- a/requirements/edx/paver.txt +++ b/requirements/edx/paver.txt @@ -6,7 +6,7 @@ # certifi==2021.10.8 # via requests -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via requests edx-opaque-keys==2.2.2 # via -r requirements/edx/paver.in diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index f45e949598..7ed9f9f334 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -144,11 +144,12 @@ cffi==1.15.0 # via # -r requirements/edx/base.txt # cryptography + # pynacl chardet==4.0.0 # via # -r requirements/edx/base.txt # pysrt -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via # -r requirements/edx/base.txt # aiohttp @@ -1021,6 +1022,8 @@ pymongo==3.10.1 # event-tracking # 
mongodbproxy # mongoengine +pynacl==1.4.0 + # via -r requirements/edx/base.txt pynliner==0.8.0 # via -r requirements/edx/base.txt pyparsing==3.0.6 @@ -1244,6 +1247,7 @@ six==1.16.0 # paver # py2neo # pyjwkest + # pynacl # python-dateutil # python-memcached # python-swiftclient diff --git a/scripts/xblock/requirements.txt b/scripts/xblock/requirements.txt index 5e204ed6a5..313f9c6a55 100644 --- a/scripts/xblock/requirements.txt +++ b/scripts/xblock/requirements.txt @@ -6,7 +6,7 @@ # certifi==2021.10.8 # via requests -charset-normalizer==2.0.9 +charset-normalizer==2.0.10 # via requests idna==3.3 # via requests