diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 40046f4a8..93723aec4 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -12,6 +12,17 @@ post_error_fails_task: true post_timeout_secs: 1800 # 5 minutes functions: + "start csfle servers": + - command: ec2.assume_role + params: + role_arn: ${aws_test_secrets_role} + - command: subprocess.exec + params: + binary: bash + include_expansions_in_env: ["AWS_SECRET_ACCESS_KEY", "AWS_ACCESS_KEY_ID", "AWS_SESSION_TOKEN"] + args: + - ${DRIVERS_TOOLS}/.evergreen/csfle/setup.sh + "setup": - command: git.get_project params: @@ -44,9 +55,20 @@ functions: params: binary: bash working_dir: "src" - include_expansions_in_env: ["DRIVERS_TOOLS", "MONGODB_URI"] + include_expansions_in_env: ["DRIVERS_TOOLS", "MONGODB_URI", "DJANGO_SETTINGS_MODULE", "CRYPT_SHARED_LIB_PATH"] + args: + - ./.evergreen/run-tests.sh + + "run encryption tests": + - command: subprocess.exec + type: test + params: + binary: bash + working_dir: "src" + include_expansions_in_env: ["DRIVERS_TOOLS", "MONGODB_URI", "DJANGO_SETTINGS_MODULE", "CRYPT_SHARED_LIB_PATH"] args: - ./.evergreen/run-tests.sh + - encryption "teardown": - command: subprocess.exec @@ -54,9 +76,11 @@ functions: binary: bash args: - ${DRIVERS_TOOLS}/.evergreen/teardown.sh + - ${DRIVERS_TOOLS}/.evergreen/csfle/teardown.sh pre: - func: setup + - func: start csfle servers - func: bootstrap mongo-orchestration post: @@ -67,6 +91,10 @@ tasks: commands: - func: "run unit tests" + - name: run-encryption-tests + commands: + - func: "run encryption tests" + buildvariants: - name: tests-7-noauth-nossl display_name: Run Tests 7.0 NoAuth NoSSL @@ -111,3 +139,23 @@ buildvariants: SSL: "ssl" tasks: - name: run-tests + + - name: tests-8-qe-local + display_name: Run Tests 8.2 QE local KMS + run_on: rhel87-small + expansions: + MONGODB_VERSION: "8.2" + TOPOLOGY: replica_set + DJANGO_SETTINGS_MODULE: "local_kms_encrypted_settings" + tasks: + - name: run-encryption-tests + + - name: 
tests-8-qe-aws + display_name: Run Tests 8.2 QE aws KMS + run_on: rhel87-small + expansions: + MONGODB_VERSION: "8.2" + TOPOLOGY: replica_set + DJANGO_SETTINGS_MODULE: "aws_kms_encrypted_settings" + tasks: + - name: run-encryption-tests diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index 46f02be16..7ebf4ece4 100644 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -2,11 +2,22 @@ set -eux -# Install django-mongodb-backend +# Export secrets as environment variables +if [[ "${1:-}" == "encryption" ]]; then + . ../secrets-export.sh +fi + +# Set up virtual environment /opt/python/3.12/bin/python3 -m venv venv . venv/bin/activate python -m pip install -U pip -pip install -e . + +# Conditionally install encryption extra if "encryption" arg is passed +if [[ "${1:-}" == "encryption" ]]; then + pip install -e '.[encryption]' +else + pip install -e . +fi # Install django and test dependencies git clone --branch mongodb-6.0.x https://github.com/mongodb-forks/django django_repo diff --git a/.evergreen/setup.sh b/.evergreen/setup.sh index 4709ed9bd..f1d552f47 100644 --- a/.evergreen/setup.sh +++ b/.evergreen/setup.sh @@ -16,8 +16,8 @@ DRIVERS_TOOLS="$(dirname "$(pwd)")/drivers-tools" PROJECT_DIRECTORY="$(pwd)" if [ "Windows_NT" = "${OS:-}" ]; then - DRIVERS_TOOLS=$(cygpath -m $DRIVERS_TOOLS) - PROJECT_DIRECTORY=$(cygpath -m $PROJECT_DIRECTORY) + DRIVERS_TOOLS=$(cygpath -m "$DRIVERS_TOOLS") + PROJECT_DIRECTORY=$(cygpath -m "$PROJECT_DIRECTORY") fi export PROJECT_DIRECTORY export DRIVERS_TOOLS @@ -37,8 +37,8 @@ PROJECT_DIRECTORY: "$PROJECT_DIRECTORY" EOT # Set up drivers-tools with a .env file. 
-git clone https://github.com/mongodb-labs/drivers-evergreen-tools.git ${DRIVERS_TOOLS} -cat < ${DRIVERS_TOOLS}/.env +git clone https://github.com/mongodb-labs/drivers-evergreen-tools.git "${DRIVERS_TOOLS}" +cat < "${DRIVERS_TOOLS}/.env" CURRENT_VERSION="$CURRENT_VERSION" DRIVERS_TOOLS="$DRIVERS_TOOLS" MONGO_ORCHESTRATION_HOME="$MONGO_ORCHESTRATION_HOME" diff --git a/.github/workflows/aws_kms_encrypted_settings.py b/.github/workflows/aws_kms_encrypted_settings.py new file mode 100644 index 000000000..58f1fa230 --- /dev/null +++ b/.github/workflows/aws_kms_encrypted_settings.py @@ -0,0 +1,26 @@ +from local_kms_encrypted_settings import * # noqa: F403 + +DATABASES["encrypted"] = { # noqa: F405 + "ENGINE": "django_mongodb_backend", + "NAME": "djangotests_encrypted", + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( # noqa: F405 + key_vault_namespace="djangotests_encrypted.__keyVault", + kms_providers={ + "aws": { + "accessKeyId": os.environ.get("FLE_AWS_KEY"), # noqa: F405 + "secretAccessKey": os.environ.get("FLE_AWS_SECRET"), # noqa: F405 + } + }, + crypt_shared_lib_path=os.environ["CRYPT_SHARED_LIB_PATH"], # noqa: F405 + crypt_shared_lib_required=True, + ), + "directConnection": True, + }, + "KMS_CREDENTIALS": { + "aws": { + "key": "arn:aws:kms:us-east-1:579766882180:key/89fcc2c4-08b0-4bd9-9f25-e30687b580d0", + "region": "us-east-1", + } + }, +} diff --git a/.github/workflows/local_kms_encrypted_settings.py b/.github/workflows/local_kms_encrypted_settings.py new file mode 100644 index 000000000..a0d25075e --- /dev/null +++ b/.github/workflows/local_kms_encrypted_settings.py @@ -0,0 +1,44 @@ +# Settings for django_mongodb_backend/tests when encryption is supported. 
class EncryptedRouter:
    """Route the ``encryption_`` test app to the "encrypted" database alias."""

    def db_for_read(self, model, **hints):
        """Return "encrypted" for encryption_ models, otherwise None (no opinion)."""
        return "encrypted" if model._meta.app_label == "encryption_" else None

    # Writes follow exactly the same rule as reads.
    db_for_write = db_for_read

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """
        Allow the encryption_ app's models only in the encrypted database and
        keep every other app's models out of it. Return None (no opinion)
        for other apps on other databases.
        """
        targets_encrypted_db = db == "encrypted"
        if app_label == "encryption_":
            return targets_encrypted_db
        return False if targets_encrypted_db else None
from django_settings import * # noqa: F403 +DATABASES["encrypted"] = {} # noqa: F405 DATABASE_ROUTERS = ["django_mongodb_backend.routers.MongoRouter"] diff --git a/.github/workflows/runtests.py b/.github/workflows/runtests.py index cc258f363..3775c422b 100755 --- a/.github/workflows/runtests.py +++ b/.github/workflows/runtests.py @@ -6,151 +6,6 @@ from django.core.exceptions import ImproperlyConfigured test_apps = [ - "admin_changelist", - "admin_checks", - "admin_custom_urls", - "admin_docs", - "admin_filters", - "admin_inlines", - "admin_ordering", - "admin_scripts", - "admin_utils", - "admin_views", - "admin_widgets", - "aggregation", - "aggregation_regress", - "annotations", - "apps", - "async", - "auth_tests", - "backends", - "basic", - "bulk_create", - "cache", - "check_framework", - "constraints", - "contenttypes_tests", - "context_processors", - "custom_columns", - "custom_lookups", - "custom_managers", - "custom_pk", - "datatypes", - "dates", - "datetimes", - "db_functions", - "defer", - "defer_regress", - "delete", - "delete_regress", - "empty", - "empty_models", - "expressions", - "expressions_case", - "field_defaults", - "file_storage", - "file_uploads", - "fixtures", - "fixtures_model_package", - "fixtures_regress", - "flatpages_tests", - "force_insert_update", - "foreign_object", - "forms_tests", - "from_db_value", - "generic_inline_admin", - "generic_relations", - "generic_relations_regress", - "generic_views", - "get_earliest_or_latest", - "get_object_or_404", - "get_or_create", - "i18n", - "indexes", - "inline_formsets", - "introspection", - "invalid_models_tests", - "known_related_objects", - "lookup", - "m2m_and_m2o", - "m2m_intermediary", - "m2m_multiple", - "m2m_recursive", - "m2m_regress", - "m2m_signals", - "m2m_through", - "m2m_through_regress", - "m2o_recursive", - "managers_regress", - "many_to_many", - "many_to_one", - "many_to_one_null", - "max_lengths", - "messages_tests", - "migrate_signals", - "migration_test_data_persistence", - 
"migrations", - "model_fields", - "model_forms", - "model_formsets", - "model_formsets_regress", - "model_indexes", - "model_inheritance", - "model_inheritance_regress", - "model_options", - "model_package", - "model_regress", - "model_utils", - "modeladmin", - "multiple_database", - "mutually_referential", - "nested_foreign_keys", - "null_fk", - "null_fk_ordering", - "null_queries", - "one_to_one", - "or_lookups", - "order_with_respect_to", - "ordering", - "pagination", - "prefetch_related", - "proxy_model_inheritance", - "proxy_models", - "queries", - "queryset_pickle", - "redirects_tests", - "reserved_names", - "reverse_lookup", - "save_delete_hooks", - "schema", - "select_for_update", - "select_related", - "select_related_onetoone", - "select_related_regress", - "serializers", - "servers", - "sessions_tests", - "shortcuts", - "signals", - "sitemaps_tests", - "sites_framework", - "sites_tests", - "string_lookup", - "swappable_models", - "syndication_tests", - "test_client", - "test_client_regress", - "test_runner", - "test_utils", - "timezones", - "transactions", - "unmanaged_models", - "update", - "update_only_fields", - "user_commands", - "validation", - "view_tests", - "xor_lookups", # Add directories in django_mongodb_backend/tests *sorted( [ diff --git a/.github/workflows/test-python-atlas.yml b/.github/workflows/test-python-atlas.yml index bbda0f9f4..ebcc0b0e5 100644 --- a/.github/workflows/test-python-atlas.yml +++ b/.github/workflows/test-python-atlas.yml @@ -28,7 +28,7 @@ jobs: - name: install django-mongodb-backend run: | pip3 install --upgrade pip - pip3 install -e . + pip3 install -e .[encryption] - name: Checkout Django uses: actions/checkout@v6 with: @@ -51,8 +51,15 @@ jobs: run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - name: Start local Atlas working-directory: . 
@cached_property
def client_encryption(self):
    """
    Return a ClientEncryption built from this connection's auto-encryption
    options. The value is computed on first access and then cached.
    """
    client = self.connection
    opts = client._options.auto_encryption_opts
    kms_providers = opts._kms_providers
    key_vault_namespace = opts._key_vault_namespace
    # The connection's own client is also passed as the key vault client.
    return ClientEncryption(
        kms_providers,
        key_vault_namespace,
        client,
        client.codec_options,
    )
self.search_pipeline = [] + self.wrap_for_global_aggregation = False def _get_group_alias_column(self, expr, annotation_group_idx): """Generate a dummy field for use in the ids fields in $group.""" @@ -234,21 +235,8 @@ def _build_aggregation_pipeline(self, ids, group): """Build the aggregation pipeline for grouping.""" pipeline = [] if not ids: - group["_id"] = None - pipeline.append({"$facet": {"group": [{"$group": group}]}}) - pipeline.append( - { - "$addFields": { - key: { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": key, - } - } - for key in group - } - } - ) + pipeline.append({"$group": {"_id": None, **group}}) + self.wrap_for_global_aggregation = True else: group["_id"] = ids pipeline.append({"$group": group}) diff --git a/django_mongodb_backend/creation.py b/django_mongodb_backend/creation.py index c8002b2c4..a1d45277e 100644 --- a/django_mongodb_backend/creation.py +++ b/django_mongodb_backend/creation.py @@ -1,5 +1,5 @@ from django.conf import settings -from django.db.backends.base.creation import BaseDatabaseCreation +from django.db.backends.base.creation import TEST_DATABASE_PREFIX, BaseDatabaseCreation class DatabaseCreation(BaseDatabaseCreation): @@ -7,6 +7,14 @@ def _execute_create_test_db(self, cursor, parameters, keepdb=False): # Close the connection (which may point to the non-test database) so # that a new connection to the test database can be established later. self.connection.close_pool() + # Use a test _key_vault_namespace. This assumes the key vault database + # is the same as the encrypted database so that _destroy_test_db() can + # reset the collection by dropping it. 
@cached_property
def supports_queryable_encryption(self):
    """
    Return whether the Queryable Encryption tests can run on this server.

    For testing purposes, Queryable Encryption requires a MongoDB 8.0 or
    later replica set or sharded cluster, as well as MongoDB Atlas or
    Enterprise. This flag must not guard any non-test functionality since
    it would prevent MongoDB 7.0 from being used, which also supports
    Queryable Encryption. The models in tests/encryption_ aren't compatible
    with MongoDB 7.0 because {"queryType": "range"} is "rangePreview" there.
    """
    self.connection.ensure_connection()
    build_info = self.connection.connection.admin.command("buildInfo")
    # A reply without "modules" (or with a null value) means "not
    # enterprise"; it must not raise TypeError from the membership test.
    is_enterprise = "enterprise" in (build_info.get("modules") or ())
    return (
        (is_enterprise or self.supports_atlas_search)
        and self._supports_transactions
        and self.is_mongodb_8_0
    )
class EncryptedFieldMixin:
    """
    Mixin that marks a model field as encrypted.

    It stores the optional ``queries`` argument on the field and rejects the
    ``db_index``, ``null`` and ``unique`` options with ValueError.
    """

    encrypted = True

    def __init__(self, *args, queries=None, db_index=False, null=False, unique=False, **kwargs):
        # Checked in this order so the first offending option is the one reported.
        rejected_options = (
            (db_index, "'db_index=True' is not supported on encrypted fields."),
            (null, "'null=True' is not supported on encrypted fields."),
            (unique, "'unique=True' is not supported on encrypted fields."),
        )
        for enabled, message in rejected_options:
            if enabled:
                raise ValueError(message)
        self.queries = queries
        super().__init__(*args, **kwargs)

    def deconstruct(self):
        """
        Extend the parent's deconstruction with ``queries`` (when set) and
        report the import path as ``django_mongodb_backend.fields`` instead
        of the ``.encryption`` submodule.
        """
        field_name, import_path, positional, keywords = super().deconstruct()

        if self.queries is not None:
            keywords["queries"] = self.queries

        if import_path.startswith("django_mongodb_backend.fields.encryption"):
            import_path = import_path.replace(
                "django_mongodb_backend.fields.encryption",
                "django_mongodb_backend.fields",
            )

        return field_name, import_path, positional, keywords
def __init__(self, *args, **kwargs): + if "queries" in kwargs: + raise ValueError(f"{self.__class__.__name__} does not support the queries argument.") + super().__init__(*args, **kwargs) + + +# Django fields +class EncryptedBinaryField(EncryptedFieldMixin, models.BinaryField): + pass + + +class EncryptedBigIntegerField(EncryptedFieldMixin, models.BigIntegerField): + pass + + +class EncryptedBooleanField(EncryptedFieldMixin, models.BooleanField): + pass + + +class EncryptedCharField(EncryptedFieldMixin, models.CharField): + pass + + +class EncryptedDateField(EncryptedFieldMixin, models.DateField): + pass + + +class EncryptedDateTimeField(EncryptedFieldMixin, models.DateTimeField): + pass + + +class EncryptedDecimalField(EncryptedFieldMixin, models.DecimalField): + pass + + +class EncryptedDurationField(EncryptedFieldMixin, models.DurationField): + pass + + +class EncryptedEmailField(EncryptedFieldMixin, models.EmailField): + pass + + +class EncryptedFloatField(EncryptedFieldMixin, models.FloatField): + pass + + +class EncryptedGenericIPAddressField(EncryptedFieldMixin, models.GenericIPAddressField): + pass + + +class EncryptedIntegerField(EncryptedFieldMixin, models.IntegerField): + pass + + +class EncryptedPositiveBigIntegerField(EncryptedFieldMixin, models.PositiveBigIntegerField): + pass + + +class EncryptedPositiveIntegerField(EncryptedFieldMixin, models.PositiveIntegerField): + pass + + +class EncryptedPositiveSmallIntegerField(EncryptedFieldMixin, models.PositiveSmallIntegerField): + pass + + +class EncryptedSmallIntegerField(EncryptedFieldMixin, models.SmallIntegerField): + pass + + +class EncryptedTextField(EncryptedFieldMixin, models.TextField): + pass + + +class EncryptedTimeField(EncryptedFieldMixin, models.TimeField): + pass + + +class EncryptedURLField(EncryptedFieldMixin, models.URLField): + pass + + +class EncryptedUUIDField(EncryptedFieldMixin, models.UUIDField): + pass + + +# MongoDB fields +class EncryptedArrayField(NoQueriesMixin, 
def _encrypted_aware_comparison_path(lookup, compiler, connection, operator):
    """
    Return the path-style MQL for a comparison lookup.

    ``operator`` is the MongoDB comparison operator (e.g. "$lt") emitted
    directly when the left-hand side is an encrypted field. Otherwise the
    lookup is delegated to ``connection.mongo_operators`` under the lookup's
    own ``lookup_name``.
    """
    lhs_mql = process_lhs(lookup, compiler, connection)
    value = process_rhs(lookup, compiler, connection)
    # Encrypted fields don't support null and Automatic Encryption cannot
    # handle it ("csfle "analyze_query" failed: typenull type isn't supported
    # for the range encrypted index.), so omit the null check.
    if getattr(lookup.lhs.output_field, "encrypted", False):
        return {lhs_mql: {operator: value}}
    return connection.mongo_operators[lookup.lookup_name](lhs_mql, value)


def less_than_path(self, compiler, connection):
    """Return the path-style MQL for this less-than lookup."""
    return _encrypted_aware_comparison_path(self, compiler, connection, "$lt")


def less_than_or_equal_path(self, compiler, connection):
    """Return the path-style MQL for this less-than-or-equal lookup."""
    return _encrypted_aware_comparison_path(self, compiler, connection, "$lte")
Defaults to ``default``.""", + ) + + def handle(self, *args, **options): + db = options["database"] + connection = connections[db] + connection.ensure_connection() + encrypted_fields_map = {} + with connection.schema_editor() as editor: + for app_config in apps.get_app_configs(): + for model in router.get_migratable_models(app_config, db): + if model_has_encrypted_fields(model): + fields = editor._get_encrypted_fields(model, create_data_keys=False) + encrypted_fields_map[model._meta.db_table] = fields + self.stdout.write(json_util.dumps(encrypted_fields_map, indent=4)) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 5b4f0ec51..4bbe982cb 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -25,7 +25,7 @@ def wrapper(*args, **kwargs): except DuplicateKeyError as e: raise IntegrityError from e except PyMongoError as e: - raise DatabaseError from e + raise DatabaseError(str(e)) from e return wrapper @@ -56,6 +56,7 @@ def __init__(self, compiler): # $lookup stage that encapsulates the pipeline for performing a nested # subquery. 
self.subquery_lookup = None + self.wrap_for_global_aggregation = compiler.wrap_for_global_aggregation def __repr__(self): return f"" @@ -91,6 +92,22 @@ def get_pipeline(self): pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) + if self.wrap_for_global_aggregation: + pipeline = [ + {"$collStats": {}}, + { + "$lookup": { + "from": self.compiler.collection_name, + "as": "wrapped", + "pipeline": pipeline, + } + }, + { + "$replaceWith": { + "$cond": [{"$eq": ["$wrapped", []]}, {}, {"$first": "$wrapped"}] + } + }, + ] if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: diff --git a/django_mongodb_backend/routers.py b/django_mongodb_backend/routers.py index 60e54bbd8..4b75efbe8 100644 --- a/django_mongodb_backend/routers.py +++ b/django_mongodb_backend/routers.py @@ -1,7 +1,5 @@ from django.apps import apps -from django_mongodb_backend.models import EmbeddedModel - class MongoRouter: def allow_migrate(self, db, app_label, model_name=None, **hints): @@ -9,6 +7,8 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): EmbeddedModels don't have their own collection and must be ignored by dumpdata. 
""" + from django_mongodb_backend.models import EmbeddedModel # noqa: PLC0415 + if not model_name: return None try: diff --git a/django_mongodb_backend/schema.py b/django_mongodb_backend/schema.py index 9bcaecc63..17d836f87 100644 --- a/django_mongodb_backend/schema.py +++ b/django_mongodb_backend/schema.py @@ -1,5 +1,7 @@ from time import monotonic, sleep +from django.core.exceptions import ImproperlyConfigured +from django.db import NotSupportedError from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.models import Index, UniqueConstraint from pymongo.operations import SearchIndexModel @@ -9,7 +11,7 @@ from .fields import EmbeddedModelField from .gis.schema import GISSchemaEditor from .query import wrap_database_errors -from .utils import OperationCollector +from .utils import OperationCollector, model_has_encrypted_fields def ignore_embedded_models(func): @@ -44,7 +46,7 @@ def get_database(self): @wrap_database_errors @ignore_embedded_models def create_model(self, model): - self.get_database().create_collection(model._meta.db_table) + self._create_collection(model) self._create_model_indexes(model) # Make implicit M2M tables. for field in model._meta.local_many_to_many: @@ -452,6 +454,113 @@ def wait_until_index_dropped(collection, index_name, timeout=60, interval=0.5): sleep(interval) raise TimeoutError(f"Index {index_name} not dropped after {timeout} seconds.") + def _create_collection(self, model): + """ + Create a collection for the model. + If the model has encrypted fields, build (or retrieve) the encrypted_fields schema. + """ + db = self.get_database() + db_table = model._meta.db_table + + if model_has_encrypted_fields(model): + # Encrypted path + client = self.connection.connection + auto_encryption_opts = getattr(client._options, "auto_encryption_opts", None) + if not auto_encryption_opts: + raise ImproperlyConfigured( + f"Tried to create model {model._meta.label} in " + f"'{self.connection.alias}' database. 
def _get_encrypted_fields(
    self, model, *, key_alt_name_prefix=None, path_prefix=None, create_data_keys=True
):
    """
    Return the encrypted fields map for the given model.

    The result is ``{"fields": [...]}`` with one entry per encrypted field
    holding its ``bsonType``, ``path``, ``keyId`` and, when the field
    defines them, ``queries``. Encrypted fields found inside non-encrypted
    embedded models are flattened into the same list using dotted paths.

    The "prefix" arguments are used when this method is called recursively
    on embedded models. If ``create_data_keys`` is true, a new data key is
    created for every encrypted field; otherwise the existing key is looked
    up in the key vault by its alternate name.

    Raise NotSupportedError if more than one KMS provider is configured, or
    ImproperlyConfigured if an existing data key cannot be found.
    """
    connection = self.connection
    client = connection.connection
    key_alt_name_prefix = key_alt_name_prefix or model._meta.db_table
    path_prefix = path_prefix or ""
    auto_encryption_opts = client._options.auto_encryption_opts
    _, key_vault_collection = auto_encryption_opts._key_vault_namespace.split(".", 1)
    key_vault = self.get_collection(key_vault_collection)
    # Create partial unique index on keyAltNames.
    # TODO: find a better place for this. It only needs to run once for an
    # application's lifetime.
    key_vault.create_index(
        "keyAltNames", unique=True, partialFilterExpression={"keyAltNames": {"$exists": True}}
    )
    # Select the KMS provider.
    kms_providers = auto_encryption_opts._kms_providers
    if len(kms_providers) == 1:
        # If one provider is configured, no need to consult the router.
        kms_provider = next(iter(kms_providers.keys()))
    else:  # (Since PyMongo requires at least one KMS provider.)
        raise NotSupportedError(
            "Multiple KMS providers per database aren't supported. "
            "Please create a feature request with details about your "
            "use case."
        )
    # The local provider takes no master key; any other provider reads its
    # master key from the database's KMS_CREDENTIALS setting.
    if kms_provider == "local":
        master_key = None
    else:
        master_key = connection.settings_dict["KMS_CREDENTIALS"][kms_provider]
    # Generate the encrypted fields map.
    field_list = []
    for field in model._meta.fields:
        key_alt_name = f"{key_alt_name_prefix}.{field.column}"
        path = f"{path_prefix}.{field.column}" if path_prefix else field.column
        is_encrypted = getattr(field, "encrypted", False)
        # Check non-encrypted EmbeddedModelFields for encrypted fields.
        if isinstance(field, EmbeddedModelField) and not is_encrypted:
            embedded_result = self._get_encrypted_fields(
                field.embedded_model,
                key_alt_name_prefix=key_alt_name,
                path_prefix=path,
                create_data_keys=create_data_keys,
            )
            # An EmbeddedModelField may not have any encrypted fields.
            if embedded_result:
                field_list.extend(embedded_result["fields"])
        # Populate data for encrypted field.
        elif is_encrypted:
            if create_data_keys:
                data_key = connection.client_encryption.create_data_key(
                    kms_provider=kms_provider,
                    key_alt_names=[key_alt_name],
                    master_key=master_key,
                )
            else:
                key_document = key_vault.find_one({"keyAltNames": key_alt_name})
                if not key_document:
                    raise ImproperlyConfigured(
                        f"Encryption key {key_alt_name} not found. Have "
                        f"you migrated the {model} model?"
                    )
                data_key = key_document["_id"]
            field_dict = {
                "bsonType": field.db_type(connection),
                "path": path,
                "keyId": data_key,
            }
            if queries := getattr(field, "queries", None):
                field_dict["queries"] = queries
            field_list.append(field_dict)
    return {"fields": field_list}
class DatabaseSchemaEditor(GISSchemaEditor, BaseSchemaEditor): diff --git a/django_mongodb_backend/utils.py b/django_mongodb_backend/utils.py index 8c68bf442..0dcd2ff44 100644 --- a/django_mongodb_backend/utils.py +++ b/django_mongodb_backend/utils.py @@ -71,6 +71,7 @@ class OperationDebugWrapper: "create_indexes", "create_search_index", "drop", + "find_one", "index_information", "insert_many", "delete_many", @@ -146,3 +147,21 @@ def wrapper(self, *args, **kwargs): self.log(method, args, kwargs) return wrapper + + +def model_has_encrypted_fields(model): + """ + Recursively check if this model or any embedded models contain encrypted fields. + Returns True if encryption is found anywhere in the hierarchy. + """ + from django_mongodb_backend.fields import EmbeddedModelField # noqa: PLC0415 + + # Recursively check embedded models. + return any( + getattr(field, "encrypted", False) + or ( + isinstance(field, EmbeddedModelField) + and model_has_encrypted_fields(field.embedded_model) + ) + for field in model._meta.fields + ) diff --git a/docs/howto/index.rst b/docs/howto/index.rst index 95d7ef632..8451960ef 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -11,3 +11,4 @@ Project configuration :maxdepth: 1 contrib-apps + queryable-encryption diff --git a/docs/howto/queryable-encryption.rst b/docs/howto/queryable-encryption.rst new file mode 100644 index 000000000..7027cdd84 --- /dev/null +++ b/docs/howto/queryable-encryption.rst @@ -0,0 +1,408 @@ +================================ +Configuring Queryable Encryption +================================ + +.. versionadded:: 6.0.1 + +:doc:`manual:core/queryable-encryption` is a powerful MongoDB feature that +allows you to encrypt sensitive fields in your database while still supporting +queries on that encrypted data. + +This section will guide you through the process of configuring Queryable +Encryption in your Django project. + +.. 
admonition:: MongoDB requirements + + Queryable Encryption can be used with MongoDB replica sets or sharded + clusters running version 7.0 or later. Standalone instances are not + supported. The :ref:`manual:qe-compatibility-reference` table summarizes + which MongoDB server products support Queryable Encryption. + +Installation +============ + +In addition to Django MongoDB Backend's regular :doc:`installation +` and :doc:`configuration ` steps, Queryable +Encryption requires installing optional Python dependencies and the +:ref:`manual:csfle-reference-install-shared-lib`. + +To install the optional dependencies, use pip with the ``encryption`` extra: + +.. code-block:: console + + $ pip install 'django-mongodb-backend[encryption]' + +Next, download the :ref:`Automatic Encryption Shared Library +`. You can choose the latest version, +even if it doesn't match your MongoDB server version. After extracting the +archive, configure the :ref:`crypt_shared_lib_path +`. + +.. _qe-configuring-databases-setting: + +Configuring the ``DATABASES`` setting +===================================== + +In addition to the :ref:`database settings ` +required to use Django MongoDB Backend, Queryable Encryption requires +configuring a separate database connection that uses PyMongo's +:class:`~pymongo.encryption_options.AutoEncryptionOpts`. + +Here's a sample configuration using a local KMS provider:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "default": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "my_database", + # ... + }, + "encrypted": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "encrypted", + # ... 
+ "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + key_vault_namespace="encrypted.__keyVault", + kms_providers={ + "local": { + # Generated by os.urandom(96) + "key": ( + b'-\xc3\x0c\xe3\x93\xc3\x8b\xc0\xf8\x12\xc5#b' + b'\x19\xf3\xbc\xccR\xc8\xedI\xda\\ \xfb\x9cB' + b'\x7f\xab5\xe7\xb5\xc9x\xb8\xd4d\xba\xdc\x9c' + b'\x9a\xdb9J]\xe6\xce\x104p\x079q.=\xeb\x9dK*' + b'\x97\xea\xf8\x1e\xc3\xd49K\x18\x81\xc3\x1a"' + b'\xdc\x00U\xc4u"X\xe7xy\xa5\xb2\x0e\xbc\xd6+-' + b'\x80\x03\xef\xc2\xc4\x9bU' + ) + }, + }, + crypt_shared_lib_path="/path/to/mongo_crypt_shared_v1", + crypt_shared_lib_required=True, + ) + }, + }, + } + +``key_vault_namespace`` specifies where to store the data encryption keys. +The database name of the key vault must be the same as in ``"NAME"``. The +vault's collection name can be whatever you wish, but by convention, it's often +``__keyVault``. + +.. admonition:: Dynamic library path configuration + + If you encounter the following error: + + .. code-block:: text + + Pymongocrypt.errors.MongoCryptError: An existing crypt_shared library is + loaded by the application at [/path/to/mongo_crypt_v1.so], but the current + call to mongocrypt_init() failed to find that same library. + + add the directory that contains the shared library to your platform’s dynamic + library search path: + + +---------------+------------------------------+ + | **Platform** | **Environment variable** | + +---------------+------------------------------+ + | Windows | PATH | + +---------------+------------------------------+ + | macOS | DYLD_FALLBACK_LIBRARY_PATH | + +---------------+------------------------------+ + | Linux | LD_LIBRARY_PATH | + +---------------+------------------------------+ + + Examples: + + macOS (bash): + + .. code-block:: console + + $ export DYLD_FALLBACK_LIBRARY_PATH="/path/to/mongo_crypt_shared:${DYLD_FALLBACK_LIBRARY_PATH}" + + Linux (bash): + + .. 
code-block:: console + + $ export LD_LIBRARY_PATH="/path/to/mongo_crypt_shared:${LD_LIBRARY_PATH}" + + Windows (PowerShell): + + .. code-block:: powershell + + $env:Path = "C:\path\to\mongo_crypt_shared" + ";" + $env:Path + + Windows (Command Prompt): + + .. code-block:: bat + + set PATH=C:\path\to\mongo_crypt_shared;%PATH% + + Notes: + + * Set the variable to the directory that contains the shared library file + (for example, ``mongo_crypt_shared_v1.dylib`` on macOS, + ``mongo_crypt_v1.so`` on Linux, or ``mongo_crypt_v1.dll`` on Windows), not + the file itself. + + * This environment variable is separate from the ``crypt_shared_lib_path`` + option: the environment variable points to a directory, while + ``crypt_shared_lib_path`` is the explicit path to the library file. + +.. _qe-configuring-database-routers-setting: + +Configuring the ``DATABASE_ROUTERS`` setting +============================================ + +Similar to configuring the :ref:`DATABASE_ROUTERS +` setting for +:doc:`embedded models `, Queryable Encryption requires +a :setting:`DATABASE_ROUTERS` setting to route database operations to the +encrypted database. 
+ +The following example shows how to configure a router for the ``"myapp"`` +application that routes database operations to the encrypted database for all +models in that application:: + + # myapp/routers.py + class EncryptedRouter: + def allow_migrate(self, db, app_label, model_name=None, **hints): + if app_label == "myapp": + return db == "encrypted" + # Prevent migrations on the encrypted database for other apps + if db == "encrypted": + return False + return None + + def db_for_read(self, model, **hints): + if model._meta.app_label == "myapp": + return "encrypted" + return None + + db_for_write = db_for_read + +Then in your Django settings, add the custom database router to the +:setting:`django:DATABASE_ROUTERS` setting:: + + # settings.py + DATABASE_ROUTERS = [ + "django_mongodb_backend.routers.MongoRouter", + "myapp.routers.EncryptedRouter", + ] + +Encrypted fields +================ + +Now you can start using encrypted fields in your Django models. + +:doc:`Encrypted fields ` may be used to protect +sensitive data like social security numbers, credit card information, or +personal health information. With Queryable Encryption, you can also perform +queries on encrypted fields. To use encrypted fields in your models, +import the necessary field types from ``django_mongodb_backend.models`` and +define your models as usual. 
+ +Here are models based on the `Python Queryable Encryption Tutorial`_:: + + # myapp/models.py + from django.db import models + from django_mongodb_backend.models import EmbeddedModel + from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedCharField, + EncryptedEmbeddedModelField, + ) + + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + class Patient(models.Model): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + +.. _Python Queryable Encryption Tutorial: https://github.com/mongodb/docs/tree/main/content/manual/manual/source/includes/qe-tutorials/python + +.. _qe-migrations: + +Migrations +========== + +Once you have defined your models, create a migration as usual: + +.. code-block:: console + + $ python manage.py makemigrations + +Then run the migrations with: + +.. code-block:: console + + $ python manage.py migrate --database encrypted + +.. warning:: + + Be aware that you cannot add encrypted fields to existing models, nor can + you change the definition of an encrypted field, for example, to make it + queryable. + +Creating encrypted data +======================= + +Now create and manipulate instances of the data just like any other Django +model data. The fields will automatically handle encryption and decryption, +ensuring that :ref:`sensitive data is stored securely in the database +`. + +Here's an example of creating a new ``Patient`` instance with encrypted fields: + +.. 
code-block:: pycon + + >>> from myapp.models import Patient, PatientRecord, Billing + >>> billing = Billing(cc_type="Visa", cc_number="4111111111111111") + >>> record = PatientRecord(ssn="123-45-6789", billing=billing, bill_amount=250.75) + >>> patient = Patient(patient_name="John Doe", patient_id=1001, patient_record=record) + >>> patient.save() + +Querying encrypted fields +========================= + +In order to query encrypted fields, you must include the :ref:`queries +` argument. For example, notice ``PatientRecord``\'s +``ssn`` field:: + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + +You can perform an equality query just like you would on a non-encrypted field: + +.. code-block:: pycon + + >>> patient = Patient.objects.get(patient_record__ssn="123-45-6789") + >>> patient.patient_name + 'John Doe' + +.. _qe-configuring-kms: + +Configuring the Key Management Service (KMS) +============================================ + +A local KMS provider with a hardcoded key is suitable for local development and +testing, but in a production environment, you should securely :ref:`store and manage +your encryption keys `. + +To use Queryable Encryption, you must configure a Key Management Service (KMS) +to store and manage the encryption keys used to encrypt and decrypt data. + +There are two primary configuration points: + +#. The ``kms_providers`` parameter of + :class:`~pymongo.encryption_options.AutoEncryptionOpts` (see the + ``kms_providers`` parameter in + :class:`~pymongo.encryption_options.AutoEncryptionOpts` for the available + providers (``aws``, ``azure``, ``gcp``, etc.) and provider options). + +#. The :setting:`KMS_CREDENTIALS ` inner option of + :setting:`DATABASES`. The keys for each provider are documented under the + ``master_key`` parameter of + :meth:`~pymongo.encryption.ClientEncryption.create_data_key`. 
+ +Here's an example of KMS configuration with ``aws``:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... + kms_providers={ + "aws": { + "accessKeyId": "your-access-key-id", + "secretAccessKey": "your-secret-access-key", + }, + }, + ), + }, + "KMS_CREDENTIALS": { + "aws": { + "key": "...", # Amazon Resource Name + "region": "...", # AWS region + }, + }, + }, + } + +.. _qe-configuring-encrypted-fields-map: + +Configuring the ``encrypted_fields_map`` option +=============================================== + + +.. admonition:: Required configuration + + As :ref:`described here `, + ensure ``django_mongodb_backend`` is listed in + :setting:`django:INSTALLED_APPS` to enable the + :djadmin:`showencryptedfieldsmap` command. + +Encryption keys are created when you :ref:`run migrations for models that have +encrypted fields `. + +To see the encrypted fields map for your models (which includes the encryption +key IDs), run the :djadmin:`showencryptedfieldsmap` command:: + + $ python manage.py showencryptedfieldsmap --database encrypted + +In a production environment, it's recommended to include this map in your +settings to protect against a malicious server advertising a false encrypted +fields map:: + + from bson import json_util + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... 
+ encrypted_fields_map=json_util.loads( + """{ + "encrypt_patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "keyId": { + "$binary": { + "base64": "2MA29LaARIOqymYHGmi2mQ==", + "subType": "04" + } + }, + "queries": { + "queryType": "equality" + } + } + ] + }}""" + ), + ), + }, + }, + } diff --git a/docs/index.rst b/docs/index.rst index dc2124f85..d332e9822 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,7 @@ Models - :doc:`ref/database` - :doc:`ref/contrib/gis` - :doc:`ref/django-admin` +- :doc:`ref/models/encrypted-fields` **Topic guides:** diff --git a/docs/ref/django-admin.rst b/docs/ref/django-admin.rst index a491714cf..567aeaab1 100644 --- a/docs/ref/django-admin.rst +++ b/docs/ref/django-admin.rst @@ -5,6 +5,8 @@ Management commands Django MongoDB Backend includes some :doc:`Django management commands `. +.. _qe-showencryptedfieldsmap-required-configuration: + Required configuration ====================== @@ -13,3 +15,20 @@ in the :setting:`INSTALLED_APPS` setting. Available commands ================== + +``showencryptedfieldsmap`` +-------------------------- + +.. versionadded:: 6.0.1 + +.. django-admin:: showencryptedfieldsmap + + This command generates output for inclusion in + :class:`~pymongo.encryption_options.AutoEncryptionOpts`\'s + ``encrypted_fields_map`` argument. + + See :ref:`qe-configuring-encrypted-fields-map`. + + .. django-admin-option:: --database DATABASE + + Specifies the database to use. Defaults to ``default``. 
diff --git a/docs/ref/index.rst b/docs/ref/index.rst index 94a11a2a8..47b27d466 100644 --- a/docs/ref/index.rst +++ b/docs/ref/index.rst @@ -9,5 +9,7 @@ API reference forms contrib/index database + models/encrypted-fields django-admin utils + settings diff --git a/docs/ref/models/encrypted-fields.rst b/docs/ref/models/encrypted-fields.rst new file mode 100644 index 000000000..9a2ab8123 --- /dev/null +++ b/docs/ref/models/encrypted-fields.rst @@ -0,0 +1,169 @@ +================ +Encrypted fields +================ + +.. currentmodule:: django_mongodb_backend.fields + +.. versionadded:: 6.0.1 + +To use encrypted fields, you must :doc:`configure Queryable Encryption +`. + +The following tables detail which fields have encrypted counterparts. In all +cases, the encrypted field names are simply prefixed with ``Encrypted``, e.g. +``EncryptedCharField``. They are importable from +``django_mongodb_backend.fields``. + +.. csv-table:: ``django.db.models`` + :header: "Model Field", "Encrypted version available?" + + :class:`~django.db.models.BigIntegerField`, Yes + :class:`~django.db.models.BinaryField`, Yes + :class:`~django.db.models.BooleanField`, Yes + :class:`~django.db.models.CharField`, Yes + :class:`~django.db.models.DateField`, Yes + :class:`~django.db.models.DateTimeField`, Yes + :class:`~django.db.models.DecimalField`, Yes + :class:`~django.db.models.DurationField`, Yes + :class:`~django.db.models.EmailField`, Yes + :class:`~django.db.models.FileField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.FilePathField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.GenericIPAddressField`, Yes + :class:`~django.db.models.ImageField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.IntegerField`, Yes + :class:`~django.db.models.JSONField`, No: ``JSONField`` isn't recommended. 
+ :class:`~django.db.models.PositiveIntegerField`, Yes + :class:`~django.db.models.PositiveBigIntegerField`, Yes + :class:`~django.db.models.PositiveSmallIntegerField`, Yes + :class:`~django.db.models.SlugField`, No: it requires a unique index which Queryable Encryption doesn't support. + :class:`~django.db.models.SmallIntegerField`, Yes + :class:`~django.db.models.TimeField`, Yes + :class:`~django.db.models.TextField`, Yes + :class:`~django.db.models.URLField`, Yes + :class:`~django.db.models.UUIDField`, Yes + +.. csv-table:: ``django_mongodb_backend.fields`` + :header: "Model Field", "Encrypted version available?" + + :class:`ArrayField`, Yes + :class:`EmbeddedModelArrayField`, Yes + :class:`EmbeddedModelField`, Yes + :class:`ObjectIdField`, Yes + :class:`PolymorphicEmbeddedModelField`, No: may be implemented in the future. + :class:`PolymorphicEmbeddedModelArrayField`, No: may be implemented in the future. + +.. _encrypted-fields-queries: + +``EncryptedField.queries`` +-------------------------- + +Most encrypted fields* take an optional ``queries`` argument. It's a dictionary +that specifies the type of queries that can be performed on the field, as well +as any query options. + +The :ref:`available query types ` depend +on your version of MongoDB. For example, in MongoDB 8.0, the supported types +are ``equality`` and ``range``. + +The supported lookups for ``equality`` queries are: :lookup:`exact` and +:lookup:`in`. The supported operators are AND (``&``) and OR (``|``). + +The supported lookups for ``range`` queries include those of ``equality`` +queries as well as :lookup:`lt`, :lookup:`lte`, :lookup:`gt`, and +:lookup:`gte`. + +\* These fields don't support the ``queries`` argument: + +- ``EncryptedArrayField`` +- ``EncryptedEmbeddedModelArrayField`` +- ``EncryptedEmbeddedModelField`` + +Embedded model encryption +========================= + +There are two ways to encrypt embedded models. 
You can either encrypt the +entire subdocument, in which case you can't query any of the subdocument's fields, +or you can encrypt only selected fields of the subdocument. + +Encrypting the entire subdocument +--------------------------------- + +To encrypt a subdocument, use ``EncryptedEmbeddedModelField`` or +``EncryptedEmbeddedModelArrayField``. In this case, the field's embedded model +cannot have any encrypted fields. + +Encrypting selected fields of a subdocument +------------------------------------------- + +To encrypt only select fields of a subdocument, use :class:`EmbeddedModelField` +and any of the other encrypted fields on the embedded model. + +MongoDB doesn't support encrypting selected fields of +``EmbeddedModelArrayField``. + +Limitations +=========== + +MongoDB imposes some restrictions on encrypted fields: + +* They cannot be indexed. +* They cannot be part of a unique constraint. +* They cannot be null. + +``QuerySet`` limitations +------------------------ + +In addition to :ref:`Django MongoDB Backend's QuerySet limitations +`, some ``QuerySet`` methods aren't +supported on encrypted fields. Each unsupported method is followed by a sample +error message from the database. Depending on the exact query, error messages +may vary. + +- :meth:`~django.db.models.query.QuerySet.order_by`: Cannot add an encrypted + field as a prefix of another encrypted field. +- :meth:`~django.db.models.query.QuerySet.alias`, + :meth:`~django.db.models.query.QuerySet.annotate`, + :meth:`~django.db.models.query.QuerySet.distinct`: Cannot group on field + '' which is encrypted with the random algorithm or whose + encryption properties are not known until runtime. +- :meth:`~django.db.models.query.QuerySet.dates`, + :meth:`~django.db.models.query.QuerySet.datetimes`: If the value type is a + date, the type of the index must also be date (and vice versa). +- :meth:`~django.db.models.query.QuerySet.in_bulk`: Encrypted fields can't have + unique constraints. 
+- Queries that join multiple collections and require the ``let`` operator. Such + queries usually involve expressions or subqueries: Non-empty 'let' field is + not allowed in the $lookup aggregation stage over an encrypted collection. + +There are also several ``QuerySet`` methods that aren't permitted on any models +(regardless of whether or not they have encrypted fields) that use a database +connection with Automatic Encryption. Each unsupported method is followed by a +sample error message from the database. + +- :meth:`~django.db.models.query.QuerySet.update`: Multi-document updates are + not allowed with Queryable Encryption. +- :meth:`~django.db.models.query.QuerySet.aggregate`: Invalid reference to an + encrypted field within aggregate expression. +- :meth:`~django.db.models.query.QuerySet.union`: Aggregation stage $unionWith + is not allowed or supported with automatic encryption. + +``EncryptedFieldMixin`` +======================= + +.. class:: EncryptedFieldMixin + + .. versionadded:: 6.0.1 + + Use this mixin to create encrypted versions of your own custom fields. For + example, to create an encrypted version of ``MyField``:: + + from django.db import models + from django_mongodb_backend.fields import EncryptedFieldMixin + from myapp.fields import MyField + + + class MyEncryptedField(EncryptedFieldMixin, MyField): + pass + + This adds the :ref:`queries ` argument to the + field. diff --git a/docs/ref/settings.rst b/docs/ref/settings.rst new file mode 100644 index 000000000..6aba8a75a --- /dev/null +++ b/docs/ref/settings.rst @@ -0,0 +1,43 @@ +======== +Settings +======== + +Queryable Encryption +==================== + +The following :setting:`django:DATABASES` inner options support configuration of +Key Management Service (KMS) credentials for Queryable Encryption. + +.. 
setting:: DATABASE-KMS-CREDENTIALS + +``KMS_CREDENTIALS`` +------------------- + +Default: ``{}`` (empty dictionary) + +A dictionary of Key Management Service (KMS) credential key-value pairs. These +credentials are required to access your KMS provider (such as AWS KMS, Azure Key +Vault, or GCP KMS) for encrypting and decrypting data using Queryable +Encryption. + +For example after :doc:`/howto/queryable-encryption`, to configure AWS KMS, +Azure Key Vault, or GCP KMS credentials, you can set ``KMS_CREDENTIALS`` in +your :setting:`django:DATABASES` settings as follows: + +.. code-block:: python + + DATABASES["encrypted"]["KMS_CREDENTIALS"] = { + "aws": { + "key": os.getenv("AWS_KEY_ARN", ""), + "region": os.getenv("AWS_KEY_REGION", ""), + }, + "azure": { + "key": os.getenv("AZURE_KEY_VAULT_URL", ""), + "client_id": os.getenv("AZURE_CLIENT_ID", ""), + "client_secret": os.getenv("AZURE_CLIENT_SECRET", ""), + }, + "gcp": { + "key": os.getenv("GCP_KEY_NAME", ""), + "project_id": os.getenv("GCP_PROJECT_ID", ""), + }, + } diff --git a/docs/topics/known-issues.rst b/docs/topics/known-issues.rst index 01259b49a..dd28e6fed 100644 --- a/docs/topics/known-issues.rst +++ b/docs/topics/known-issues.rst @@ -26,6 +26,8 @@ Model fields - :class:`~django.db.models.CompositePrimaryKey` - :class:`~django.db.models.GeneratedField` +.. 
_known-issues-limitations-querying: + Querying ======== diff --git a/pyproject.toml b/pyproject.toml index acecf20fb..0d9f1d981 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ docs = [ "furo>=2025.7.19", "sphinx-copybutton", ] +encryption = ["pymongo[encryption]"] [project.urls] Homepage = "https://www.mongodb.org" diff --git a/tests/backend_/test_features.py b/tests/backend_/test_features.py index 05959fa70..d505c7fab 100644 --- a/tests/backend_/test_features.py +++ b/tests/backend_/test_features.py @@ -44,3 +44,83 @@ def mocked_command(command): with patch("pymongo.synchronous.database.Database.command", wraps=mocked_command): self.assertIs(connection.features._supports_transactions, False) + + +class SupportsQueryableEncryptionTests(TestCase): + def setUp(self): + # Clear the cached property. + connection.features.__dict__.pop("supports_queryable_encryption", None) + # Must initialize the feature before patching it. + connection.features._supports_transactions # noqa: B018 + + def tearDown(self): + del connection.features.supports_queryable_encryption + + @staticmethod + def enterprise_response(command): + if command == "buildInfo": + return {"modules": ["enterprise"]} + raise Exception("Unexpected command") + + @staticmethod + def non_enterprise_response(command): + if command == "buildInfo": + return {"modules": []} + raise Exception("Unexpected command") + + def test_supported_on_atlas(self): + """Supported on MongoDB 8.0+ Atlas replica set or sharded cluster.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", True), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_supported_on_enterprise(self): + """Supported on MongoDB 8.0+ Enterprise replica 
set or sharded cluster.""" + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_atlas_or_enterprise_required(self): + """Not supported on MongoDB Community Edition.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_transactions_required(self): + """ + Not supported if database isn't a replica set or sharded cluster + (i.e. DatabaseFeatures._supports_transactions = False). 
+ """ + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", False), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_mongodb_8_0_required(self): + """Not supported on MongoDB < 8.0""" + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", False), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) diff --git a/tests/encryption_/__init__.py b/tests/encryption_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/encryption_/models.py b/tests/encryption_/models.py new file mode 100644 index 000000000..995e4760c --- /dev/null +++ b/tests/encryption_/models.py @@ -0,0 +1,184 @@ +from django.db import models + +from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedArrayField, + EncryptedBigIntegerField, + EncryptedBinaryField, + EncryptedBooleanField, + EncryptedCharField, + EncryptedDateField, + EncryptedDateTimeField, + EncryptedDecimalField, + EncryptedDurationField, + EncryptedEmailField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedFloatField, + EncryptedGenericIPAddressField, + EncryptedIntegerField, + EncryptedObjectIdField, + EncryptedPositiveBigIntegerField, + EncryptedPositiveIntegerField, + EncryptedPositiveSmallIntegerField, + EncryptedSmallIntegerField, + EncryptedTextField, + EncryptedTimeField, + EncryptedURLField, + EncryptedUUIDField, +) +from django_mongodb_backend.models import EmbeddedModel + + +class Author(models.Model): + 
name = models.CharField(max_length=255) + + +class Book(models.Model): + title = models.CharField(max_length=255) + author = models.ForeignKey(Author, models.CASCADE) + + +class EncryptedTestModel(models.Model): + class Meta: + abstract = True + required_db_features = {"supports_queryable_encryption"} + + +# Array models +class ArrayModel(EncryptedTestModel): + values = EncryptedArrayField( + models.IntegerField(), + size=5, + ) + + +# Embedded models +class Patient(EncryptedTestModel): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + +class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + +class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + + +# Embedded array models +class Actor(EmbeddedModel): + name = models.CharField(max_length=100) + + +class Movie(EncryptedTestModel): + title = models.CharField(max_length=200) + plot = models.TextField(blank=True) + runtime = models.IntegerField(default=0) + released = models.DateTimeField("release date") + cast = EncryptedEmbeddedModelArrayField(Actor) + + def __str__(self): + return self.title + + +# Equality-queryable field models +class BinaryModel(EncryptedTestModel): + value = EncryptedBinaryField(queries={"queryType": "equality"}) + + +class BooleanModel(EncryptedTestModel): + value = EncryptedBooleanField(queries={"queryType": "equality"}) + + +class CharModel(EncryptedTestModel): + value = EncryptedCharField(max_length=255, queries={"queryType": "equality"}) + + +class EmailModel(EncryptedTestModel): + value = EncryptedEmailField(max_length=255, queries={"queryType": "equality"}) + + +class 
GenericIPAddressModel(EncryptedTestModel): + value = EncryptedGenericIPAddressField(queries={"queryType": "equality"}) + + +class ObjectIdModel(EncryptedTestModel): + value = EncryptedObjectIdField(queries={"queryType": "equality"}) + + +class TextModel(EncryptedTestModel): + value = EncryptedTextField(queries={"queryType": "equality"}) + + +class URLModel(EncryptedTestModel): + value = EncryptedURLField(max_length=500, queries={"queryType": "equality"}) + + +class UUIDModel(EncryptedTestModel): + value = EncryptedUUIDField(queries={"queryType": "equality"}) + + +# Range-queryable field models +class BigIntegerModel(EncryptedTestModel): + value = EncryptedBigIntegerField(queries={"queryType": "range"}) + + +class DateModel(EncryptedTestModel): + value = EncryptedDateField(queries={"queryType": "range"}) + + +class DateTimeModel(EncryptedTestModel): + value = EncryptedDateTimeField(queries={"queryType": "range"}) + + +class DecimalModel(EncryptedTestModel): + value = EncryptedDecimalField(max_digits=10, decimal_places=2, queries={"queryType": "range"}) + + +class DurationModel(EncryptedTestModel): + value = EncryptedDurationField(queries={"queryType": "range"}) + + +class FloatModel(EncryptedTestModel): + value = EncryptedFloatField(queries={"queryType": "range"}) + + +class IntegerModel(EncryptedTestModel): + value = EncryptedIntegerField(queries={"queryType": "range"}) + + +class PositiveBigIntegerModel(EncryptedTestModel): + value = EncryptedPositiveBigIntegerField(queries={"queryType": "range"}) + + +class PositiveIntegerModel(EncryptedTestModel): + value = EncryptedPositiveIntegerField(queries={"queryType": "range"}) + + +class PositiveSmallIntegerModel(EncryptedTestModel): + value = EncryptedPositiveSmallIntegerField(queries={"queryType": "range"}) + + +class SmallIntegerModel(EncryptedTestModel): + value = EncryptedSmallIntegerField(queries={"queryType": "range"}) + + +class TimeModel(EncryptedTestModel): + value = EncryptedTimeField(queries={"queryType": 
"range"}) + + +class EncryptionKey(models.Model): + key_alt_name = models.CharField(max_length=500, db_column="keyAltNames") + + class Meta: + db_table = "__keyVault" + managed = False diff --git a/tests/encryption_/test_base.py b/tests/encryption_/test_base.py new file mode 100644 index 000000000..0c165d19a --- /dev/null +++ b/tests/encryption_/test_base.py @@ -0,0 +1,21 @@ +import pymongo +from bson.binary import Binary +from django.conf import settings +from django.db import connections +from django.test import TestCase, skipUnlessDBFeature + + +@skipUnlessDBFeature("supports_queryable_encryption") +class EncryptionTestCase(TestCase): + databases = {"default", "encrypted"} + maxDiff = None + + def assertEncrypted(self, model, field): + # Access encrypted database from an unencrypted connection + conn_params = connections["default"].get_connection_params() + db_name = settings.DATABASES["encrypted"]["NAME"] + with pymongo.MongoClient(**conn_params) as new_connection: + db = new_connection[db_name] + collection = db[model._meta.db_table] + data = collection.find_one({}, {field: 1, "_id": 0}) + self.assertIsInstance(data[field], Binary) diff --git a/tests/encryption_/test_fields.py b/tests/encryption_/test_fields.py new file mode 100644 index 000000000..3769f6725 --- /dev/null +++ b/tests/encryption_/test_fields.py @@ -0,0 +1,423 @@ +import datetime +import uuid +from decimal import Decimal +from operator import attrgetter + +from bson import ObjectId +from django.db import DatabaseError +from django.db.models import Avg, Count, F, Q + +from django_mongodb_backend.fields import ( + EncryptedArrayField, + EncryptedCharField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedIntegerField, +) + +from .models import ( + Actor, + ArrayModel, + Author, + BigIntegerModel, + Billing, + BinaryModel, + Book, + BooleanModel, + CharModel, + DateModel, + DateTimeModel, + DecimalModel, + DurationModel, + EmailModel, + FloatModel, + 
GenericIPAddressModel, + IntegerModel, + Movie, + ObjectIdModel, + Patient, + PatientRecord, + PositiveBigIntegerModel, + PositiveIntegerModel, + PositiveSmallIntegerModel, + SmallIntegerModel, + TextModel, + TimeModel, + URLModel, + UUIDModel, +) +from .test_base import EncryptionTestCase + + +class ArrayModelTests(EncryptionTestCase): + def setUp(self): + self.array_model = ArrayModel.objects.create(values=[1, 2, 3, 4, 5]) + + def test_array(self): + array_model = ArrayModel.objects.get(id=self.array_model.id) + self.assertEqual(array_model.values, [1, 2, 3, 4, 5]) + self.assertEncrypted(self.array_model, "values") + + +class EmbeddedModelTests(EncryptionTestCase): + def setUp(self): + self.billing = Billing(cc_type="Visa", cc_number="4111111111111111") + self.patient_record = PatientRecord(ssn="123-45-6789", billing=self.billing) + self.patient = Patient.objects.create( + patient_name="John Doe", patient_id=123456789, patient_record=self.patient_record + ) + + def test_object(self): + patient = Patient.objects.get(id=self.patient.id) + self.assertEqual(patient.patient_record.ssn, "123-45-6789") + self.assertEqual(patient.patient_record.billing.cc_type, "Visa") + self.assertEqual(patient.patient_record.billing.cc_number, "4111111111111111") + + +class EmbeddedModelArrayTests(EncryptionTestCase): + def setUp(self): + self.actor1 = Actor(name="Actor One") + self.actor2 = Actor(name="Actor Two") + self.movie = Movie.objects.create( + title="Sample Movie", + cast=[self.actor1, self.actor2], + released=datetime.date(2024, 6, 1), + ) + + def test_array(self): + movie = Movie.objects.get(id=self.movie.id) + self.assertEqual(len(movie.cast), 2) + self.assertEqual(movie.cast[0].name, "Actor One") + self.assertEqual(movie.cast[1].name, "Actor Two") + self.assertEncrypted(movie, "cast") + + +class FieldTests(EncryptionTestCase): + def assertEquality(self, model_cls, val): + obj = model_cls.objects.create(value=val) + self.assertEqual(model_cls.objects.get(value=val), obj) + 
self.assertEqual(model_cls.objects.get(value__in=[val]), obj) + self.assertQuerySetEqual(model_cls.objects.exclude(value=val), []) + + def assertRange(self, model_cls, *, low, high, threshold): + obj1 = model_cls.objects.create(value=low) + obj2 = model_cls.objects.create(value=high) + self.assertEqual(model_cls.objects.get(value=low).value, low) + self.assertEqual(model_cls.objects.get(value=high).value, high) + self.assertEqual(model_cls.objects.exclude(value=high).get().value, low) + self.assertCountEqual(model_cls.objects.filter(Q(value=high) | Q(value=low)), [obj1, obj2]) + self.assertQuerySetEqual( + model_cls.objects.filter(value__gt=threshold), [high], attrgetter("value") + ) + self.assertQuerySetEqual( + model_cls.objects.filter(value__gte=threshold), [high], attrgetter("value") + ) + self.assertQuerySetEqual( + model_cls.objects.filter(value__lt=threshold), [low], attrgetter("value") + ) + self.assertQuerySetEqual( + model_cls.objects.filter(value__lte=threshold), [low], attrgetter("value") + ) + self.assertQuerySetEqual( + model_cls.objects.filter(value__in=[low]), [low], attrgetter("value") + ) + msg = ( + "Comparison disallowed between Queryable Encryption encrypted " + "fields and non-constant expressions; field 'value' is encrypted." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + self.assertQuerySetEqual( + model_cls.objects.filter(value__lte=F("value")), [low], attrgetter("value") + ) + + # Equality-only fields + def test_binary(self): + self.assertEquality(BinaryModel, b"\x00\x01\x02") + self.assertEncrypted(BinaryModel, "value") + + def test_boolean(self): + self.assertEquality(BooleanModel, True) + self.assertEncrypted(BooleanModel, "value") + + def test_char(self): + self.assertEquality(CharModel, "hello") + self.assertEncrypted(CharModel, "value") + + def test_email(self): + self.assertEquality(EmailModel, "test@example.com") + self.assertEncrypted(EmailModel, "value") + + def test_ip(self): + self.assertEquality(GenericIPAddressModel, "192.168.0.1") + self.assertEncrypted(GenericIPAddressModel, "value") + + def test_objectid(self): + self.assertEquality(ObjectIdModel, ObjectId()) + self.assertEncrypted(ObjectIdModel, "value") + + def test_text(self): + self.assertEquality(TextModel, "some text") + self.assertEncrypted(TextModel, "value") + + def test_url(self): + self.assertEquality(URLModel, "https://example.com") + self.assertEncrypted(URLModel, "value") + + def test_uuid(self): + self.assertEquality(UUIDModel, uuid.uuid4()) + self.assertEncrypted(UUIDModel, "value") + + # Range fields + def test_big_integer(self): + self.assertRange(BigIntegerModel, low=100, high=200, threshold=150) + self.assertEncrypted(BigIntegerModel, "value") + + def test_date(self): + self.assertRange( + DateModel, + low=datetime.date(2024, 6, 1), + high=datetime.date(2024, 6, 10), + threshold=datetime.date(2024, 6, 5), + ) + self.assertEncrypted(DateModel, "value") + + def test_datetime(self): + self.assertRange( + DateTimeModel, + low=datetime.datetime(2024, 6, 1, 12, 0), + high=datetime.datetime(2024, 6, 2, 12, 0), + threshold=datetime.datetime(2024, 6, 2, 0, 0), + ) + self.assertEncrypted(DateTimeModel, "value") + + def test_decimal(self): + self.assertRange( + DecimalModel, + 
low=Decimal("123.45"), + high=Decimal("200.50"), + threshold=Decimal("150"), + ) + self.assertEncrypted(DecimalModel, "value") + + def test_duration(self): + self.assertRange( + DurationModel, + low=datetime.timedelta(days=3), + high=datetime.timedelta(days=10), + threshold=datetime.timedelta(days=5), + ) + self.assertEncrypted(DurationModel, "value") + + def test_float(self): + self.assertRange(FloatModel, low=1.23, high=4.56, threshold=3.0) + self.assertEncrypted(FloatModel, "value") + + def test_integer(self): + self.assertRange(IntegerModel, low=5, high=10, threshold=7) + self.assertEncrypted(IntegerModel, "value") + + def test_positive_big_integer(self): + self.assertRange(PositiveBigIntegerModel, low=100, high=500, threshold=200) + self.assertEncrypted(PositiveBigIntegerModel, "value") + + def test_positive_integer(self): + self.assertRange(PositiveIntegerModel, low=10, high=20, threshold=15) + self.assertEncrypted(PositiveIntegerModel, "value") + + def test_positive_small_integer(self): + self.assertRange(PositiveSmallIntegerModel, low=5, high=8, threshold=6) + self.assertEncrypted(PositiveSmallIntegerModel, "value") + + def test_small_integer(self): + self.assertRange(SmallIntegerModel, low=-5, high=2, threshold=0) + self.assertEncrypted(SmallIntegerModel, "value") + + def test_time(self): + self.assertRange( + TimeModel, + low=datetime.time(10, 0), + high=datetime.time(15, 0), + threshold=datetime.time(12, 0), + ) + self.assertEncrypted(TimeModel, "value") + + +class QueryTests(EncryptionTestCase): + def test_aggregate_avg(self): + msg = ( + "csfle \"analyze_query\" failed: Accumulator '$avg' cannot aggregate encrypted fields." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.aggregate(Avg("value"))) + + def test_aggregate_count(self): + msg = "Invalid reference to an encrypted field within aggregate expression: value" + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.aggregate(Count("value"))) + + def test_alias(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.alias(avg=Avg("value"))) + + def test_annotate(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.annotate(avg=Avg("value"))) + + def test_bulk_create(self): + CharModel.objects.bulk_create([CharModel(value="abc"), CharModel(value="xyz")]) + self.assertQuerySetEqual( + CharModel.objects.order_by("pk"), ["abc", "xyz"], attrgetter("value") + ) + + def test_bulk_update(self): + objs = [ + CharModel.objects.create(value="abc"), + CharModel.objects.create(value="xyz"), + ] + objs[0].value = "def" + objs[1].value = "mno" + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.bulk_update(objs, ["value"]) + + def test_contains(self): + obj = CharModel.objects.create(value="abc") + self.assertIs(CharModel.objects.contains(obj), True) + + def test_count(self): + CharModel.objects.create(value="a") + CharModel.objects.create(value="b") + self.assertEqual(CharModel.objects.count(), 2) + + def test_dates(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateModel.objects.dates("value", "year")) + + def test_datetimes(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateTimeModel.objects.datetimes("value", "year")) + + def test_distinct(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.distinct("value")) + + def test_exclude(self): + obj1 = CharModel.objects.create(value="abc") + obj2 = CharModel.objects.create(value="xyz") + self.assertSequenceEqual(CharModel.objects.exclude(value=obj1.value), [obj2]) + + def test_exists(self): + self.assertIs(CharModel.objects.exists(), False) + + def test_get_or_create(self): + obj1, created1 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created1, True) + obj2, created2 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created2, False) + self.assertEqual(obj1, obj2) + + def test_join(self): + book = Book.objects.create(title="Book", author=Author.objects.create(name="Bob")) + self.assertSequenceEqual(Book.objects.filter(author__name="Bob"), [book]) + + def test_join_with_let(self): + msg = ( + "Non-empty 'let' field is not allowed in the $lookup aggregation " + "stage over an encrypted collection." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(Book.objects.filter(author__name=F("title"))) + + def test_order_by(self): + msg = "Cannot add an encrypted field as a prefix of another encrypted field" + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.order_by("value")) + + def test_select_related(self): + Book.objects.create(title="Book", author=Author.objects.create(name="Bob")) + with self.assertNumQueries(1, using="encrypted"): + books = Book.objects.select_related("author") + self.assertEqual(books[0].author.name, "Bob") + + def test_update(self): + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + self.assertEqual(CharModel.objects.update(value="xyz"), 1) + + def test_update_or_create(self): + CharModel.objects.create(value="xyz") + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.update_or_create(value="xyz", defaults={"plain": "abc"}) + + def test_union(self): + msg = "Aggregation stage $unionWith is not allowed or supported with automatic encryption." + qs1 = IntegerModel.objects.filter(value__gt=1) + qs2 = IntegerModel.objects.filter(value__gte=8) + with self.assertRaisesMessage(DatabaseError, msg): + list(qs1.union(qs2)) + + def test_values(self): + list(CharModel.objects.values("value")) + + def test_values_list(self): + list(CharModel.objects.values_list("value")) + + +class FieldMixinTests(EncryptionTestCase): + def test_db_index(self): + msg = "'db_index=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(db_index=True) + + def test_null(self): + msg = "'null=True' is not supported on encrypted fields." 
+ with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(null=True) + + def test_unique(self): + msg = "'unique=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(unique=True) + + def test_deconstruct(self): + field = EncryptedCharField(max_length=50, queries={"field": "value"}) + field.name = "ssn" + name, path, args, kwargs = field.deconstruct() + self.assertEqual(name, "ssn") + self.assertEqual(path, "django_mongodb_backend.fields.EncryptedCharField") + self.assertEqual(args, []) + self.assertEqual(kwargs["queries"], {"field": "value"}) + + def test_fields_without_queries(self): + """Some field types (array, object) can't be queried.""" + for field in ( + EncryptedArrayField, + EncryptedEmbeddedModelField, + EncryptedEmbeddedModelArrayField, + ): + with self.subTest(field=field): + msg = f"{field.__name__} does not support the queries argument." + with self.assertRaisesMessage(ValueError, msg): + field(Actor, queries={}) diff --git a/tests/encryption_/test_management.py b/tests/encryption_/test_management.py new file mode 100644 index 000000000..180325675 --- /dev/null +++ b/tests/encryption_/test_management.py @@ -0,0 +1,137 @@ +from io import StringIO + +from bson import json_util +from django.core.exceptions import ImproperlyConfigured +from django.core.management import call_command +from django.db import connections +from django.test import modify_settings + +from .models import EncryptionKey +from .test_base import EncryptionTestCase + + +@modify_settings(INSTALLED_APPS={"prepend": "django_mongodb_backend"}) +class CommandTests(EncryptionTestCase): + # Expected encrypted field maps for all Encrypted* models + expected_maps = { + "encryption__patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + # Equality-queryable fields + 
"encryption__binarymodel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__booleanmodel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "encryption__charmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__emailmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__genericipaddressmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__textmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__urlmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + # Range-queryable fields + "encryption__bigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datetimemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__decimalmodel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__durationmodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__floatmodel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__integermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivebigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + 
"encryption__positiveintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivesmallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__smallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__timemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def _compare_output(self, expected, actual): + for field in actual["fields"]: + del field["keyId"] # Can't compare dynamic value + self.assertEqual(expected, actual) + + def test_show_encrypted_fields_map(self): + out = StringIO() + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0, stdout=out) + command_output = json_util.loads(out.getvalue()) + + # Loop through each expected model + for model_key, expected in self.expected_maps.items(): + with self.subTest(model=model_key): + self.assertIn(model_key, command_output) + self._compare_output(expected, command_output[model_key]) + + def test_missing_key(self): + connection = connections["encrypted"] + auto_encryption_opts = connection.connection._options.auto_encryption_opts + kms_providers = auto_encryption_opts._kms_providers + test_key = "encryption__patient.patient_record.ssn" + msg = ( + f"Encryption key {test_key} not found. Have migrated the " + " model?" + ) + EncryptionKey.objects.filter(key_alt_name=test_key).delete() + try: + with self.assertRaisesMessage(ImproperlyConfigured, msg): + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0) + finally: + # Replace the deleted key. 
+ kms_provider = next(iter(kms_providers.keys())) + master_key = connection.settings_dict["KMS_CREDENTIALS"][kms_provider] + connection.client_encryption.create_data_key( + kms_provider=kms_provider, + master_key=master_key, + key_alt_names=[test_key], + ) diff --git a/tests/encryption_/test_schema.py b/tests/encryption_/test_schema.py new file mode 100644 index 000000000..05b54aefd --- /dev/null +++ b/tests/encryption_/test_schema.py @@ -0,0 +1,172 @@ +from bson.binary import Binary +from django.core.exceptions import ImproperlyConfigured +from django.db import NotSupportedError, connections + +from . import models +from .models import EncryptionKey +from .test_base import EncryptionTestCase + + +class SchemaTests(EncryptionTestCase): + # Expected encrypted fields map per model + expected_map = { + "Patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + "BinaryModel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BooleanModel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "CharModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "EmailModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "GenericIPAddressModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "TextModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "URLModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateModel": { + 
"fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateTimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DecimalModel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "DurationModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "FloatModel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "IntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveBigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveSmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "SmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "TimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def test_get_encrypted_fields_all_models(self): + """ + Loops through all models, + checks their encrypted fields map from the schema editor, + and compares to expected BSON type & queries mapping. + """ + # Deleting all keys is only correct only if this test includes all + # test models. This test may not be needed since it's tested when the + # test runner migrates all models. If any subTest fails, the key vault + # will be left in an inconsistent state. 
+ EncryptionKey.objects.all().delete() + connection = connections["encrypted"] + for model_name, expected in self.expected_map.items(): + with self.subTest(model=model_name): + model_class = getattr(models, model_name) + with connection.schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + for field in encrypted_fields["fields"]: + del field["keyId"] # Can't compare dynamic value + self.assertEqual(encrypted_fields, expected) + + def test_key_creation_and_lookup(self): + """ + Use _get_encrypted_fields to + generate and store a data key in the vault, then + query the vault with the keyAltName. + """ + model_class = models.CharModel + test_key_alt_name = f"{model_class._meta.db_table}.value" + # Delete the test key and verify it's gone. + EncryptionKey.objects.filter(key_alt_name=test_key_alt_name).delete() + with self.assertRaises(EncryptionKey.DoesNotExist): + EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + # Regenerate the keyId. + with connections["encrypted"].schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + # Validate schema contains a keyId for the field. + field_info = encrypted_fields["fields"][0] + self.assertEqual(field_info["path"], "value") + self.assertIsInstance(field_info["keyId"], Binary) + # Lookup in key vault by the keyAltName. + key = EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + self.assertEqual(key.id, field_info["keyId"]) + self.assertEqual(key.key_alt_name, [test_key_alt_name]) + + def test_missing_auto_encryption_opts(self): + connection = connections["default"] + msg = ( + "Tried to create model encryption_.Patient in 'default' database. " + "The model has encrypted fields but DATABASES['default']['OPTIONS'] " + 'is missing the "auto_encryption_opts" parameter. If the model ' + "should not be created in this database, adjust your database " + "routers." 
+ ) + with ( + self.assertRaisesMessage(ImproperlyConfigured, msg), + connection.schema_editor() as editor, + ): + editor.create_model(models.Patient) + + def test_multiple_kms_providers(self): + connection = connections["encrypted"] + auto_encryption_opts = connection.connection._options.auto_encryption_opts + kms_providers = auto_encryption_opts._kms_providers + # Mock multiple kms_providers by using a list of length > 1. + auto_encryption_opts._kms_providers = [{}, {}] + msg = ( + "Multiple KMS providers per database aren't supported. Please " + "create a feature request with details about your use case." + ) + try: + with ( + self.assertRaisesMessage(NotSupportedError, msg), + connection.schema_editor() as editor, + ): + editor.create_model(models.Patient) + finally: + # Restore the original value. + auto_encryption_opts._kms_providers = kms_providers diff --git a/tests/raw_query_/test_raw_aggregate.py b/tests/raw_query_/test_raw_aggregate.py index 99dcd5faf..96df2f925 100644 --- a/tests/raw_query_/test_raw_aggregate.py +++ b/tests/raw_query_/test_raw_aggregate.py @@ -111,7 +111,7 @@ def assertAnnotations(self, results, expected_annotations): self.assertEqual(getattr(result, annotation), value) def test_rawqueryset_repr(self): - queryset = RawQuerySet(pipeline=[]) + queryset = RawQuerySet(pipeline=[], model=Book) self.assertEqual(repr(queryset), "") self.assertEqual(repr(queryset.query), "")