diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b07d898..49febecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,8 +19,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added `parent_ids` internal field to collections to support multi-catalog hierarchies. Collections can now belong to multiple catalogs, with parent catalog IDs stored in this field for efficient querying and management. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) + ### Changed +- Have opensearch datetime, geometry and collections fields defined as constant strings [#553](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/553) + ### Fixed - Fix unawaited coroutine in `stac_fastapi.core.core`. [#551](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/551) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 75915607..7576f287 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -154,6 +154,20 @@ def __attrs_post_init__(self): aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING + # constants for field names + # they are used in multiple methods + # and could be overwritten in subclasses used with alternate opensearch mappings. + PROPERTIES_DATETIME_FIELD = "properties.datetime" + PROPERTIES_START_DATETIME_FIELD = "properties.start_datetime" + PROPERTIES_END_DATETIME_FIELD = "properties.end_datetime" + COLLECTION_FIELD = "collection" + GEOMETRY_FIELD = "geometry" + + @staticmethod + def __nested_field__(field: str): + """Convert opensearch field to nested field format.""" + return field.replace(".", "__") + """CORE LOGIC""" async def get_all_collections( @@ -436,7 +450,10 @@ def apply_ids_filter(search: Search, item_ids: List[str]): @staticmethod def apply_collections_filter(search: Search, collection_ids: List[str]): """Database logic to search a list of STAC collection ids.""" - return search.filter("terms", collection=collection_ids) + collection_nested_field = DatabaseLogic.__nested_field__( + DatabaseLogic.COLLECTION_FIELD + ) + return search.filter("terms", **{collection_nested_field: collection_ids}) @staticmethod def apply_datetime_filter( @@ -461,6 +478,16 @@ def apply_datetime_filter( if not datetime_search: return search, datetime_search + nested_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_DATETIME_FIELD + ) + nested_start_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_START_DATETIME_FIELD + ) + nested_end_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_END_DATETIME_FIELD + ) + if USE_DATETIME: if "eq" in datetime_search: # For exact matches, include: @@ -470,28 +497,42 @@ def apply_datetime_filter( Q( "bool", filter=[ - Q("exists", field="properties.datetime"), + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD), Q( "term", - **{"properties__datetime": datetime_search["eq"]}, + **{nested_datetime_field: datetime_search["eq"]}, ), ], ), Q( "bool", - must_not=[Q("exists", field="properties.datetime")], + must_not=[ + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD) + ], filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD, + ), Q( "range", - properties__start_datetime={ - "lte": datetime_search["eq"] + **{ + nested_start_datetime_field: { + "lte": datetime_search["eq"] + } }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["eq"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["eq"] + } + }, ), ], ), @@ -504,32 +545,46 @@ def apply_datetime_filter( Q( "bool", filter=[ - Q("exists", field="properties.datetime"), + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD), Q( "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], + **{ + nested_datetime_field: { + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + } }, ), ], ), Q( "bool", - must_not=[Q("exists", field="properties.datetime")], + must_not=[ + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD) + ], filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD, + ), Q( "range", - properties__start_datetime={ - "lte": datetime_search["lte"] + **{ + nested_start_datetime_field: { + "lte": datetime_search["lte"] + } }, ), Q( "range", - properties__end_datetime={ - "gte": datetime_search["gte"] + **{ + nested_end_datetime_field: { + "gte": datetime_search["gte"] + } }, ), ], @@ -545,15 +600,26 @@ def apply_datetime_filter( filter_query = Q( "bool", filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q("exists", field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD), Q( "range", - properties__start_datetime={"lte": datetime_search["eq"]}, + **{ + nested_start_datetime_field: { + "lte": datetime_search["eq"] + } + }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["eq"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["eq"] + } + }, ), ], ) @@ -561,15 +627,26 @@ def apply_datetime_filter( filter_query = Q( "bool", filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q("exists", field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD), Q( "range", - properties__start_datetime={"lte": datetime_search["lte"]}, + **{ + nested_start_datetime_field: { + "lte": datetime_search["lte"] + } + }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["gte"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["gte"] + } + }, ), ], ) @@ -594,7 +671,7 @@ def apply_bbox_filter(search: Search, bbox: List): Q( { "geo_shape": { - "geometry": { + DatabaseLogic.GEOMETRY_FIELD: { "shape": { "type": "polygon", "coordinates": bbox2polygon(*bbox), @@ -1708,7 +1785,7 @@ def bulk_sync( kwargs = kwargs or {} # Resolve the `refresh` parameter - refresh = kwargs.get("refresh", self.async_settings.database_refresh) + refresh = kwargs.get("refresh", self.sync_settings.database_refresh) refresh = validate_refresh(refresh) # Log the bulk insert attempt diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 14529ac3..1bf6c0c1 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -154,6 +154,20 @@ def __attrs_post_init__(self): aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING + # constants for field names + # they are used in multiple methods + # and could be overwritten in subclasses used with alternate opensearch mappings. + PROPERTIES_DATETIME_FIELD = "properties.datetime" + PROPERTIES_START_DATETIME_FIELD = "properties.start_datetime" + PROPERTIES_END_DATETIME_FIELD = "properties.end_datetime" + COLLECTION_FIELD = "collection" + GEOMETRY_FIELD = "geometry" + + @staticmethod + def __nested_field__(field: str): + """Convert opensearch field to nested field format.""" + return field.replace(".", "__") + """CORE LOGIC""" async def get_all_collections( @@ -436,7 +450,10 @@ def apply_ids_filter(search: Search, item_ids: List[str]): @staticmethod def apply_collections_filter(search: Search, collection_ids: List[str]): """Database logic to search a list of STAC collection ids.""" - return search.filter("terms", collection=collection_ids) + collection_nested_field = DatabaseLogic.__nested_field__( + DatabaseLogic.COLLECTION_FIELD + ) + return search.filter("terms", **{collection_nested_field: collection_ids}) @staticmethod def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]): @@ -479,6 +496,16 @@ def apply_datetime_filter( # False: Always search only by start/end datetime USE_DATETIME = get_bool_env("USE_DATETIME", default=True) + nested_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_DATETIME_FIELD + ) + nested_start_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_START_DATETIME_FIELD + ) + nested_end_datetime_field = DatabaseLogic.__nested_field__( + DatabaseLogic.PROPERTIES_END_DATETIME_FIELD + ) + if USE_DATETIME: if "eq" in datetime_search: # For exact matches, include: @@ -488,28 +515,42 @@ def apply_datetime_filter( Q( "bool", filter=[ - Q("exists", field="properties.datetime"), + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD), Q( "term", - **{"properties__datetime": datetime_search["eq"]}, + **{nested_datetime_field: datetime_search["eq"]}, ), ], ), Q( "bool", - must_not=[Q("exists", field="properties.datetime")], + must_not=[ + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD) + ], filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD, + ), Q( "range", - properties__start_datetime={ - "lte": datetime_search["eq"] + **{ + nested_start_datetime_field: { + "lte": datetime_search["eq"] + } }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["eq"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["eq"] + } + }, ), ], ), @@ -522,32 +563,46 @@ def apply_datetime_filter( Q( "bool", filter=[ - Q("exists", field="properties.datetime"), + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD), Q( "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], + **{ + nested_datetime_field: { + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + } }, ), ], ), Q( "bool", - must_not=[Q("exists", field="properties.datetime")], + must_not=[ + Q("exists", field=DatabaseLogic.PROPERTIES_DATETIME_FIELD) + ], filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD, + ), Q( "range", - properties__start_datetime={ - "lte": datetime_search["lte"] + **{ + nested_start_datetime_field: { + "lte": datetime_search["lte"] + } }, ), Q( "range", - properties__end_datetime={ - "gte": datetime_search["gte"] + **{ + nested_end_datetime_field: { + "gte": datetime_search["gte"] + } }, ), ], @@ -563,15 +618,26 @@ def apply_datetime_filter( filter_query = Q( "bool", filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q("exists", field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD), Q( "range", - properties__start_datetime={"lte": datetime_search["eq"]}, + **{ + nested_start_datetime_field: { + "lte": datetime_search["eq"] + } + }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["eq"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["eq"] + } + }, ), ], ) @@ -579,15 +645,26 @@ def apply_datetime_filter( filter_query = Q( "bool", filter=[ - Q("exists", field="properties.start_datetime"), - Q("exists", field="properties.end_datetime"), + Q( + "exists", + field=DatabaseLogic.PROPERTIES_START_DATETIME_FIELD, + ), + Q("exists", field=DatabaseLogic.PROPERTIES_END_DATETIME_FIELD), Q( "range", - properties__start_datetime={"lte": datetime_search["lte"]}, + **{ + nested_start_datetime_field: { + "lte": datetime_search["lte"] + } + }, ), Q( "range", - properties__end_datetime={"gte": datetime_search["gte"]}, + **{ + nested_end_datetime_field: { + "gte": datetime_search["gte"] + } + }, ), ], ) @@ -612,7 +689,7 @@ def apply_bbox_filter(search: Search, bbox: List): Q( { "geo_shape": { - "geometry": { + DatabaseLogic.GEOMETRY_FIELD: { "shape": { "type": "polygon", "coordinates": bbox2polygon(*bbox),