Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion backend/app/api/docs/collections/info.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization,
timestamps, and associated LLM service details (`llm_service_id` and `llm_service_name`).

Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that, documents returned are not only stored by Kaapi, but also by Vector store provider.
If the `include_docs` flag in the request is true, you will also get a list of document IDs associated with the given collection. Note that the returned documents are stored not only by Kaapi but also by the vector store provider.

Additionally, if the `include_url` parameter is true, a signed URL is included in the response; this is a clickable link for accessing the retrieved document. If you set it to false, the URL will not be included in the response.
30 changes: 23 additions & 7 deletions backend/app/api/routes/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
CollectionJobCrud,
DocumentCollectionCrud,
)
from app.core.cloud import get_cloud_storage
from app.models import (
DocumentPublic,
CollectionJobStatus,
CollectionActionType,
CollectionJobCreate,
Expand All @@ -32,6 +32,7 @@
create_collection as create_service,
delete_collection as delete_service,
)
from app.services.documents.helpers import build_document_schemas


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -184,8 +185,16 @@ def collection_info(
True,
description="If true, include documents linked to this collection",
),
skip: int = Query(0, ge=0),
limit: int = Query(100, gt=0, le=100),
include_url: bool = Query(
True, description="Include a signed URL to access the document"
),
limit: int
| None = Query(
None,
gt=0,
le=500,
description="Limit number of documents returned (default: all, max: 500)",
),
):
collection_crud = CollectionCrud(session, current_user.project_.id)
collection = collection_crud.read_one(collection_id)
Expand All @@ -194,9 +203,16 @@ def collection_info(

if include_docs:
document_collection_crud = DocumentCollectionCrud(session)
docs = document_collection_crud.read(collection, skip, limit)
collection_with_docs.documents = [
DocumentPublic.model_validate(doc) for doc in docs
]
documents = document_collection_crud.read(collection, skip=None, limit=limit)

storage = None
if include_url and documents:
storage = get_cloud_storage(
session=session, project_id=current_user.project_.id
)

collection_with_docs.documents = build_document_schemas(
documents=documents, storage=storage, include_url=include_url
)

return APIResponse.success_response(collection_with_docs)
4 changes: 2 additions & 2 deletions backend/app/api/routes/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,10 @@
Query,
UploadFile,
)
from pydantic import HttpUrl
from fastapi import Path as FastPath

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.core.cloud import get_cloud_storage
from app.crud import CollectionCrud, DocumentCrud
from app.crud.rag import OpenAIAssistantCrud, OpenAIVectorStoreCrud
from app.models import (
Expand All @@ -28,6 +26,7 @@
TransformationJobInfo,
DocTransformationJobPublic,
)
from app.core.cloud import get_cloud_storage
from app.services.collections.helpers import pick_service_for_documennt
from app.services.documents.helpers import (
schedule_transformation,
Expand Down Expand Up @@ -261,4 +260,5 @@ def doc_info(
include_url=include_url,
storage=storage,
)

return APIResponse.success_response(doc_schema)
39 changes: 39 additions & 0 deletions backend/app/tests/api/routes/collections/test_collection_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,42 @@ def test_collection_info_not_found_returns_404(
)

assert response.status_code == 404


def test_collection_info_include_docs_and_url(
    client: TestClient,
    db: Session,
    user_api_key_header,
) -> None:
    """
    Check that requesting a collection with include_docs=true and
    include_url=true returns the linked document together with a signed URL.
    """
    # Arrange: a collection in the "Dalgo" project with one linked document.
    project = get_project(db, "Dalgo")
    collection = get_collection(db, project)
    document = link_document_to_collection(db, collection)

    # Act: fetch the collection, asking for documents and their URLs.
    query_params = {"include_docs": "true", "include_url": "true"}
    response = client.get(
        f"{settings.API_V1_STR}/collections/{collection.id}",
        headers=user_api_key_header,
        params=query_params,
    )

    # Assert: the request succeeded and describes the expected collection.
    assert response.status_code == 200
    payload = response.json()["data"]
    assert payload["id"] == str(collection.id)

    # The documents field must be a non-empty list.
    docs = payload.get("documents", [])
    assert isinstance(docs, list)
    assert len(docs) >= 1

    # The linked document must be among the returned entries.
    expected_id = str(document.id)
    docs_by_id = {entry["id"]: entry for entry in docs}
    assert expected_id in docs_by_id

    # Its entry must carry an HTTPS signed URL.
    doc = docs_by_id[expected_id]
    assert "signed_url" in doc
    assert doc["signed_url"].startswith("https://")