ProjectTech4DevAI · nishika26 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025
diff --git a/backend/app/api/docs/collections/info.md b/backend/app/api/docs/collections/info.md
@@ -1,4 +1,6 @@
 Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization,
 timestamps, and associated LLM service details (`llm_service_id` and `llm_service_name`).
 
-Additionally, if the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that, documents returned are not only stored by Kaapi, but also by Vector store provider.
+If the `include_docs` query parameter is true, the response also includes the list of documents associated with the collection. Note that the returned documents are stored not only by Kaapi but also by the vector store provider.
+
+Additionally, if the `include_url` parameter is true (the default), each returned document includes a signed URL, which is a clickable link for accessing that document. Set `include_url` to false to omit the URL from the response.
diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py
@@ -12,8 +12,8 @@
     CollectionJobCrud,
     DocumentCollectionCrud,
 )
+from app.core.cloud import get_cloud_storage
 from app.models import (
-    DocumentPublic,
     CollectionJobStatus,
     CollectionActionType,
     CollectionJobCreate,
@@ -32,6 +32,7 @@
     create_collection as create_service,
     delete_collection as delete_service,
 )
+from app.services.documents.helpers import build_document_schemas
 
 
 logger = logging.getLogger(__name__)
@@ -184,8 +185,16 @@ def collection_info(
         True,
         description="If true, include documents linked to this collection",
     ),
-    skip: int = Query(0, ge=0),
-    limit: int = Query(100, gt=0, le=100),
+    include_url: bool = Query(
+        True, description="Include a signed URL to access the document"
+    ),
+    limit: int
+    | None = Query(
+        None,
+        gt=0,
+        le=500,
+        description="Limit number of documents returned (default: all, max: 500)",
+    ),
 ):
     collection_crud = CollectionCrud(session, current_user.project_.id)
     collection = collection_crud.read_one(collection_id)
@@ -194,9 +203,16 @@ def collection_info(
 
     if include_docs:
         document_collection_crud = DocumentCollectionCrud(session)
-        docs = document_collection_crud.read(collection, skip, limit)
-        collection_with_docs.documents = [
-            DocumentPublic.model_validate(doc) for doc in docs
-        ]
+        documents = document_collection_crud.read(collection, skip=None, limit=limit)
+
+        storage = None
+        if include_url and documents:
+            storage = get_cloud_storage(
+                session=session, project_id=current_user.project_.id
+            )
+
+        collection_with_docs.documents = build_document_schemas(
+            documents=documents, storage=storage, include_url=include_url
+        )
 
     return APIResponse.success_response(collection_with_docs)
diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py
@@ -11,12 +11,10 @@
     Query,
     UploadFile,
 )
-from pydantic import HttpUrl
 from fastapi import Path as FastPath
 
 from app.api.deps import AuthContextDep, SessionDep
 from app.api.permissions import Permission, require_permission
-from app.core.cloud import get_cloud_storage
 from app.crud import CollectionCrud, DocumentCrud
 from app.crud.rag import OpenAIAssistantCrud, OpenAIVectorStoreCrud
 from app.models import (
@@ -28,6 +26,7 @@
     TransformationJobInfo,
     DocTransformationJobPublic,
 )
+from app.core.cloud import get_cloud_storage
 from app.services.collections.helpers import pick_service_for_documennt
 from app.services.documents.helpers import (
     schedule_transformation,
@@ -261,4 +260,5 @@ def doc_info(
         include_url=include_url,
         storage=storage,
     )
+
     return APIResponse.success_response(doc_schema)
diff --git a/backend/app/tests/api/routes/collections/test_collection_info.py b/backend/app/tests/api/routes/collections/test_collection_info.py
@@ -185,3 +185,42 @@ def test_collection_info_not_found_returns_404(
     )
 
     assert response.status_code == 404
+
+
+def test_collection_info_include_docs_and_url(
+    client: TestClient,
+    db: Session,
+    user_api_key_header,
+) -> None:
+    """
+    Test that when include_docs=true and include_url=true,
+    the endpoint returns documents with their URLs.
+    """
+    project = get_project(db, "Dalgo")
+    collection = get_collection(db, project)
+
+    document = link_document_to_collection(db, collection)
+
+    response = client.get(
+        f"{settings.API_V1_STR}/collections/{collection.id}",
+        headers=user_api_key_header,
+        params={"include_docs": "true", "include_url": "true"},
+    )
+
+    assert response.status_code == 200
+
+    data = response.json()
+    payload = data["data"]
+
+    assert payload["id"] == str(collection.id)
+
+    docs = payload.get("documents", [])
+    assert isinstance(docs, list)
+    assert len(docs) >= 1
+
+    doc_ids = {d["id"] for d in docs}
+    assert str(document.id) in doc_ids
+
+    doc = next(d for d in docs if d["id"] == str(document.id))
+    assert "signed_url" in doc
+    assert doc["signed_url"].startswith("https://")