From 811e4db2551b1bb27a140dbebff165648af3028a Mon Sep 17 00:00:00 2001 From: aliamerj Date: Sun, 25 May 2025 20:58:11 +0300 Subject: [PATCH] fix bugs for exist files and fix private files Signed-off-by: aliamerj --- DataSource/Aws/setAwsConnection.ts | 1 - .../UpdateConfigDirect/UpdateConfigDirect.tsx | 2 +- .../DirectUpload/setDirectUploadConnection.ts | 20 +- DataSource/Dropbox/setDropboxConnection.ts | 1 - .../GoogleDrive/setGoogleDriveConnection.ts | 1 - actions/connctions/delete/index.ts | 16 +- actions/connctions/sync/index.ts | 1 - app/api/chunks/[...ids]/route.ts | 4 +- app/api/connections/[id]/files/route.ts | 21 +- app/api/connections/[id]/route.ts | 22 +- app/api/connections/[id]/status/route.ts | 4 +- app/api/connections/[id]/sync/route.ts | 6 +- app/api/retrievals/route.ts | 15 +- app/api/upload/route.ts | 1 - .../ConfigConnection/ConfigConnection.tsx | 8 +- components/Connections/Connections.tsx | 4 +- .../DeleteConnection/DeleteConnection.tsx | 2 +- components/PipelineFlow/PipelineFlow.tsx | 2 +- components/StopConnection/StopConnection.tsx | 2 +- components/SyncConnection/SyncConnection.tsx | 2 +- components/UploadFileForm/UploadFileForm.tsx | 14 +- cypress.config.ts | 35 +- cypress/e2e/directUpload.cy.ts | 470 +++++++++++++++--- cypress/e2e/dropbox.cy.ts | 91 ++-- db/schemas/connections.ts | 1 - drizzle/meta/_journal.json | 21 + fileProcessors/index.ts | 111 +++-- qdrant/index.ts | 6 +- 28 files changed, 639 insertions(+), 245 deletions(-) diff --git a/DataSource/Aws/setAwsConnection.ts b/DataSource/Aws/setAwsConnection.ts index 5d25f0a..1f1a292 100644 --- a/DataSource/Aws/setAwsConnection.ts +++ b/DataSource/Aws/setAwsConnection.ts @@ -33,7 +33,6 @@ export const setAWSConnection = async (formData: FormData) => { } : undefined, metadata: config.data.metadata, isConfigSet: true, - isSyncing: true, limitPages:config.data.pageLimit, limitFiles: config.data.fileLimit, }).where(eq(connections.id, config.data.connectionId)) diff --git a/DataSource/DirectUpload/UpdateConfigDirect/UpdateConfigDirect.tsx b/DataSource/DirectUpload/UpdateConfigDirect/UpdateConfigDirect.tsx index de3aa7c..0c26291 100644 --- a/DataSource/DirectUpload/UpdateConfigDirect/UpdateConfigDirect.tsx +++ b/DataSource/DirectUpload/UpdateConfigDirect/UpdateConfigDirect.tsx @@ -19,7 +19,7 @@ export const UpdateConfigDirect = ({ connection, status }: { connection: Connect return ( - diff --git a/DataSource/DirectUpload/setDirectUploadConnection.ts b/DataSource/DirectUpload/setDirectUploadConnection.ts index 65ac264..4fd6936 100644 --- a/DataSource/DirectUpload/setDirectUploadConnection.ts +++ b/DataSource/DirectUpload/setDirectUploadConnection.ts @@ -1,6 +1,7 @@ import { databaseDrizzle } from "@/db"; import { connections, processedFiles } from "@/db/schemas/connections"; -import { qdrant_collection_name, qdrantCLient } from "@/qdrant"; +import { tryAndCatch } from "@/lib/try-catch"; +import { qdrant_collection_name, qdrantClient } from "@/qdrant"; import { and, eq } from "drizzle-orm"; import { z } from "zod"; @@ -52,6 +53,7 @@ const directUploadConfig = z.object({ }) const updateDirectUploadConfig = z.object({ + userId: z.string().min(5), connectionId: z.string().min(5), pageLimit: z.string().nullable().transform((str, ctx): number | null => { try { @@ -91,6 +93,7 @@ const updateDirectUploadConfig = z.object({ export const updateDirectUploadConnection = async (formData: FormData) => { const config = updateDirectUploadConfig.safeParse({ + userId: formData.get("userId"), connectionId: formData.get("connectionId"), identifier: formData.get("identifier"), metadata: formData.get("metadata") || "{}", @@ -110,7 +113,7 @@ export const updateDirectUploadConnection = async (formData: FormData) => { throw new Error(`Validation errors - ${errors}`) } - const connectionChunksIds: { chunksIds: string[] }[] = []; + const connectionChunksIds: { chunksIds: string[], name: string }[] = []; await databaseDrizzle .update(connections) @@ -125,14 +128,18 @@ export const updateDirectUploadConnection = async (formData: FormData) => { .where(and( eq(processedFiles.connectionId, config.data.connectionId), eq(processedFiles.name, fileName) - )).returning({ chunksIds: processedFiles.chunksIds }) + )).returning({ chunksIds: processedFiles.chunksIds, name: processedFiles.name }) connectionChunksIds.push(...files) } - for (const { chunksIds } of connectionChunksIds) { - await qdrantCLient.delete(qdrant_collection_name, { + for (const { chunksIds, name } of connectionChunksIds) { + await tryAndCatch(qdrantClient.delete(qdrant_collection_name, { points: chunksIds, - }) + filter: { + must: [{ "key": "_document_id", "match": { value: name } }, + { key: "_userId", match: { value: config.data.userId } }] + } + })) } const files = config.data.files.map(async (file) => ({ @@ -188,7 +195,6 @@ export const setDirectUploadConnection = async (formData: FormData) => { service: 'DIRECT_UPLOAD', metadata: metadata, isConfigSet: true, - isSyncing: true, limitPages: pageLimit, limitFiles: fileLimit, }).returning({ id: connections.id }) diff --git a/DataSource/Dropbox/setDropboxConnection.ts b/DataSource/Dropbox/setDropboxConnection.ts index fa72429..8810fc9 100644 --- a/DataSource/Dropbox/setDropboxConnection.ts +++ b/DataSource/Dropbox/setDropboxConnection.ts @@ -32,7 +32,6 @@ export const setDropboxConnection = async (formData: FormData) => { } : undefined, metadata: config.data.metadata, isConfigSet: true, - isSyncing: true, limitFiles: config.data.fileLimit, limitPages: config.data.pageLimit, }).where(eq(connections.id, config.data.connectionId)) diff --git a/DataSource/GoogleDrive/setGoogleDriveConnection.ts b/DataSource/GoogleDrive/setGoogleDriveConnection.ts index eb4df16..5f06387 100644 --- a/DataSource/GoogleDrive/setGoogleDriveConnection.ts +++ b/DataSource/GoogleDrive/setGoogleDriveConnection.ts @@ -34,7 +34,6 @@ export const setGoogleDriveConnection = async (formData: FormData) => { limitPages: config.data.pageLimit, limitFiles: config.data.fileLimit, isConfigSet: true, - isSyncing: true, }).where(eq(connections.id, config.data.connectionId)) return { diff --git a/actions/connctions/delete/index.ts b/actions/connctions/delete/index.ts index 39001d5..8d797ef 100644 --- a/actions/connctions/delete/index.ts +++ b/actions/connctions/delete/index.ts @@ -2,8 +2,9 @@ import { authOptions } from "@/auth"; import { databaseDrizzle } from "@/db"; import { connections, processedFiles } from "@/db/schemas/connections"; +import { tryAndCatch } from "@/lib/try-catch"; import { fromErrorToFormState, toFormState } from "@/lib/zodErrorHandle"; -import { qdrant_collection_name, qdrantCLient } from "@/qdrant"; +import { qdrant_collection_name, qdrantClient } from "@/qdrant"; import { eq } from "drizzle-orm"; import { getServerSession } from "next-auth"; import { revalidatePath } from "next/cache"; @@ -26,14 +27,19 @@ export async function deleteConnectionConfig(_: FormState, formData: FormData) { }) const connectionChunksIds = await databaseDrizzle - .select({ chunksIds: processedFiles.chunksIds }) + .select({ chunksIds: processedFiles.chunksIds, name: processedFiles.name }) .from(processedFiles) .where(eq(processedFiles.connectionId, id)) - for (const { chunksIds } of connectionChunksIds) { - await qdrantCLient.delete(qdrant_collection_name, { + for (const { chunksIds, name } of connectionChunksIds) { + await tryAndCatch(qdrantClient.delete(qdrant_collection_name, { points: chunksIds, - }) + filter: { + must: [ + { key: "_document_id", "match": { value: name } }, + { key: "_userId", match: { value: session.user.id } }] + } + })) } await databaseDrizzle diff --git a/actions/connctions/sync/index.ts b/actions/connctions/sync/index.ts index f59f397..53a09e6 100644 --- a/actions/connctions/sync/index.ts +++ b/actions/connctions/sync/index.ts @@ -77,7 +77,6 @@ export const syncConnectionConfig = async (_: FormState, formData: FormData) => links: [] }) await databaseDrizzle.update(connections).set({ - isSyncing: true, jobId: jobId }).where(eq(connections.id, connectionId)); diff --git a/app/api/chunks/[...ids]/route.ts b/app/api/chunks/[...ids]/route.ts index 389f803..a098022 100644 --- a/app/api/chunks/[...ids]/route.ts +++ b/app/api/chunks/[...ids]/route.ts @@ -4,7 +4,7 @@ import { NextRequest, NextResponse } from "next/server"; import { APIError } from "@/lib/APIError"; import { databaseDrizzle } from "@/db"; import { arrayContains } from "drizzle-orm"; -import { qdrant_collection_name, qdrantCLient } from "@/qdrant"; +import { qdrant_collection_name, qdrantClient } from "@/qdrant"; type Params = { params: Promise<{ @@ -62,7 +62,7 @@ export async function GET(request: NextRequest, { params }: Params) { ) } - const { data: chunks, error: retrieveError } = await tryAndCatch(qdrantCLient.retrieve(qdrant_collection_name, { + const { data: chunks, error: retrieveError } = await tryAndCatch(qdrantClient.retrieve(qdrant_collection_name, { ids: chunksIds, with_payload: true, })) diff --git a/app/api/connections/[id]/files/route.ts b/app/api/connections/[id]/files/route.ts index 0654e33..a47e3bb 100644 --- a/app/api/connections/[id]/files/route.ts +++ b/app/api/connections/[id]/files/route.ts @@ -2,7 +2,7 @@ import { databaseDrizzle } from "@/db" import { processedFiles } from "@/db/schemas/connections" import { checkAuth } from "@/lib/api_key" import { tryAndCatch } from "@/lib/try-catch" -import { qdrant_collection_name, qdrantCLient } from "@/qdrant" +import { qdrant_collection_name, qdrantClient } from "@/qdrant" import { and, eq } from "drizzle-orm" import { NextRequest, NextResponse } from "next/server" import { APIError } from "openai" @@ -80,12 +80,11 @@ export async function DELETE(request: NextRequest, { params }: Params) { } const { id } = await params - const connectionChunksIds: { chunksIds: string[] }[] = []; + const connectionChunksIds: { chunksIds: string[], name: string }[] = []; const connection = await databaseDrizzle.query.connections.findFirst({ where: (c, ops) => ops.and(ops.eq(c.userId, userId), ops.eq(c.id, id)), columns: { id: true, - isSyncing: true, jobId: true, }, }) @@ -97,7 +96,7 @@ export async function DELETE(request: NextRequest, { params }: Params) { }, { status: 403 }) } - if (connection.isSyncing || connection.jobId) { + if (connection.jobId) { return NextResponse.json( { code: 'processing_in_progress', @@ -117,7 +116,7 @@ export async function DELETE(request: NextRequest, { params }: Params) { .where(and( eq(processedFiles.connectionId, id), eq(processedFiles.name, fileName) - )).returning({ chunksIds: processedFiles.chunksIds })) + )).returning({ chunksIds: processedFiles.chunksIds, name: processedFiles.name })) if (error) { return NextResponse.json({ @@ -128,10 +127,16 @@ export async function DELETE(request: NextRequest, { params }: Params) { connectionChunksIds.push(...data) } - for (const { chunksIds } of connectionChunksIds) { - const { error } = await tryAndCatch(qdrantCLient.delete(qdrant_collection_name, { + for (const { chunksIds, name } of connectionChunksIds) { + const { error } = await tryAndCatch(qdrantClient.delete(qdrant_collection_name, { points: chunksIds, - wait: isWait + wait: isWait, + filter: { + must: [ + { key: "_document_id", match: { value: name } }, + { key: "_userId", match: { value: userId } } + ] + } })) if (error) { return NextResponse.json({ diff --git a/app/api/connections/[id]/route.ts b/app/api/connections/[id]/route.ts index ac1954e..a3b5ad3 100644 --- a/app/api/connections/[id]/route.ts +++ b/app/api/connections/[id]/route.ts @@ -5,7 +5,7 @@ import { APIError } from "@/lib/APIError"; import { databaseDrizzle } from "@/db"; import { eq } from "drizzle-orm"; import { connections } from "@/db/schemas/connections"; -import { qdrant_collection_name, qdrantCLient } from "@/qdrant"; +import { qdrant_collection_name, qdrantClient } from "@/qdrant"; import { setConnectionToProcess } from "@/fileProcessors/connectors"; import { connectionProcessFiles, directProcessFiles } from "@/fileProcessors"; import { addToProcessFilesQueue } from "@/workers/queues/jobs/processFiles.job"; @@ -109,12 +109,12 @@ export async function DELETE(request: NextRequest, { params }: Params) { where: (conn, ops) => ops.and(ops.eq(conn.id, id), ops.eq(conn.userId, userId)), columns: { jobId: true, - isSyncing: true, }, with: { files: { columns: { - chunksIds: true + chunksIds: true, + name: true, } } } @@ -139,7 +139,7 @@ export async function DELETE(request: NextRequest, { params }: Params) { ) } - if (conn.isSyncing || conn.jobId) { + if (conn.jobId) { return NextResponse.json( { code: 'processing_in_progress', @@ -148,10 +148,16 @@ export async function DELETE(request: NextRequest, { params }: Params) { { status: 409 } ) } - for (const { chunksIds } of conn.files) { - await tryAndCatch(qdrantCLient.delete(qdrant_collection_name, { + for (const { chunksIds, name } of conn.files) { + await tryAndCatch(qdrantClient.delete(qdrant_collection_name, { points: chunksIds, wait: wait === "true", + filter: { + must: [ + { key: "_document_id", match: { value: name } }, + { key: "_userId", match: { value: userId } }, + ] + } })) } @@ -190,9 +196,9 @@ export async function PUT(request: NextRequest, { params }: Params) { const { data: conn, error: queryError } = await tryAndCatch(databaseDrizzle.query.connections.findFirst({ where: (conn, ops) => ops.and(ops.eq(conn.id, id), ops.eq(conn.userId, userId)), columns: { - isSyncing: true, service: true, folderName: true, + jobId: true, } })) if (queryError) { @@ -215,7 +221,7 @@ export async function PUT(request: NextRequest, { params }: Params) { ) } - if (conn.isSyncing) { + if (conn.jobId) { return NextResponse.json( { code: 'already_syncing', diff --git a/app/api/connections/[id]/status/route.ts b/app/api/connections/[id]/status/route.ts index 8da1b19..5693e85 100644 --- a/app/api/connections/[id]/status/route.ts +++ b/app/api/connections/[id]/status/route.ts @@ -29,7 +29,7 @@ export async function GET(request: NextRequest, { params }: Params) { where: (conn, ops) => ops.and(ops.eq(conn.userId, userId!), ops.eq(conn.id, id)), columns: { isConfigSet: true, - isSyncing: true, + jobId:true, }, })) if (queryError) { @@ -53,7 +53,7 @@ export async function GET(request: NextRequest, { params }: Params) { const response = { status: conn.isConfigSet ? 'ready' : 'config_missing', - isSyncing: conn.isSyncing + isSyncing: !!conn.jobId } return NextResponse.json(response, { status: 200 }); diff --git a/app/api/connections/[id]/sync/route.ts b/app/api/connections/[id]/sync/route.ts index a4e4912..7f47962 100644 --- a/app/api/connections/[id]/sync/route.ts +++ b/app/api/connections/[id]/sync/route.ts @@ -35,8 +35,8 @@ export async function POST(request: NextRequest, { params }: Params) { id: true, service: true, isConfigSet: true, - isSyncing: true, - metadata: true + metadata: true, + jobId:true, }, with: { files: { @@ -75,7 +75,7 @@ export async function POST(request: NextRequest, { params }: Params) { { status: 400 }, ) } - if (conn.isSyncing) { + if (conn.jobId) { return NextResponse.json( { code: 'already_syncing', diff --git a/app/api/retrievals/route.ts b/app/api/retrievals/route.ts index 1097f99..2bdca6e 100644 --- a/app/api/retrievals/route.ts +++ b/app/api/retrievals/route.ts @@ -4,7 +4,7 @@ import { apiKeys, users } from '@/db/schemas/users'; import { hashApiKey } from '@/lib/api_key'; import { Plans } from '@/lib/Plans'; import { expandQuery, generateHypotheticalAnswer, vectorizeText } from '@/openAi'; -import { qdrant_collection_name, qdrantCLient } from '@/qdrant'; +import { qdrant_collection_name, qdrantClient } from '@/qdrant'; import { RetrievalFilter } from '@/validations/retrievalsFilteringSchema' import { and, eq, sql } from 'drizzle-orm'; import { getServerSession } from 'next-auth'; @@ -105,17 +105,22 @@ export async function POST(request: NextRequest) { `Please upgrade to increase your limit.` }, { status: 429, - headers: { "Retry-After": "3600" } + headers: { "Retry-After": "3600" } }); } const { query, filter, top_chunk, rerank, min_score_threshold } = validation.data const queries = await expandQuery(query) const vectors = await vectorizeText(queries) - - const queryPoints = await qdrantCLient.search(qdrant_collection_name, { + const queryPoints = await qdrantClient.search(qdrant_collection_name, { vector: vectors, - filter: filter ? { must: [{ nested: { key: "_metadata", filter: filter } }] } : undefined, + filter: filter ? { + must: [ + { nested: { key: "_metadata", filter: filter } }, + { key: "_userId", match: { value: userId } } + ] + } : + { must: [{ key: "_userId", match: { value: userId } }] }, limit: rerank ? top_chunk * 2 : top_chunk, with_payload: true, with_vector: true diff --git a/app/api/upload/route.ts b/app/api/upload/route.ts index d8adeee..a903b16 100644 --- a/app/api/upload/route.ts +++ b/app/api/upload/route.ts @@ -25,7 +25,6 @@ export async function POST(request: NextRequest) { code: "bad_request", message: errorForm.message, }, { status: 400 }) - formData.set("userId", userId) formData.set("service", "DIRECT_UPLOAD") const { diff --git a/components/ConfigConnection/ConfigConnection.tsx b/components/ConfigConnection/ConfigConnection.tsx index f49ee14..81d3823 100644 --- a/components/ConfigConnection/ConfigConnection.tsx +++ b/components/ConfigConnection/ConfigConnection.tsx @@ -97,7 +97,7 @@ export const ConfigConnection = ({ connection, directory, status, open, setOpen, return ( setOpen(o)} > - @@ -135,7 +135,7 @@ export const ConfigConnection = ({ connection, directory, status, open, setOpen, type={isTest ? 'text' : 'button'} readOnly={!isTest} /> - @@ -161,7 +161,7 @@ export const ConfigConnection = ({ connection, directory, status, open, setOpen, s + f.totalPages, 0) !== 0 ? connection.files.reduce((s, f) => s + f.totalPages, 0) : undefined} placeholder="Enter page limit" @@ -176,7 +176,7 @@ export const ConfigConnection = ({ connection, directory, status, open, setOpen, - {status === 'PROCESSING' || (!status && connection.isSyncing) ? ( + {status === 'PROCESSING' || (!status && connection.jobId) ? ( ) : errorMessage ? () : ()} - {status === 'FINISHED' || (!status && !connection.isSyncing) ? errorMessage ? "Sync Failed" : "Sync completed" : "Currently syncing"} + {status === 'FINISHED' || (!status && !connection.jobId) ? errorMessage ? "Sync Failed" : "Sync completed" : "Currently syncing"} ) diff --git a/components/DeleteConnection/DeleteConnection.tsx b/components/DeleteConnection/DeleteConnection.tsx index 343e486..1abf1a9 100644 --- a/components/DeleteConnection/DeleteConnection.tsx +++ b/components/DeleteConnection/DeleteConnection.tsx @@ -50,7 +50,7 @@ export const DeleteConnection = ({ connection, status }: { connection: Connectio setOpen(e)} > - diff --git a/components/PipelineFlow/PipelineFlow.tsx b/components/PipelineFlow/PipelineFlow.tsx index f6cec62..3c3d1bc 100644 --- a/components/PipelineFlow/PipelineFlow.tsx +++ b/components/PipelineFlow/PipelineFlow.tsx @@ -173,7 +173,7 @@ export default function PipelineFlow({ connections }: { connections: ConnectionQ id: `${conn.id}-to-app`, source: conn.isConfigSet ? conn.id : "1", target: 'app-logo', - type: conn.isSyncing ? "animatedSvgEdge" : undefined, + type: conn.jobId ? "animatedSvgEdge" : undefined, data: { duration: 2, shape: "package", diff --git a/components/StopConnection/StopConnection.tsx b/components/StopConnection/StopConnection.tsx index e54f8ab..d1a6f10 100644 --- a/components/StopConnection/StopConnection.tsx +++ b/components/StopConnection/StopConnection.tsx @@ -38,7 +38,7 @@ export const StopConnection = ({ connection, status }: { } return ( - diff --git a/components/SyncConnection/SyncConnection.tsx b/components/SyncConnection/SyncConnection.tsx index caf88a3..a9e2c59 100644 --- a/components/SyncConnection/SyncConnection.tsx +++ b/components/SyncConnection/SyncConnection.tsx @@ -50,7 +50,7 @@ export const SyncConnection = ({ connection, status }: { return ( setOpen(e)} > - diff --git a/components/UploadFileForm/UploadFileForm.tsx b/components/UploadFileForm/UploadFileForm.tsx index 3e7510e..73c0254 100644 --- a/components/UploadFileForm/UploadFileForm.tsx +++ b/components/UploadFileForm/UploadFileForm.tsx @@ -161,7 +161,11 @@ export const DataInput = ({ files, setFiles, links, setLinks, currentFiles, remo const droppedFiles = Array.from(e.dataTransfer.files); const validFiles = droppedFiles.filter((f) => { if (f.type !== "application/pdf") { - setInvalidFile(f.name); + setInvalidFile(`${f.name} is not supported. Please upload PDFs only.`); + return false; + } + if (currentFiles.includes(f.name)) { + setInvalidFile(`${f.name} is already added`); return false; } return true; @@ -174,7 +178,11 @@ export const DataInput = ({ files, setFiles, links, setLinks, currentFiles, remo const selectedFiles = Array.from(e.target.files || []); const validFiles = selectedFiles.filter((f) => { if (f.type !== "application/pdf") { - setInvalidFile(f.name); + setInvalidFile(`${f.name} is not supported. Please upload PDFs only.`); + return false; + } + if (currentFiles.includes(f.name)) { + setInvalidFile(`${f.name} is already added`); return false; } return true; @@ -235,7 +243,7 @@ export const DataInput = ({ files, setFiles, links, setLinks, currentFiles, remo {invalidFile && (
- {`${invalidFile} is not supported. Please upload PDFs only.`} + {invalidFile}
)} diff --git a/cypress.config.ts b/cypress.config.ts index fbbe974..612dbf4 100644 --- a/cypress.config.ts +++ b/cypress.config.ts @@ -8,7 +8,7 @@ import { QdrantClient } from "@qdrant/js-client-rest"; dotenv.config({ path: ".env" }); const qdrant_collection_name = "documents" -const qdrantCLient = new QdrantClient({ url: process.env.QDRANT_DB_URL!, apiKey: process.env.QDRANT_DB_KEY }); +const qdrantClient = new QdrantClient({ url: process.env.QDRANT_DB_URL!, apiKey: process.env.QDRANT_DB_KEY }); export default defineConfig({ retries: { @@ -18,13 +18,23 @@ export default defineConfig({ chromeWebSecurity: false, watchForFileChanges: false, e2e: { + defaultCommandTimeout: 10_000, setupNodeEvents(on) { on("task", { + async getUserId({ email }: { email: string }) { + const user = await databaseDrizzle.query.users.findFirst({ + where: (u, ops) => ops.eq(u.email, email), + columns: { + id: true, + } + }) + return user?.id || null + }, async createCollection() { try { - const { collections } = await qdrantCLient.getCollections(); + const { collections } = await qdrantClient.getCollections(); if (!collections.find(col => col.name === qdrant_collection_name)) { - await qdrantCLient.createCollection(qdrant_collection_name, { + await qdrantClient.createCollection(qdrant_collection_name, { vectors: { size: 1536, distance: 'Cosine' }, }); } @@ -32,18 +42,21 @@ export default defineConfig({ return null }, async deleteCollection() { - return await qdrantCLient.deleteCollection(qdrant_collection_name) + return await qdrantClient.deleteCollection(qdrant_collection_name) }, async getPointsById({ chunkIds }: { chunkIds: string[] }) { - return await qdrantCLient.retrieve(qdrant_collection_name, { + return await qdrantClient.retrieve(qdrant_collection_name, { ids: chunkIds, with_payload: true, }) }, - async getPointsNumberByFileName({ fileName }: { fileName: string }) { - const existingPoints = await qdrantCLient.scroll(qdrant_collection_name, { + async getPointsNumberByFileName({ fileName, userId }: { fileName: string, userId: string }) { + const existingPoints = await qdrantClient.scroll(qdrant_collection_name, { filter: { - must: [{ key: "_document_id", match: { value: fileName } }] + must: [ + { key: "_document_id", match: { value: fileName } }, + { key: "_userId", match: { value: userId } } + ] }, }); return existingPoints.points.length @@ -54,7 +67,7 @@ export default defineConfig({ await databaseDrizzle.insert(apiKeys).values({ userId: id, - name: "test_API", + name: id.slice(0, 5), generatedTime: new Date(), apiKey: hashedKey, }); @@ -93,7 +106,7 @@ export default defineConfig({ where: (u, ops) => ops.eq(u.email, email), with: { connections: { - where: (c, ops) => ops.eq(c.isSyncing, false), + where: (c, ops) => ops.isNull(c.jobId), with: { files: true, } @@ -109,7 +122,7 @@ export default defineConfig({ where: (u, ops) => ops.eq(u.email, email), with: { connections: { - where: (c, ops) => ops.eq(c.isSyncing, false), + where: (c, ops) => ops.isNull(c.jobId), with: { files: true, } diff --git a/cypress/e2e/directUpload.cy.ts b/cypress/e2e/directUpload.cy.ts index 1de2ecb..c0d85dd 100644 --- a/cypress/e2e/directUpload.cy.ts +++ b/cypress/e2e/directUpload.cy.ts @@ -206,11 +206,13 @@ describe("Direct Upload UI", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 2 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) - }) + cy.wait(1000) // check the UI @@ -235,14 +237,14 @@ describe("Direct Upload UI", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 2 }] }) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) }) cy.wait(1000) @@ -271,14 +273,15 @@ describe("Direct Upload UI", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null expect(conn.files.length).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) }) cy.wait(1000) @@ -305,10 +308,10 @@ describe("Direct Upload UI", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "sample.pdf", totalPages: 1 }] }) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) }) cy.wait(1000) @@ -334,14 +337,14 @@ describe("Direct Upload UI", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 1 }, { name: "sample.pdf", totalPages: 1 }] }) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) }) cy.wait(1000) @@ -372,13 +375,16 @@ describe("Direct Upload UI", () => { const { conns } = res as { conns: ConnectionTable[] } expect(conns).to.have.length(0); }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) }) }) @@ -403,16 +409,18 @@ describe("Direct Upload UI", () => { .then(({ conns }: any) => { const conn = (conns as FileConnectionQuery[])[0] expect(conn.files.length).eq(2) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).greaterThan(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).greaterThan(0) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).greaterThan(0) + }) }) + // Delete connection cy.task('getConnections', { email: fakeUser.email }) .then(res => { @@ -435,13 +443,16 @@ describe("Direct Upload UI", () => { const { conns } = res as { conns: ConnectionTable[] } expect(conns).to.have.length(0); }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) }) }) @@ -491,16 +502,319 @@ describe("Direct Upload UI", () => { }) describe("Direct Upload API", () => { + const otherUser = { + name: "test man2", + email: "tester2x@dcup.dev", + provider: "google", + image: `https://via.placeholder.com/200/7732bb/c0392b.png?text=tester`, + plan: "OS" + } beforeEach(() => { cy.visit('/') - cy.wait(1000) cy.task("createCollection") + cy.wait(1000) }) afterEach(() => { cy.task('deleteUser', { email: fakeUser.email }) + cy.task('deleteUser', { email: otherUser.email }) cy.task("deleteCollection") }) + it("should not remove the same file from user, when Deleting a file from other user", () => { + cy.task('addNewUser', fakeUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "invo.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(5000) + // // Check + cy.task("getConnection", { email: fakeUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + }) + }) + cy.task('addNewUser', otherUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "invo.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(5000) + // // Check + cy.task("getConnection", { email: otherUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + + cy.request({ + method: "DELETE", + url: `/api/connections/${conn.id}/files`, + headers: { + "Authorization": `Bearer ${key}` + }, + body: { + file: "invo.pdf" + } + }).then(res => { + expect(res.status).eq(200) + expect(res.body.code).eq("ok") + expect(res.body.message).eq("Deleted 'invo.pdf' successfully") + }) + }) + }) + }) + cy.wait(1000) + cy.task("getConnection", { email: fakeUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + + cy.task("getConnection", { email: otherUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) + }) + }) + + it("should not affect another user's connection, when Deleting a connection from one user", () => { + cy.task('addNewUser', fakeUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "invo.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(1000) + // // Check + cy.task("getConnection", { email: fakeUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + }) + }) + + cy.task('addNewUser', otherUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "invo.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(1000) + // // Check + cy.task("getConnection", { email: otherUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + + cy.request({ + method: "DELETE", + url: `/api/connections/${conn.id}`, + headers: { + "Authorization": `Bearer ${key}` + }, + }).then(res => { + expect(res.status).eq(200) + expect(res.body.code).eq("ok") + expect(res.body.message).eq("Connection has been successfully deleted") + }) + }) + }) + }) + cy.wait(1000) + cy.task("getConnection", { email: fakeUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + + cy.task("getUserId", { email: otherUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + }) + }) + + it("should not share files between users", () => { + cy.task('addNewUser', fakeUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "invo.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(1000) + // // Check + cy.task("getConnection", { email: fakeUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 3 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + }) + }) + + cy.task('addNewUser', otherUser).then(user => { + const u = user as typeof users.$inferSelect + cy.task('createApiKey', { id: u.id }).then(key => { + // upload pdf + cy.uploadFileWithApi({ + apiKey: key as string, + fileName: "sample.pdf", + response: { + code: "ok", + message: 'Your file was successfully uploaded and processed.' + } + }) + cy.wait(1000) + // // Check + cy.task("getConnection", { email: otherUser.email }) + .then(({ conns }: any) => { + const conn = (conns as FileConnectionQuery[])[0] + expect(conn.service).eq("DIRECT_UPLOAD") + expect(conn.metadata).eq("{}") + expect(conn.limitPages).to.be.null + expect(conn.limitFiles).to.be.null + cy.checkIndexedFiles({ conn, files: [{ name: "sample.pdf", totalPages: 1 }] }) + + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + }) + }) + }) + cy.wait(1000) + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).to.eq(0) + }) + }) + + cy.task("getUserId", { email: otherUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).to.eq(0) + }) + }) + }) + it('should handle file upload, addition, and removal with database synchronization', () => { cy.task('addNewUser', fakeUser).then(user => { const u = user as typeof users.$inferSelect @@ -678,7 +992,7 @@ describe("Direct Upload API", () => { expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 2 }] }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) @@ -704,11 +1018,11 @@ describe("Direct Upload API", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 2 }] }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).eq(0) }) @@ -741,14 +1055,14 @@ describe("Direct Upload API", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null expect(conn.files.length).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).eq(0) + }) }) }) @@ -777,7 +1091,7 @@ describe("Direct Upload API", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "sample.pdf", totalPages: 1 }] }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) @@ -804,11 +1118,11 @@ describe("Direct Upload API", () => { expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, files: [{ name: "invo.pdf", totalPages: 1 }, { name: "sample.pdf", totalPages: 1 }] }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) @@ -834,13 +1148,17 @@ describe("Direct Upload API", () => { const { conns } = res as { conns: ConnectionTable[] } expect(conns).to.have.length(0); }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) }) }) }) diff --git a/cypress/e2e/dropbox.cy.ts b/cypress/e2e/dropbox.cy.ts index dc4db06..9ec5548 100644 --- a/cypress/e2e/dropbox.cy.ts +++ b/cypress/e2e/dropbox.cy.ts @@ -249,7 +249,7 @@ describe("Dropbox connection UI Testing", () => { expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, source: "DROPBOX", files: [{ name: "invo.pdf", totalPages: 2 }] }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) @@ -280,11 +280,11 @@ describe("Dropbox connection UI Testing", () => { expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, source: "DROPBOX", files: [{ name: "invo.pdf", totalPages: 2 }] }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).eq(0) }) @@ -315,11 +315,11 @@ describe("Dropbox connection UI Testing", () => { expect(conn.limitFiles).to.be.null expect(conn.files.length).eq(1) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) .then(points => { expect(points).eq(0) }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).greaterThan(0) }) @@ -357,7 +357,7 @@ describe("Dropbox connection UI Testing", () => { expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, source: "DROPBOX", files: [{ name: "sample.pdf", totalPages: 1 }] }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) .then(points => { expect(points).to.be.greaterThan(0) }) @@ -388,19 +388,17 @@ describe("Dropbox connection UI Testing", () => { expect(conn.limitPages).eq(2) expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, source: "DROPBOX", files: [{ name: "invo.pdf", totalPages: 1 }, { name: "sample.pdf", totalPages: 1 }] }) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).to.be.greaterThan(0) + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId: conn.userId }) + .then(points => { + expect(points).to.be.greaterThan(0) + }) }) cy.wait(1000) - // check the UI - cy.get('[data-test="folderName"]').should('contain.text', "_TEST_/sample.pdf") cy.get('[data-test="processedFile"]').should('contain.text', 2) cy.get('[data-test="processedPage"]').should('contain.text', 2) @@ -427,13 +425,16 @@ describe("Dropbox connection UI Testing", () => { const { conns } = res as { conns: ConnectionTable[] } expect(conns).to.have.length(0); }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) }) }) @@ -455,27 +456,24 @@ describe("Dropbox connection UI Testing", () => { cy.task("getConnection", { email: fakeUser.email }) .then(({ conns }: any) => { const conn = (conns as FileConnectionQuery[])[0] + expect(conn.files.length).eq(2) expect(conn.service).eq("DROPBOX") expect(conn.metadata).eq("{}") expect(conn.limitPages).to.be.null expect(conn.limitFiles).to.be.null cy.checkIndexedFiles({ conn, source: "DROPBOX", files: [{ name: "invo.pdf", totalPages: 3 }, { name: "sample.pdf", totalPages: 1 }] }) }) - cy.wait(1000) - // check - cy.task("getConnection", { email: fakeUser.email }) - .then(({ conns }: any) => { - const conn = (conns as FileConnectionQuery[])[0] - expect(conn.files.length).eq(2) - }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).greaterThan(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).greaterThan(0) + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).greaterThan(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).greaterThan(0) + }) }) // Delete connection @@ -492,7 +490,6 @@ describe("Dropbox connection UI Testing", () => { cy.get(`[data-test="btn-delete-${conns[0].identifier}"]`, { timeout: 15000 }) .should('not.exist') }) - cy.wait(1000) // check cy.task('getConnections', { email: fakeUser.email }) @@ -500,13 +497,17 @@ describe("Dropbox connection UI Testing", () => { const { conns } = res as { conns: ConnectionTable[] } expect(conns).to.have.length(0); }) - cy.task("getPointsNumberByFileName", { fileName: "invo.pdf" }) - .then(points => { - expect(points).eq(0) - }) - cy.task("getPointsNumberByFileName", { fileName: "sample.pdf" }) - .then(points => { - expect(points).eq(0) + + cy.task("getUserId", { email: fakeUser.email }) + .then(userId => { + cy.task("getPointsNumberByFileName", { fileName: "invo.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) + cy.task("getPointsNumberByFileName", { fileName: "sample.pdf", userId }) + .then(points => { + expect(points).eq(0) + }) }) }) diff --git a/db/schemas/connections.ts b/db/schemas/connections.ts index cf54d45..fba687d 100644 --- a/db/schemas/connections.ts +++ b/db/schemas/connections.ts @@ -33,7 +33,6 @@ export const connections = pgTable("connection", { limitPages: integer("limit_pages"), limitFiles: integer("limit_files"), lastSynced: timestamp("last_synced", { withTimezone: true }), - isSyncing: boolean("is_syncing").default(false).notNull(), jobId: text("job_id"), isConfigSet: boolean("is_config_set").default(false).notNull(), createdAt: timestamp("createdAt", { withTimezone: true }).notNull().defaultNow(), diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index c5704fa..09ecd83 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -85,6 +85,27 @@ "when": 1748097469946, "tag": "0011_keen_night_nurse", "breakpoints": true + }, + { + "idx": 12, + "version": "7", + "when": 1748160451042, + "tag": "0012_living_wallop", + "breakpoints": true + }, + { + "idx": 13, + "version": "7", + "when": 1748166534627, + "tag": "0013_stormy_garia", + "breakpoints": true + }, + { + "idx": 14, + "version": "7", + "when": 1748169746433, + "tag": "0014_volatile_galactus", + "breakpoints": true } ] } \ No newline at end of file diff --git a/fileProcessors/index.ts b/fileProcessors/index.ts index d44b661..e6f7b46 100644 --- a/fileProcessors/index.ts +++ b/fileProcessors/index.ts @@ -3,13 +3,14 @@ import { databaseDrizzle } from "@/db"; import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { v4 as uuidv4 } from 'uuid'; import { getTitleAndSummary, vectorizeText } from "@/openAi"; -import { qdrant_collection_name, qdrantCLient } from "@/qdrant"; +import { qdrant_collection_name, qdrantClient } from "@/qdrant"; import { publishProgress } from "@/events"; import { connections, processedFiles } from "@/db/schemas/connections"; import { and, eq } from "drizzle-orm"; import { getFileContent } from "./connectors"; import { processPdfLink, processPdfBuffer } from "./Files/pdf"; import { TQueue } from "@/workers/queues/jobs/processFiles.job"; +import { tryAndCatch } from "@/lib/try-catch"; export type FileContent = { name: string, @@ -64,7 +65,7 @@ export const directProcessFiles = async ({ files, metadata, service, connectionI if (pageLimit && pageLimit < currentPagesCount) { await databaseDrizzle .update(connections) - .set({ isSyncing: false, jobId: null }) + .set({ jobId: null }) .where(eq(connections.id, connectionId)) await publishProgress({ @@ -76,7 +77,7 @@ export const directProcessFiles = async ({ files, metadata, service, connectionI }) return; } - return await processFiles(filesContent, service, connectionId, pageLimit, fileLimit, currentPagesCount, connection.files.length, checkCancel) + return await processFiles(connection.userId, filesContent, service, connectionId, pageLimit, fileLimit, currentPagesCount, connection.files.length, checkCancel) } export const connectionProcessFiles = async ({ connectionId, service, pageLimit, fileLimit }: TQueue, checkCancel?: () => Promise) => { @@ -100,16 +101,22 @@ export const connectionProcessFiles = async ({ connectionId, service, pageLimit, eq(processedFiles.name, file.name) )) - await qdrantCLient.delete(qdrant_collection_name, { + await tryAndCatch(qdrantClient.delete(qdrant_collection_name, { points: file.chunksIds, - }) + filter: { + must: [ + { key: "_document_id", match: { value: file.name } }, + { key: "_userId", match: { value: connection.userId } } + ] + } + })) } // check the limits const currentPagesCount = connection?.files.reduce((sum, f) => f.totalPages + sum, 0) ?? 0 if (pageLimit && pageLimit < currentPagesCount) { await databaseDrizzle .update(connections) - .set({ isSyncing: false, jobId: null }) + .set({ jobId: null }) .where(eq(connections.id, connectionId)) await publishProgress({ @@ -121,12 +128,10 @@ export const connectionProcessFiles = async ({ connectionId, service, pageLimit, }) return; } - return processFiles(filesContent, service, connectionId, pageLimit, fileLimit, 0, 0, checkCancel) + return processFiles(connection.userId, filesContent, service, connectionId, pageLimit, fileLimit, 0, 0, checkCancel) } -const delay = (ms: number) => new Promise(res => setTimeout(res, ms)) - -const processFiles = async (filesContent: FileContent[], service: string, connectionId: string, pageLimit: number | null, fileLimit: number | null, currentPagesCount: number, currentFileCount: number, checkCancel?: () => Promise) => { +const processFiles = async (userId: string, filesContent: FileContent[], service: string, connectionId: string, pageLimit: number | null, fileLimit: number | null, currentPagesCount: number, currentFileCount: number, checkCancel?: () => Promise) => { const completedFiles: typeof processedFiles.$inferInsert[] = [] const allPoints = []; let processedPage = 0; @@ -147,6 +152,7 @@ const processFiles = async (filesContent: FileContent[], service: string, connec if (fileLimit !== null && fileLimit > 0 && fileIndex >= fileLimit) break; const baseMetadata = { _document_id: file.name, + _userId: userId, _metadata: { ...file.metadata, source: service, @@ -157,7 +163,10 @@ const processFiles = async (filesContent: FileContent[], service: string, connec shouldCancel = true; break; } - await delay(1000) + if (process.env.NEXT_PUBLIC_APP_ENV === 'TEST') { + const delay = (ms: number) => new Promise(res => setTimeout(res, ms)) + await delay(1000) + } const page = file.pages[pageIndex] if (limits <= 0 || shouldCancel) break; @@ -197,7 +206,7 @@ const processFiles = async (filesContent: FileContent[], service: string, connec } if (allPoints.length > 0) { - await qdrantCLient.upsert(qdrant_collection_name, { + await qdrantClient.upsert(qdrant_collection_name, { points: allPoints, wait: true }) @@ -217,7 +226,6 @@ const processFiles = async (filesContent: FileContent[], service: string, connec .update(connections) .set({ lastSynced: now, - isSyncing: false, jobId: null, }) .where(eq(connections.id, connectionId)) @@ -258,26 +266,28 @@ const processingTextPage = async (pageText: string, pageIndex: number, baseMetad _hash: textHash, }; - try { - const existingPoints = await qdrantCLient.scroll(qdrant_collection_name, { - filter: { - must: [{ key: "_hash", match: { value: textHash } }] - }, - limit: 1, - with_payload: true, - with_vector: true, - }); - if (existingPoints.points.length > 0) { - return { - id: existingPoints.points[0].id, - vector: existingPoints.points[0].vector as number[], - payload: { - ...existingPoints.points[0].payload, - _metadata: baseMetadata._metadata - } + const { data: existingPoints } = await tryAndCatch(qdrantClient.scroll(qdrant_collection_name, { + filter: { + must: [ + { key: "_hash", match: { value: textHash } }, + { key: "_userId", match: { value: baseMetadata._userId } } + ] + }, + limit: 1, + with_payload: true, + with_vector: true, + })); + + if (existingPoints && existingPoints.points.length > 0) { + return { + id: existingPoints.points[0].id, + vector: existingPoints.points[0].vector as number[], + payload: { + ...existingPoints.points[0].payload, + _metadata: baseMetadata._metadata } } - } catch { } + } const textVectors = await vectorizeText(chunk); const { title, summary } = await getTitleAndSummary(chunk, JSON.stringify(textMetadata)) @@ -306,27 +316,28 @@ const processingTablePage = async (tables: unknown[], pageIndex: number, baseMet _content: pageTables, _hash: tableHash, }; - - try { - const existingPoints = await qdrantCLient.scroll(qdrant_collection_name, { - filter: { - must: [{ key: "_hash", match: { value: tableHash } }] - }, - limit: 1, - with_payload: true, - with_vector: true, - }); - if (existingPoints.points.length > 0) { - return { - id: existingPoints.points[0].id, - vector: existingPoints.points[0].vector as number[], - payload: { - ...existingPoints.points[0].payload, - _metadata: baseMetadata._metadata - } + const { data: existingPoints } = await tryAndCatch(qdrantClient.scroll(qdrant_collection_name, { + filter: { + must: [ + { key: "_hash", match: { value: tableHash } }, + { key: "_userId", match: { value: baseMetadata._userId } } + ] + }, + limit: 1, + with_payload: true, + with_vector: true, + })); + + if (existingPoints && existingPoints.points.length > 0) { + return { + id: existingPoints.points[0].id, + vector: existingPoints.points[0].vector as number[], + payload: { + ...existingPoints.points[0].payload, + _metadata: baseMetadata._metadata } } - } catch { } + } const tableVectors = await vectorizeText(pageTables); const { title, summary } = await getTitleAndSummary(pageTables, JSON.stringify(tableMetadata)) diff --git a/qdrant/index.ts b/qdrant/index.ts index 0256d58..b75d1c4 100644 --- a/qdrant/index.ts +++ b/qdrant/index.ts @@ -2,13 +2,13 @@ import { QdrantClient } from "@qdrant/js-client-rest"; export const qdrant_collection_name = "documents"; -export const qdrantCLient = new QdrantClient({ url: process.env.QDRANT_DB_URL!, apiKey: process.env.QDRANT_DB_KEY }); +export const qdrantClient = new QdrantClient({ url: process.env.QDRANT_DB_URL!, apiKey: process.env.QDRANT_DB_KEY }); if (process.env.NEXT_PHASE !== 'phase-production-build') { - const { collections } = await qdrantCLient.getCollections(); + const { collections } = await qdrantClient.getCollections(); if (!collections.find(col => col.name === qdrant_collection_name)) { - await qdrantCLient.createCollection(qdrant_collection_name, { + await qdrantClient.createCollection(qdrant_collection_name, { vectors: { size: 1536, distance: 'Cosine' }, }); }