I am running a pgvector similarity search, and once the table holds too many documents it returns no results. Any ideas?
I have a pgvector column and roughly 50,000 records. Here are my indexes,
and here is my function to match:
CREATE INDEX "documents_embedding_idx" ON "public"."documents" USING "hnsw" ("embedding" "public"."vector_cosine_ops");
CREATE INDEX "documents_userId_idx" ON "public"."documents" USING "btree" ("userId");CREATE INDEX "documents_embedding_idx" ON "public"."documents" USING "hnsw" ("embedding" "public"."vector_cosine_ops");
CREATE INDEX "documents_userId_idx" ON "public"."documents" USING "btree" ("userId");and here is my function to match:
CREATE OR REPLACE FUNCTION "public"."match_documents"("query_embedding" "public"."vector", "match_threshold" double precision, "match_count" integer, "userid" "uuid") RETURNS TABLE("id" "text", "chunktext" "text", "sourceId" "text", "similarity" double precision, "document" "jsonb")
LANGUAGE "sql" STABLE
AS $$
SELECT
documents.id,
documents."chunkText",
documents."sourceId",
1 - (documents.embedding <=> query_embedding) AS similarity,
documents.document
FROM documents
WHERE
1 - (documents.embedding <=> query_embedding) > match_threshold
AND documents."userId" = userId
ORDER BY similarity DESC
LIMIT match_count;
$$;CREATE OR REPLACE FUNCTION "public"."match_documents"("query_embedding" "public"."vector", "match_threshold" double precision, "match_count" integer, "userid" "uuid") RETURNS TABLE("id" "text", "chunktext" "text", "sourceId" "text", "similarity" double precision, "document" "jsonb")
LANGUAGE "sql" STABLE
AS $$
SELECT
documents.id,
documents."chunkText",
documents."sourceId",
1 - (documents.embedding <=> query_embedding) AS similarity,
documents.document
FROM documents
WHERE
1 - (documents.embedding <=> query_embedding) > match_threshold
AND documents."userId" = userId
ORDER BY similarity DESC
LIMIT match_count;
$$;