Skip to content

Commit

Permalink
fix: embedding recall drop-dead halt (#1415)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu authored May 9, 2024
1 parent afe5039 commit d4169bf
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
1 change: 1 addition & 0 deletions packages/service/common/vectorStore/controller.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export type InsertVectorProps = {
};

export type EmbeddingRecallProps = {
teamId: string;
datasetIds: string[];
similarity?: number;
efSearch?: number;
Expand Down
11 changes: 8 additions & 3 deletions packages/service/common/vectorStore/pg/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,16 @@ export const embeddingRecall = async (
): Promise<{
results: EmbeddingRecallItemType[];
}> => {
const { datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;
const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;

try {
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${efSearch};
select id, collection_id, vector <#> '[${vectors[0]}]' AS score
from ${PgDatasetTableName}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
AND vector <#> '[${vectors[0]}]' < -${similarity}
order by score limit ${limit};
COMMIT;`
Expand All @@ -153,10 +154,14 @@ export const embeddingRecall = async (
}))
};
} catch (error) {
console.log(error);
if (retry <= 0) {
return Promise.reject(error);
}
return embeddingRecall(props);
return embeddingRecall({
...props,
retry: retry - 1
});
}
};

Expand Down
11 changes: 6 additions & 5 deletions packages/service/core/dataset/search/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
const countRecallLimit = () => {
if (searchMode === DatasetSearchModeEnum.embedding) {
return {
embeddingLimit: 150,
embeddingLimit: 100,
fullTextLimit: 0
};
}
if (searchMode === DatasetSearchModeEnum.fullTextRecall) {
return {
embeddingLimit: 0,
fullTextLimit: 150
fullTextLimit: 100
};
}
return {
embeddingLimit: 100,
fullTextLimit: 80
embeddingLimit: 80,
fullTextLimit: 60
};
};
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
Expand All @@ -82,9 +82,10 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
});

const { results } = await recallFromVectorStore({
teamId,
datasetIds,
vectors,
limit,
datasetIds,
efSearch: global.systemEnv?.pgHNSWEfSearch
});

Expand Down

0 comments on commit d4169bf

Please sign in to comment.