Skip to content

Commit

Permalink
perf: open push data api
Browse files — browse the repository at this point in the history
  • Loading branch information
c121914yu committed Aug 29, 2023
1 parent 19d7edb commit e0de04d
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 77 deletions.
65 changes: 42 additions & 23 deletions client/src/pages/api/openapi/kb/pushData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { PgTrainingTableName, TrainingModeEnum } from '@/constants/plugin';
import { startQueue } from '@/service/utils/tools';
import { PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin';
import { getVectorModel } from '@/service/utils/data';

/**
 * A single entry of the `data` array accepted by the push-data endpoint.
 *
 * - `q`: required text; entries with an empty `q` are skipped, and the token
 *   limit is checked against `q` only.
 * - `a`: companion text stored alongside `q` (presumably the answer of a
 *   question/answer pair — confirm against callers).
 * - `source`: optional; not read in the visible code (presumably a label for
 *   where the entry came from — confirm against callers).
 */
export type DateItemType = {
  q: string;
  a: string;
  source?: string;
};

Expand All @@ -22,17 +23,25 @@ export type Response = {
insertLen: number;
};

const modeMaxToken = {
[TrainingModeEnum.index]: 6000,
[TrainingModeEnum.qa]: 12000
const modeMap = {
[TrainingModeEnum.index]: true,
[TrainingModeEnum.qa]: true
};

export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { kbId, data, mode, prompt } = req.body as Props;
const { kbId, data, mode = TrainingModeEnum.index, prompt } = req.body as Props;

if (!kbId || !Array.isArray(data)) {
throw new Error('缺少参数');
throw new Error('KbId or data is empty');
}

if (modeMap[mode] === undefined) {
throw new Error('Mode is error');
}

if (data.length > 500) {
throw new Error('Data is too long, max 500');
}

await connectToDatabase();
Expand Down Expand Up @@ -64,25 +73,42 @@ export async function pushDataToKb({
mode,
prompt
}: { userId: string } & Props): Promise<Response> {
await authKb({
userId,
kbId
});
const [kb, vectorModel] = await Promise.all([
authKb({
userId,
kbId
}),
(async () => {
if (mode === TrainingModeEnum.index) {
const vectorModel = (await KB.findById(kbId, 'vectorModel'))?.vectorModel;

return getVectorModel(vectorModel || global.vectorModels[0].model);
}
return global.vectorModels[0];
})()
]);

const modeMaxToken = {
[TrainingModeEnum.index]: vectorModel.maxToken,
[TrainingModeEnum.qa]: global.qaModel.maxToken * 0.8
};

// Filter out duplicate QA content
const set = new Set();
const filterData: DateItemType[] = [];

data.forEach((item) => {
if (!item.q) return;

const text = item.q + item.a;

// count token
// count q token
const token = modelToolMap.countTokens({
model: 'gpt-3.5-turbo',
messages: [{ obj: 'System', value: item.q }]
});

if (token > modeMaxToken[TrainingModeEnum.qa]) {
if (token > modeMaxToken[mode]) {
return;
}

Expand Down Expand Up @@ -138,15 +164,8 @@ export async function pushDataToKb({
.filter((item) => item.status === 'fulfilled')
.map<DateItemType>((item: any) => item.value);

const vectorModel = await (async () => {
if (mode === TrainingModeEnum.index) {
return (await KB.findById(kbId, 'vectorModel'))?.vectorModel || global.vectorModels[0].model;
}
return global.vectorModels[0].model;
})();

// Insert the records
await TrainingData.insertMany(
const insertRes = await TrainingData.insertMany(
insertData.map((item) => ({
q: item.q,
a: item.a,
Expand All @@ -155,21 +174,21 @@ export async function pushDataToKb({
kbId,
mode,
prompt,
vectorModel
vectorModel: vectorModel.model
}))
);

insertData.length > 0 && startQueue();
insertRes.length > 0 && startQueue();

return {
insertLen: insertData.length
insertLen: insertRes.length
};
}

export const config = {
api: {
bodyParser: {
sizeLimit: '20mb'
sizeLimit: '12mb'
}
}
};
51 changes: 0 additions & 51 deletions client/src/pages/api/openapi/text/sensitiveCheck.ts

This file was deleted.

2 changes: 1 addition & 1 deletion client/src/pages/kb/detail/components/Import/Chunk.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ const ChunkImport = ({ kbId }: { kbId: string }) => {

// subsection import
let success = 0;
const step = 500;
const step = 300;
for (let i = 0; i < chunks.length; i += step) {
const { insertLen } = await postKbDataFromList({
kbId,
Expand Down
2 changes: 1 addition & 1 deletion client/src/pages/kb/detail/components/Import/Csv.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ const CsvImport = ({ kbId }: { kbId: string }) => {

// subsection import
let success = 0;
const step = 500;
const step = 300;
for (let i = 0; i < filterChunks.length; i += step) {
const { insertLen } = await postKbDataFromList({
kbId,
Expand Down
2 changes: 1 addition & 1 deletion client/src/pages/kb/detail/components/Import/QA.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ const QAImport = ({ kbId }: { kbId: string }) => {

// subsection import
let success = 0;
const step = 300;
const step = 200;
for (let i = 0; i < chunks.length; i += step) {
const { insertLen } = await postKbDataFromList({
kbId,
Expand Down
6 changes: 6 additions & 0 deletions client/src/pages/kb/detail/components/Info.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ const Info = (
</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModel').name}</Box>
</Flex>
<Flex mt={8} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
MaxTokens
</Box>
<Box flex={[1, '0 0 300px']}>{getValues('vectorModel').maxToken}</Box>
</Flex>
<Flex mt={5} w={'100%'} alignItems={'center'}>
<Box flex={['0 0 90px', '0 0 160px']} w={0}>
知识库头像
Expand Down

0 comments on commit e0de04d

Please sign in to comment.