Skip to content

Commit

Permalink
Extraction schema (#398)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu authored Oct 14, 2023
1 parent 7db8d3e commit dd8f274
Show file tree
Hide file tree
Showing 193 changed files with 2,120 additions and 15,778 deletions.
8 changes: 2 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ ARG name
# copy packages and one project
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY ./packages ./packages
COPY ./projects/$name/package.json ./projects/$name/package.json
COPY ./projects/$name/pnpm-lock.yaml ./projects/$name/pnpm-lock.yaml
COPY ./projects/$name ./projects/$name

RUN \
[ -f pnpm-lock.yaml ] && pnpm install || \
Expand All @@ -27,10 +26,7 @@ ARG name
# copy common node_modules and one project node_modules
COPY --from=deps /app/node_modules ./node_modules
COPY --from=deps /app/packages ./packages
COPY ./projects/$name ./projects/$name
COPY --from=deps /app/projects/$name/node_modules ./projects/$name/node_modules
COPY pnpm-lock.yaml pnpm-workspace.yaml ./
COPY ./packages ./packages
COPY --from=deps /app/projects/$name ./projects/$name

# Uncomment the following line in case you want to disable telemetry during the build.
ENV NEXT_TELEMETRY_DISABLED 1
Expand Down
2 changes: 1 addition & 1 deletion docSite/content/docs/development/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ weight: 520
"SystemParams": {
"vectorMaxProcess": 15, // 向量生成最大进程,结合数据库性能和 key 来设置
"qaMaxProcess": 15, // QA 生成最大进程,结合数据库性能和 key 来设置
"pgIvfflatProbe": 20 // pg vector 搜索探针。没有设置索引前可忽略,通常 50w 组以上才需要设置。
"pgHNSWEfSearch": 40 // pg vector 索引参数,越大精度高但速度慢
},
"ChatModels": [
{
Expand Down
2 changes: 1 addition & 1 deletion docSite/content/docs/development/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ git clone [email protected]:<github_username>/FastGPT.git

- `vectorMaxProcess`: 向量生成最大进程,根据数据库和 key 的并发数来决定,通常单个 120 号,2c4g 服务器设置 10~15。
- `qaMaxProcess`: QA 生成最大进程
- `pgIvfflatProbe`: PostgreSQL vector 搜索探针,没有添加 vector 索引时可忽略
- `pgHNSWEfSearch`: PostgreSQL vector 索引参数,越大搜索精度越高但是速度越慢,具体可看 pgvector 官方说明

### 5. 运行

Expand Down
2 changes: 1 addition & 1 deletion docSite/content/docs/installation/docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ brew install orbstack

依次执行下面命令,创建 FastGPT 文件并拉取 `docker-compose.yml` 和 `config.json`,执行完后目录下会有 2 个文件。

非 Linux 环境或无法访问外网环境,可手动创建一个目录,并下载下面2个链接的文件
非 Linux 环境或无法访问外网环境,可手动创建一个目录,并下载下面2个链接的文件: [docker-compose.yml](https://github.com/labring/FastGPT/blob/main/files/deploy/fastgpt/docker-compose.yml),[config.json](https://github.com/labring/FastGPT/blob/main/projects/app/data/config.json)

**注意: `docker-compose.yml` 配置文件中 Mongo 为 5.x,部分服务器不支持,需手动更改其镜像版本为 4.4.24**

Expand Down
4 changes: 4 additions & 0 deletions docSite/content/docs/pricing.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ weight: 10
| 星火2.0 - 对话 | 0.01 |
| chatglm_pro - 对话 | 0.01 |
| 通义千问 - 对话 | 0.01 |
| 问题分类 | 0.03 |
| 内容提取 | 0.03 |
| 下一步指引 | 0.015 |

{{< /table >}}

{{% alert context="warning" %}}
Expand Down
8 changes: 4 additions & 4 deletions files/deploy/fastgpt/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
version: '3.3'
services:
pg:
image: ankane/pgvector:v0.4.2 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.4.2 # 阿里云
image: ankane/pgvector:v0.5.0 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.5.0 # 阿里云
container_name: pg
restart: always
ports: # 生产环境建议不要暴露
Expand Down Expand Up @@ -66,8 +66,8 @@ networks:
# version: '3.3'
# services:
# pg:
# image: ankane/pgvector:v0.4.2 # dockerhub
# # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.4.2 # 阿里云
# image: ankane/pgvector:v0.5.0 # dockerhub
# # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/pgvector:v0.5.0 # 阿里云
# container_name: pg
# restart: always
# ports: # 生产环境建议不要暴露
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
},
"devDependencies": {
"husky": "^8.0.3",
"i18next": "^23.2.11",
"lint-staged": "^13.2.1",
"next-i18next": "^14.0.0",
"prettier": "^3.0.3",
"react-i18next": "^13.0.2"
"i18next": "^23.2.11",
"react-i18next": "^13.0.2",
"next-i18next": "^14.0.0"
},
"lint-staged": {
"./**/**/*.{ts,tsx,scss}": "npm run format"
Expand Down
File renamed without changes.
6 changes: 6 additions & 0 deletions packages/common/mongo/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import mongoose from 'mongoose';

export default mongoose;
export * from 'mongoose';

// Resolved once at module load: use the mongoose instance stored on `global.mongodb`
// when one is present, otherwise fall back to the mongoose module imported above.
export const connectionMongo = global.mongodb || mongoose;
74 changes: 74 additions & 0 deletions packages/common/mongo/init.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import mongoose from './index';
import 'winston-mongodb';
import { createLogger, format, transports } from 'winston';

/**
 * Open the MongoDB connection once and run the optional lifecycle hooks.
 *
 * `global.mongodb` acts as the "already started" marker: if it is set the call
 * returns immediately. It is assigned before anything is awaited, and cleared
 * again when connecting (or `afterHook`) throws. Failures are logged, not rethrown.
 *
 * @param beforeHook - optional callback awaited before the logger is created and the connection is opened
 * @param afterHook - optional callback awaited after the connection has been established
 */
export async function connectMongo({
  beforeHook,
  afterHook
}: {
  beforeHook?: () => any;
  afterHook?: () => any;
}): Promise<void> {
  // A previous call already claimed the slot - nothing to do.
  if (global.mongodb) return;

  // Claim the slot before the first await so later calls hit the guard above.
  global.mongodb = mongoose;

  if (beforeHook) {
    await beforeHook();
  }

  // logger
  initLogger();

  console.log('mongo start connect');
  try {
    mongoose.set('strictQuery', true);

    const uri = process.env.MONGODB_URI as string;
    await mongoose.connect(uri, {
      bufferCommands: true,
      maxConnecting: Number(process.env.DB_MAX_LINK || 5),
      maxPoolSize: Number(process.env.DB_MAX_LINK || 5),
      minPoolSize: 2
    });

    console.log('mongo connected');

    if (afterHook) {
      await afterHook();
    }
  } catch (error) {
    console.log('error->', 'mongo connect error', error);
    // Release the slot so a later call can try again.
    global.mongodb = undefined;
  }
}

/**
 * Build the winston logger and publish it as `global.logger`.
 *
 * Two transports are registered: a MongoDB transport writing JSON entries with a
 * timestamp to the `server_logs` collection, and a console transport printing
 * `[LEVEL]: timestamp: message`, followed by the JSON-encoded `meta` when present.
 * For `error` entries the raw `meta` is printed via `console.log` instead of being
 * appended to the line.
 */
function initLogger() {
  const mongoTransport = new transports.MongoDB({
    db: process.env.MONGODB_URI as string,
    collection: 'server_logs',
    options: {
      useUnifiedTopology: true
    },
    cappedSize: 500000000,
    tryReconnect: true,
    metaKey: 'meta',
    format: format.combine(format.timestamp(), format.json())
  });

  const consoleTransport = new transports.Console({
    format: format.combine(
      format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
      format.printf((info) => {
        const isError = info.level === 'error';
        if (isError) {
          // Errors print their meta object separately instead of inlining it.
          console.log(info.meta);
        }

        const head = `[${info.level.toLocaleUpperCase()}]: ${[info.timestamp]}: ${info.message}`;
        if (isError || !info.meta) {
          return head;
        }
        return `${head}: ${JSON.stringify(info.meta)}`;
      })
    )
  });

  global.logger = createLogger({
    transports: [mongoTransport, consoleTransport]
  });
}
11 changes: 10 additions & 1 deletion packages/common/package.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
{
"name": "@fastgpt/common",
"version": "1.0.0"
"version": "1.0.0",
"dependencies": {
"mongoose": "^7.0.2",
"winston": "^3.10.0",
"winston-mongodb": "^5.1.1",
"axios": "^1.5.1"
},
"devDependencies": {
"@types/node": "^20.8.5"
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { POST } from './request';

export const postTextCensor = (data: { text: string }) =>
POST<{ code?: number; message: string }>('/plugins/censor/text_baidu', data)
POST<{ code?: number; message: string }>('/common/censor/text_baidu', data)
.then((res) => {
if (res?.code === 5000) {
return Promise.reject(res);
Expand Down
File renamed without changes.
27 changes: 27 additions & 0 deletions packages/common/tools/str.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,32 @@
import crypto from 'crypto';

/**
 * Report whether `str` looks like a link.
 *
 * A link here is a whitespace-free string that begins with `://` (optionally
 * preceded by `http`/`https`), with `www.`, or with `/`, followed by at least two
 * more characters. Empty or missing input yields `false`.
 */
export function strIsLink(str?: string) {
  const linkPattern = /^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i;
  return !!str && linkPattern.test(str);
}

/**
 * Return the SHA-256 digest of `psw` as a lowercase hex string.
 */
export const hashStr = (psw: string) => {
  const hasher = crypto.createHash('sha256');
  hasher.update(psw);
  return hasher.digest('hex');
};

/**
 * Simplify extracted text.
 *
 * Steps, in order:
 * 1. remove non-newline whitespace sitting between two CJK characters;
 * 2. collapse runs of line breaks into a single `\n`;
 * 3. collapse runs of two or more non-newline whitespace characters into one space;
 * 4. replace the control characters `\x00`-`\x08` with a space;
 * 5. drop every `\n` that does not directly follow sentence-ending punctuation,
 *    so hard-wrapped lines are joined back together.
 *
 * @param text - raw text
 * @returns the simplified text
 */
export const simpleText = (text: string) => {
  // `[^\S\n]` means "whitespace other than a line break". JavaScript regular
  // expressions have no `&&` class intersection: the earlier `[\s&&[^\n]]` was
  // parsed as the class `[\s&&[^\n]` followed by a literal `]`, so it never matched
  // ordinary spaces. The lookahead keeps the right-hand CJK character available
  // as the left-hand side of the next match ("A B C" collapses in a single pass).
  text = text.replace(/([\u4e00-\u9fa5])[^\S\n]+(?=[\u4e00-\u9fa5])/g, '$1');
  text = text.replace(/\n{2,}/g, '\n');
  text = text.replace(/[^\S\n]{2,}/g, ' ');
  text = text.replace(/[\x00-\x08]/g, ' ');

  // Sentence terminators after which a line break is kept. Full-width ? ! ;
  // are listed via escapes next to their ASCII forms.
  // NOTE(review): the full-width marks are an addition - confirm they are wanted.
  const sentenceEnd = /[。?!;.\uFF1F\uFF01\uFF1B]/;

  // Remove line breaks that do not follow a sentence terminator.
  let newText = '';
  let lastChar = '';
  for (const currentChar of text) {
    if (currentChar !== '\n' || sentenceEnd.test(lastChar)) {
      newText += currentChar;
    }
    lastChar = currentChar;
  }
  return newText;
};
Empty file added packages/common/type/chat.d.ts
Empty file.
43 changes: 43 additions & 0 deletions packages/common/type/index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import type { Mongoose } from '../mongo';
import type { Logger } from 'winston';

/**
 * Shape of the `feConfigs` global declared at the bottom of this file.
 * Every field is optional.
 * NOTE(review): presumably front-end display toggles (`show_*` / `hide_*`),
 * branding strings and OAuth client settings - confirm against the consumers.
 */
export type FeConfigsType = {
show_emptyChat?: boolean;
show_register?: boolean;
show_appStore?: boolean;
show_contact?: boolean;
show_git?: boolean;
show_doc?: boolean;
show_pay?: boolean;
show_openai_account?: boolean;
show_promotion?: boolean;
hide_app_flow?: boolean;
openAPIUrl?: string;
systemTitle?: string;
authorText?: string;
googleClientVerKey?: string;
isPlus?: boolean;
oauth?: {
github?: string;
google?: string;
};
limit?: {
exportLimitMinutes?: number;
};
// List of string-to-string maps, one per script entry.
scripts?: { [key: string]: string }[];
};

/**
 * Shape of the `systemEnv` global declared at the bottom of this file.
 */
export type SystemEnvType = {
pluginBaseUrl?: string;
openapiPrefix?: string;
// Maximum number of vector-generation processes.
vectorMaxProcess: number;
// Maximum number of QA-generation processes.
qaMaxProcess: number;
// pg vector index search parameter: larger values give higher precision but slower search.
pgHNSWEfSearch: number;
};

// Global-scope augmentation: types for the values this project stores on `global`.
declare global {
// Mongoose instance; `undefined` until a connection has been started.
var mongodb: Mongoose | undefined;
// winston logger instance.
var logger: Logger;
var feConfigs: FeConfigsType;
var systemEnv: SystemEnvType;
}
2 changes: 1 addition & 1 deletion packages/core/ai/config.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { UserModelSchema } from '../user/type';
import type { UserModelSchema } from '@fastgpt/support/user/type.d';
import OpenAI from 'openai';

export const openaiBaseUrl = process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1';
Expand Down
4 changes: 2 additions & 2 deletions packages/core/ai/functions/createQuestionGuide.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export async function createQuestionGuide({
messages: ChatCompletionRequestMessage[];
model: string;
}) {
const ai = getAIApi();
const ai = getAIApi(undefined, 48000);
const data = await ai.chat.completions.create({
model: model,
temperature: 0,
Expand All @@ -25,7 +25,7 @@ export async function createQuestionGuide({
stream: false
});

const answer = data.choices?.[0].message?.content || '';
const answer = data.choices?.[0]?.message?.content || '';
const totalTokens = data.usage?.total_tokens || 0;

const start = answer.indexOf('[');
Expand Down
Empty file added packages/core/chat/type.d.ts
Empty file.
21 changes: 21 additions & 0 deletions packages/core/dataset/constant.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
/** Kinds of dataset entry: a folder or a dataset. */
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset'
}

/** Per-type metadata keyed by `DatasetTypeEnum` value; currently only a `name`. */
export const DatasetTypeMap = {
[DatasetTypeEnum.folder]: {
name: 'folder'
},
[DatasetTypeEnum.dataset]: {
name: 'dataset'
}
};

/**
 * File status values.
 * NOTE(review): presumably `embedding` = still being vectorised, `ready` = finished - confirm.
 */
export enum FileStatusEnum {
embedding = 'embedding',
ready = 'ready'
}

export enum DatasetSpecialIdEnum {
manual = 'manual',
mark = 'mark'
Expand All @@ -13,3 +32,5 @@ export const datasetSpecialIdMap = {
}
};
export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];

export const FolderAvatarSrc = '/imgs/files/folder.svg';
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { Schema, model, models, Model } from 'mongoose';
import { kbSchema as SchemaType } from '@/types/mongoSchema';
import { KbTypeMap } from '@/constants/dataset';
import { connectionMongo, type Model } from '@fastgpt/common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetSchemaType } from './type';
import { DatasetTypeMap } from './constant';

const kbSchema = new Schema({
const DatasetSchema = new Schema({
parentId: {
type: Schema.Types.ObjectId,
ref: 'kb',
Expand Down Expand Up @@ -32,7 +33,7 @@ const kbSchema = new Schema({
},
type: {
type: String,
enum: Object.keys(KbTypeMap),
enum: Object.keys(DatasetTypeMap),
required: true,
default: 'dataset'
},
Expand All @@ -42,4 +43,4 @@ const kbSchema = new Schema({
}
});

export const KB: Model<SchemaType> = models['kb'] || model('kb', kbSchema);
export const MongoDataset: Model<DatasetSchemaType> = models['kb'] || model('kb', DatasetSchema);
13 changes: 13 additions & 0 deletions packages/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { DatasetTypeEnum } from './constant';

/** Field types of a dataset record. */
export type DatasetSchemaType = {
_id: string;
userId: string;
parentId: string;
updateTime: Date;
avatar: string;
name: string;
vectorModel: string;
tags: string[];
// String-literal union of the `DatasetTypeEnum` values ('folder' | 'dataset').
type: `${DatasetTypeEnum}`;
};
2 changes: 1 addition & 1 deletion packages/core/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
"incremental": true,
"baseUrl": "."
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", "**/*.d.ts"],
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", "**/*.d.ts", "../**/*.d.ts"],
"exclude": ["node_modules"]
}
Loading

0 comments on commit dd8f274

Please sign in to comment.