Skip to content

Commit

Permalink
External dataset (#1485)
Browse files Browse the repository at this point in the history
* fix: revert version

* feat: external collection

* import context

* external ui

* doc

* fix: ts

* clear invalid data

* feat: rename sub name

* fix: node if else edge remove

* fix: init

* api size

* fix: if else node refresh
  • Loading branch information
c121914yu authored May 15, 2024
1 parent fb04889 commit cd87625
Show file tree
Hide file tree
Showing 74 changed files with 1,878 additions and 1,349 deletions.
13 changes: 3 additions & 10 deletions .vscode/nextapi.code-snippets
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,11 @@
"",
"type ContextType = {$1};",
"",
"type ContextValueType = {};",
"",
"export const Context = createContext<ContextType>({});",
"",
"export const ContextProvider = ({",
" children,",
" value",
"}: {",
" children: ReactNode;",
" value: ContextValueType;",
"}) => {",
" return <Context.Provider value={value}>{children}</Context.Provider>;",
"export const ContextProvider = ({ children }: { children: ReactNode }) => {",
" const contextValue: ContextType = {};",
" return <Context.Provider value={contextValue}>{children}</Context.Provider>;",
"};",
],
"description": "FastGPT usecontext template"
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ COPY --from=mainDeps /app/projects/$name/node_modules ./projects/$name/node_modu
RUN [ -z "$proxy" ] || sed -i 's/dl-cdn.alpinelinux.org/mirrors.ustc.edu.cn/g' /etc/apk/repositories

RUN apk add --no-cache libc6-compat && npm install -g [email protected]

ENV NODE_OPTIONS="--max-old-space-size=4096"
RUN pnpm --filter=$name build

# --------- runner -----------
Expand Down
3 changes: 2 additions & 1 deletion docSite/content/docs/development/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,5 @@ OneAPI 的 API Key 配置错误,需要修改`OPENAI_API_KEY`环境变量,并
### bad_response_status_code bad response status code 503

1. 模型服务不可用
2. ....
2. 模型接口参数异常(温度、max token等可能不适配)
3. ....
3 changes: 2 additions & 1 deletion docSite/content/docs/development/upgrading/481.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ curl --location --request POST 'https://{{host}}/api/admin/clearInvalidData' \
## V4.8.1 更新说明

1. 新增 - 知识库重新选择向量模型重建
2. 修复 - 定时器清理脏数据任务
2. 修复 - 工作流删除节点的动态输入和输出时,没有正确地删除连接线,导致可能出现逻辑异常。
3. 修复 - 定时器清理脏数据任务
8 changes: 5 additions & 3 deletions packages/global/core/dataset/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@ export type DatasetUpdateBody = {
intro?: string;
permission?: DatasetSchemaType['permission'];
agentModel?: LLMModelItemType;
websiteConfig?: DatasetSchemaType['websiteConfig'];
status?: DatasetSchemaType['status'];

websiteConfig?: DatasetSchemaType['websiteConfig'];
externalReadUrl?: DatasetSchemaType['externalReadUrl'];
};

/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
parentId?: string;
trainingType?: `${TrainingModeEnum}`;
trainingType?: TrainingModeEnum;
chunkSize?: number;
chunkSplitter?: string;
qaPrompt?: string;
Expand Down Expand Up @@ -78,7 +80,7 @@ export type PostWebsiteSyncParams = {
export type PushDatasetDataProps = {
collectionId: string;
data: PushDatasetDataChunkProps[];
trainingMode: `${TrainingModeEnum}`;
trainingMode: TrainingModeEnum;
prompt?: string;
billId?: string;
};
Expand Down
6 changes: 6 additions & 0 deletions packages/global/core/dataset/collection/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Prefixes used when composing a dataset collection's `sourceId`.
 *
 * Format: `sourceId = <prefix>-<id>`, where `id` is a file id, a link URL,
 * or an external id.
 * NOTE(review): presumably `local` pairs with the file id, `link` with the
 * link URL, and `external` with the external id (same order as listed) —
 * confirm against the code that builds `sourceId`.
 */
export enum CollectionSourcePrefixEnum {
local = 'local',
link = 'link',
external = 'external'
}
17 changes: 12 additions & 5 deletions packages/global/core/dataset/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,29 @@
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset',
websiteDataset = 'websiteDataset' // deep link
websiteDataset = 'websiteDataset', // deep link
externalFile = 'externalFile'
}
export const DatasetTypeMap = {
[DatasetTypeEnum.folder]: {
icon: 'common/folderFill',
label: 'core.dataset.Folder Dataset',
label: 'Folder Dataset',
collectionLabel: 'common.Folder'
},
[DatasetTypeEnum.dataset]: {
icon: 'core/dataset/commonDataset',
label: 'core.dataset.Common Dataset',
label: 'Common Dataset',
collectionLabel: 'common.File'
},
[DatasetTypeEnum.websiteDataset]: {
icon: 'core/dataset/websiteDataset',
label: 'core.dataset.Website Dataset',
label: 'Website Dataset',
collectionLabel: 'common.Website'
},
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/commonDataset',
label: 'External File',
collectionLabel: 'common.File'
}
};

Expand Down Expand Up @@ -77,7 +83,8 @@ export enum ImportDataSourceEnum {
fileLocal = 'fileLocal',
fileLink = 'fileLink',
fileCustom = 'fileCustom',
csvTable = 'csvTable'
csvTable = 'csvTable',
externalFile = 'externalFile'
}

export enum TrainingModeEnum {
Expand Down
23 changes: 17 additions & 6 deletions packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ export type DatasetSchemaType = {
vectorModel: string;
agentModel: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
status: `${DatasetStatusEnum}`;
permission: `${PermissionTypeEnum}`;

// metadata
websiteConfig?: {
url: string;
selector: string;
};
externalReadUrl?: string;
};

export type DatasetCollectionSchemaType = {
Expand All @@ -42,16 +45,18 @@ export type DatasetCollectionSchemaType = {
createTime: Date;
updateTime: Date;

trainingType: `${TrainingModeEnum}`;
trainingType: TrainingModeEnum;
chunkSize: number;
chunkSplitter?: string;
qaPrompt?: string;

fileId?: string;
rawLink?: string;
sourceId?: string; // relate CollectionSourcePrefixEnum
fileId?: string; // local file id
rawLink?: string; // link url

rawTextLength?: number;
hashRawText?: string;
externalSourceUrl?: string; // external import url
metadata?: {
webPageSelector?: string;
relatedImgId?: string; // The id of the associated image collections
Expand Down Expand Up @@ -93,7 +98,7 @@ export type DatasetTrainingSchemaType = {
billId: string;
expireAt: Date;
lockTime: Date;
mode: `${TrainingModeEnum}`;
mode: TrainingModeEnum;
model: string;
prompt: string;
dataId?: string;
Expand All @@ -112,13 +117,19 @@ export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collect
};

/* ================= dataset ===================== */
export type DatasetSimpleItemType = {
_id: string;
avatar: string;
name: string;
vectorModel: VectorModelItemType;
};
export type DatasetListItemType = {
_id: string;
parentId: string;
avatar: string;
name: string;
intro: string;
type: `${DatasetTypeEnum}`;
type: DatasetTypeEnum;
isOwner: boolean;
canWrite: boolean;
permission: `${PermissionTypeEnum}`;
Expand Down
2 changes: 1 addition & 1 deletion packages/global/core/dataset/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
};
}

export const predictDataLimitLength = (mode: `${TrainingModeEnum}`, data: any[]) => {
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
if (mode === TrainingModeEnum.qa) return data.length * 20;
if (mode === TrainingModeEnum.auto) return data.length * 5;
return data.length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const AssignedAnswerModule: FlowNodeTemplateType = {
intro:
'该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。',
version: '481',
isTool: true,
inputs: [
{
key: NodeInputKeyEnum.answerText,
Expand Down
25 changes: 11 additions & 14 deletions packages/service/core/dataset/collection/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ const DatasetCollectionSchema = new Schema({
ref: DatasetColCollectionName,
default: null
},
userId: {
// abandoned
type: Schema.Types.ObjectId,
ref: 'user'
},
teamId: {
type: Schema.Types.ObjectId,
ref: TeamCollectionName,
Expand Down Expand Up @@ -54,6 +49,7 @@ const DatasetCollectionSchema = new Schema({
default: () => new Date()
},

// chunk field
trainingType: {
type: String,
enum: Object.keys(TrainingTypeMap),
Expand All @@ -70,20 +66,21 @@ const DatasetCollectionSchema = new Schema({
type: String
},

sourceId: String,
// local file collection
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
// web link collection
rawLink: String,

rawTextLength: {
type: Number
},
hashRawText: {
type: String
},
// external collection

// metadata
rawTextLength: Number,
hashRawText: String,
externalSourceUrl: String, // external import url
metadata: {
type: Object,
default: {}
Expand Down
3 changes: 2 additions & 1 deletion packages/service/core/dataset/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ const DatasetSchema = new Schema({
default: 'body'
}
}
}
},
externalReadUrl: String
});

try {
Expand Down
2 changes: 1 addition & 1 deletion packages/service/support/wallet/sub/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
} from '@fastgpt/global/support/wallet/sub/constants';
import type { TeamSubSchema } from '@fastgpt/global/support/wallet/sub/type';

export const subCollectionName = 'team.subscriptions';
export const subCollectionName = 'team_subscriptions';

const SubSchema = new Schema({
teamId: {
Expand Down
6 changes: 3 additions & 3 deletions packages/web/components/common/DndDrag/DragIcon.tsx
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { DragHandleIcon } from '@chakra-ui/icons';
import { Box } from '@chakra-ui/react';
import { Box, BoxProps } from '@chakra-ui/react';
import React from 'react';
import { DraggableProvided } from 'react-beautiful-dnd';

const DragIcon = ({ provided }: { provided: DraggableProvided }) => {
const DragIcon = ({ provided, ...props }: { provided: DraggableProvided } & BoxProps) => {
return (
<Box {...provided.dragHandleProps}>
<Box {...provided.dragHandleProps} {...props}>
<DragHandleIcon color={'myGray.500'} _hover={{ color: 'primary.600' }} />
</Box>
);
Expand Down
8 changes: 1 addition & 7 deletions projects/app/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@

{
"parser": "@typescript-eslint/parser", // 确保使用了 TypeScript 解析器
"plugins": ["@typescript-eslint"], // 引入 TypeScript 插件

"extends": "next/core-web-vitals",
"rules": {
"react-hooks/rules-of-hooks": 0,
"@typescript-eslint/consistent-type-imports": "warn" // 或者 "error" 来强制执行

"react-hooks/rules-of-hooks": 0
}
}
7 changes: 1 addition & 6 deletions projects/app/i18n/en/common.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"Add new": "Add new",
"App": "App",
"Export": "Export",
"Folder": "Folder",
Expand Down Expand Up @@ -509,18 +510,14 @@
"Choose Dataset": "Associate dataset",
"Chunk amount": "Number of chunks",
"Collection": "Dataset",
"Common Dataset": "Common dataset",
"Common Dataset Desc": "Can be built by importing files, web links, or manual entry",
"Create dataset": "Create a dataset",
"Dataset": "Dataset",
"Dataset ID": "Dataset ID",
"Dataset Type": "Dataset type",
"Delete Confirm": "Confirm to delete this dataset? Data cannot be recovered after deletion, please confirm!",
"Delete Website Tips": "Confirm to delete this site?",
"Empty Dataset": "",
"Empty Dataset Tips": "No datasets yet, go create one!",
"File collection": "File dataset",
"Folder Dataset": "Folder",
"Folder placeholder": "This is a directory",
"Go Dataset": "Go to dataset",
"Intro Placeholder": "This dataset has no introduction~",
Expand All @@ -540,8 +537,6 @@
"Table collection": "Table dataset",
"Text collection": "Text dataset",
"Total chunks": "Total chunks: {{total}}",
"Website Dataset": "Web site synchronization",
"Website Dataset Desc": "Web site synchronization allows you to use a web page link to build a dataset",
"collection": {
"Click top config website": "Click to configure website",
"Collection name": "Dataset name",
Expand Down
13 changes: 12 additions & 1 deletion projects/app/i18n/en/dataset.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
{
"Common Dataset": "Common dataset",
"Common Dataset Desc": "Can be built by importing files, web links, or manual entry",
"Confirm to rebuild embedding tip": "Are you sure to switch the knowledge base index? Switching index is a very heavy operation that requires re-indexing all the data in your knowledge base, which may take a long time. Please ensure that the remaining points in your account are sufficient.",
"External file": "External file",
"External file Dataset Desc": "You can import files from an external file library to build a knowledge base. Files are not stored twice",
"External id": "File id",
"External read url": "External read url",
"External url": "File read url",
"Folder Dataset": "Folder",
"Rebuild embedding start tip": "The task of switching index models has begun",
"Rebuilding index count": "Rebuilding count: {{count}}",
"The knowledge base has indexes that are being trained or being rebuilt": "The knowledge base has indexes that are being trained or being rebuilt"
"The knowledge base has indexes that are being trained or being rebuilt": "The knowledge base has indexes that are being trained or being rebuilt",
"Website Dataset": "Web site",
"Website Dataset Desc": "Web site synchronization allows you to use a web page link to build a dataset",
"filename": "filename"
}
6 changes: 1 addition & 5 deletions projects/app/i18n/zh/common.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"Add new": "新增",
"App": "应用",
"Export": "导出",
"Folder": "文件夹",
Expand Down Expand Up @@ -509,8 +510,6 @@
"Choose Dataset": "关联知识库",
"Chunk amount": "分段数",
"Collection": "数据集",
"Common Dataset": "通用知识库",
"Common Dataset Desc": "可通过导入文件、网页链接或手动录入形式构建知识库",
"Create dataset": "创建一个知识库",
"Dataset": "知识库",
"Dataset ID": "知识库 ID",
Expand All @@ -520,7 +519,6 @@
"Empty Dataset": "",
"Empty Dataset Tips": "还没有知识库,快去创建一个吧!",
"File collection": "文件数据集",
"Folder Dataset": "文件夹",
"Folder placeholder": "这是一个目录",
"Go Dataset": "前往知识库",
"Intro Placeholder": "这个知识库还没有介绍~",
Expand All @@ -540,8 +538,6 @@
"Table collection": "表格数据集",
"Text collection": "文本数据集",
"Total chunks": "总分段: {{total}}",
"Website Dataset": "Web 站点同步",
"Website Dataset Desc": "Web 站点同步允许你直接使用一个网页链接构建知识库",
"collection": {
"Click top config website": "点击配置网站",
"Collection name": "数据集名称",
Expand Down
Loading

0 comments on commit cd87625

Please sign in to comment.