From bc0ac6d26b8f79f3ac4d6346ae96287b59686b04 Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Thu, 18 Apr 2024 12:03:30 +0800 Subject: [PATCH] Fix: websync doc and export dataset ux (#1225) * Revert "lafAccount add pat & re request when token invalid (#76)" (#77) This reverts commit 83d85dfe37adcaef4833385ea52ee79fd84720be. * perf: workflow ux * system config * perf: export data * doc * update doc * fix: whisper --- docSite/content/docs/course/websync.md | 80 +++++++++++++++++++ .../core/module/template/system/userGuide.ts | 8 ++ projects/app/public/locales/en/common.json | 2 + projects/app/public/locales/zh/common.json | 6 +- .../src/pages/api/core/dataset/exportAll.ts | 3 +- projects/app/src/pages/dataset/list/index.tsx | 8 +- projects/app/src/web/common/api/xmlFetch.ts | 45 +++++++---- 7 files changed, 131 insertions(+), 21 deletions(-) create mode 100644 docSite/content/docs/course/websync.md diff --git a/docSite/content/docs/course/websync.md b/docSite/content/docs/course/websync.md new file mode 100644 index 00000000000..b3f70a1f133 --- /dev/null +++ b/docSite/content/docs/course/websync.md @@ -0,0 +1,80 @@ +--- +title: 'Web 站点同步' +description: 'FastGPT Web 站点同步功能介绍和使用方式' +icon: 'language' +draft: false +toc: true +weight: 105 +--- + +![](/imgs/webSync1.jpg) + +该功能目前仅向商业版用户开放。 + +## 什么是 Web 站点同步 + +Web 站点同步利用爬虫的技术,可以通过一个入口网站,自动捕获`同域名`下的所有网页,目前最多支持`200`个子页面。出于合规与安全角度,FastGPT 仅支持`静态站点`的爬取,主要用于各个文档站点快速构建知识库。 + +Tips: 国内的媒体站点基本不可用,例如公众号、CSDN、知乎等。可以通过终端发送`curl`请求检测是否为静态站点,例如: + +```bash +curl https://doc.fastgpt.in/docs/intro/ +``` + +## 如何使用 + +### 1. 新建知识库,选择 Web 站点同步 + +![](/imgs/webSync2.png) + +![](/imgs/webSync3.png) + +### 2. 点击配置站点信息 + +![](/imgs/webSync4.png) + +### 3.
填写网址和选择器 + +![](/imgs/webSync5.jpg) + +好了,现在点击开始同步,静等系统自动抓取网站信息即可。 + + +## 创建应用,绑定知识库 + +![](/imgs/webSync6.webp) + +## 选择器如何使用 + +选择器是 HTML CSS JS 的产物,你可以通过选择器来定位到你需要抓取的具体内容,而不是整个站点。使用方式为: + +### 首先打开浏览器调试面板(通常是 F12,或者【右键 - 检查】) + +![](/imgs/webSync7.webp) + +![](/imgs/webSync8.webp) + +### 输入对应元素的选择器 + +[菜鸟教程 css 选择器](https://www.runoob.com/cssref/css-selectors.html),具体选择器的使用方式可以参考菜鸟教程。 + +上图中,我们选中了一个区域,对应的是`div`标签,它有 `data-prismjs-copy`, `data-prismjs-copy-success`, `data-prismjs-copy-error` 三个属性,这里我们用到一个就够。所以选择器是: +**`div[data-prismjs-copy]`** + +除了属性选择器,常见的还有类选择器和 ID 选择器。例如: + +![](/imgs/webSync9.webp) + +上图 class 里的是类名(可能包含多个类名,都是空格隔开的,选择一个即可),选择器可以为:**`.docs-content`** + +### 多选择器使用 + +在开头的演示中,我们对 FastGPT 文档使用了多选择器的方式来选择,通过逗号隔开了两个选择器。 + +![](/imgs/webSync10.webp) + +我们希望选中上图两个标签中的内容,此时就需要两组选择器。一组是:`.docs-content .mb-0.d-flex`,含义是 `docs-content` 类下同时包含 `mb-0` 和 `d-flex` 两个类的后代元素; + +另一组是 `.docs-content div[data-prismjs-copy]`,含义是 `docs-content` 类下包含 `data-prismjs-copy` 属性的 `div` 元素。 + +把两组选择器用逗号隔开即可:`.docs-content .mb-0.d-flex, .docs-content div[data-prismjs-copy]` \ No newline at end of file diff --git a/packages/global/core/module/template/system/userGuide.ts b/packages/global/core/module/template/system/userGuide.ts index dc0483c7441..ba851a72897 100644 --- a/packages/global/core/module/template/system/userGuide.ts +++ b/packages/global/core/module/template/system/userGuide.ts @@ -46,6 +46,14 @@ export const UserGuideModule: FlowNodeTemplateType = { label: '', showTargetInApp: false, showTargetInPlugin: false + }, + { + key: ModuleInputKeyEnum.whisper, + type: FlowNodeInputTypeEnum.hidden, + valueType: ModuleIOValueTypeEnum.any, + label: '', + showTargetInApp: false, + showTargetInPlugin: false } ], outputs: [] diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 17b83610f5c..d9e90555202 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -566,6 +566,7 @@ "Set
Empty Result Tip": ",Response empty text", "Set Website Config": "Configuring Website", "Similarity": "Similarity", + "Start export": "Export started", "Sync Time": "Update Time", "Table collection": "Table collection", "Text collection": "Text collection", @@ -965,6 +966,7 @@ "AI support tool tip": "A model that supports function calls allows better use of tool calls.", "Ai chat": "LLM Chat", "Ai chat intro": "Request LLM chat", + "App system setting": "System settings", "Assigned reply": "Assigned reply", "Assigned reply intro": "The module can respond directly to a specified piece of content. Often used to guide and prompt. When non-string content is passed in, it is converted to a string for output.", "Basic Node": "Basic Node", diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 70cf757b577..5f277fab157 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -566,6 +566,7 @@ "Set Empty Result Tip": ",未搜索到内容时回复指定内容", "Set Website Config": "开始配置网站信息", "Similarity": "相关度", + "Start export": "已开始导出", "Sync Time": "最后更新时间", "Table collection": "表格数据集", "Text collection": "文本数据集", @@ -610,7 +611,8 @@ "success": "开始同步" } }, - "training": {} + "training": { + } }, "data": { "Auxiliary Data": "辅助数据", @@ -966,6 +968,7 @@ "AI support tool tip": "支持函数调用的模型,可以更好的使用工具调用。", "Ai chat": "AI 对话", "Ai chat intro": "AI 大模型对话", + "App system setting": "系统配置", "Assigned reply": "指定回复", "Assigned reply intro": "该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。", "Basic Node": "基础功能", @@ -997,7 +1000,6 @@ "Tool module": "工具", "UnKnow Module": "未知模块", "User guide": "用户引导", - "App system setting": "系统配置", "http body placeholder": "与APIFox相同的语法", "textEditor": "文本加工", "textEditor intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。" diff --git a/projects/app/src/pages/api/core/dataset/exportAll.ts b/projects/app/src/pages/api/core/dataset/exportAll.ts index b0d9453e00c..4fed878ac92 100644 ---
a/projects/app/src/pages/api/core/dataset/exportAll.ts +++ b/projects/app/src/pages/api/core/dataset/exportAll.ts @@ -71,7 +71,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex cursor.on('end', () => { cursor.close(); res.end(); - updateExportDatasetLimit(teamId); }); cursor.on('error', (err) => { @@ -79,6 +78,8 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex res.status(500); res.end(); }); + + updateExportDatasetLimit(teamId); } catch (err) { res.status(500); addLog.error(`export dataset error`, err); diff --git a/projects/app/src/pages/dataset/list/index.tsx b/projects/app/src/pages/dataset/list/index.tsx index 399c5c46145..504c51e7f41 100644 --- a/projects/app/src/pages/dataset/list/index.tsx +++ b/projects/app/src/pages/dataset/list/index.tsx @@ -92,11 +92,17 @@ const Kb = () => { setLoading(true); await checkTeamExportDatasetLimit(dataset._id); - xmlDownloadFetch({ + await xmlDownloadFetch({ url: `/api/core/dataset/exportAll?datasetId=${dataset._id}`, filename: `${dataset.name}.csv` }); }, + onSuccess() { + toast({ + status: 'success', + title: t('core.dataset.Start export') + }); + }, onSettled() { setLoading(false); }, diff --git a/projects/app/src/web/common/api/xmlFetch.ts b/projects/app/src/web/common/api/xmlFetch.ts index 238e77c9f82..b8174a0ee10 100644 --- a/projects/app/src/web/common/api/xmlFetch.ts +++ b/projects/app/src/web/common/api/xmlFetch.ts @@ -1,20 +1,31 @@ import { getToken } from '@/web/support/user/auth'; +import { hasHttps } from '@fastgpt/web/common/system/utils'; -export const xmlDownloadFetch = ({ url, filename }: { url: string; filename: string }) => { - const xhr = new XMLHttpRequest(); - xhr.open('GET', url, true); - xhr.setRequestHeader('token', getToken()); - xhr.responseType = 'blob'; - xhr.onload = function (e) { - if (this.status == 200) { - const blob = this.response; - const a = document.createElement('a'); - const url = URL.createObjectURL(blob); - 
a.href = url; - a.download = filename; - a.click(); - window.URL.revokeObjectURL(url); - } - }; - xhr.send(); +export const xmlDownloadFetch = async ({ url, filename }: { url: string; filename: string }) => { + if (hasHttps()) { + const a = document.createElement('a'); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + } else { + const response = await fetch(url, { + headers: { + token: `${getToken()}` + } + }); + if (!response.ok) throw new Error('Network response was not ok.'); + + const blob = await response.blob(); + const downloadUrl = window.URL.createObjectURL(blob); + const a = document.createElement('a'); + a.style.display = 'none'; // 隐藏元素 + a.href = downloadUrl; + a.download = filename; + document.body.appendChild(a); + a.click(); // 模拟用户点击 + document.body.removeChild(a); + window.URL.revokeObjectURL(downloadUrl); // 清理生成的URL + } };