-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
181 changed files
with
2,484 additions
and
1,537 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Builds the FastGPT app image and pushes it to the GitHub Container Registry
# of the repository owner. Intended for forks: the job is skipped on the
# upstream repository (see the `if:` guard on the job).
name: Build FastGPT images in Personal warehouse
on:
  workflow_dispatch:
  push:
    paths:
      - 'projects/app/**'
      - 'packages/**'
    branches:
      - 'main'
jobs:
  build-fastgpt-images:
    # ubuntu-20.04 hosted runners have been retired; use a supported image.
    runs-on: ubuntu-22.04
    if: github.repository != 'labring/FastGPT'
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          driver-opts: network=host
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GH_PAT }}
      - name: Set DOCKER_REPO_TAGGED based on branch or tag
        # Image references must be lowercase, but an owner login may contain
        # uppercase letters, so lowercase it with bash's ${var,,} expansion.
        run: |
          echo "DOCKER_REPO_TAGGED=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/fastgpt:latest" >> "$GITHUB_ENV"
      - name: Build and publish image for main branch or tag push event
        env:
          DOCKER_REPO_TAGGED: ${{ env.DOCKER_REPO_TAGGED }}
        # The source label points at the repository actually running the
        # workflow, so it stays correct if the fork was renamed.
        # The cache is written to a fresh directory and swapped in afterwards;
        # writing back into the restored directory makes it grow on every run.
        run: |
          docker buildx build \
          --build-arg name=app \
          --label "org.opencontainers.image.source=https://github.com/${{ github.repository }}" \
          --label "org.opencontainers.image.description=fastgpt image" \
          --push \
          --cache-from=type=local,src=/tmp/.buildx-cache \
          --cache-to=type=local,dest=/tmp/.buildx-cache-new \
          -t "${DOCKER_REPO_TAGGED}" \
          -f Dockerfile \
          .
      - name: Replace Docker layer cache
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import { getErrText } from '../error/utils'; | ||
import { countPromptTokens } from './tiktoken'; | ||
|
||
/**
 * Split a text into chunks of roughly `maxLen` tokens.
 *
 * The text is split recursively with progressively finer separators:
 * markdown headings (`#` .. `####`), blank lines, single newlines, sentence
 * terminators, `!`/`?`, `;`, `,`. When no separator is left, the text is cut
 * into fixed-size character slices. Small pieces are merged until a chunk
 * reaches `maxLen`; a merged chunk stays below `1.4 * maxLen` tokens.
 *
 * No input text is dropped: every character of `text` ends up in at least one
 * chunk (overlap may repeat text across neighbouring chunks).
 *
 * @param props.text - Text to split. An empty text yields a single empty chunk.
 * @param props.maxLen - Target size of one chunk. Compared against both
 *   `text.length` and `countPromptTokens`. (The original note mentions an upper
 *   bound of 3500 — not enforced here.)
 * @param props.overlapLen - Size of the text repeated at the start of the next
 *   chunk. Defaults to 20% of `maxLen`; expected to be smaller than `maxLen`.
 * @returns `chunks` and `tokens`, the sum of `countPromptTokens(chunk, 'system')`
 *   over all chunks.
 * @throws Error wrapping (via `getErrText`) anything thrown while splitting.
 */
export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => {
  const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props;
  const tempMarker = 'SPLIT_HERE_SPLIT_HERE';

  // Every pattern keeps the WHOLE separator inside capture group 1, because the
  // replacement below re-emits only `$1`. Anything matched outside the group
  // would be deleted from the text, and a pattern without a group would insert
  // the literal characters "$1".
  const stepReg: Record<number, RegExp> = {
    // markdown headings: the marker goes BEFORE the heading line
    0: /^(#\s[^\n]+\n)/gm,
    1: /^(##\s[^\n]+\n)/gm,
    2: /^(###\s[^\n]+\n)/gm,
    3: /^(####\s[^\n]+\n)/gm,

    // plain-text separators: the marker goes AFTER the separator
    4: /(\n\n)/g,
    5: /([\n])/g,
    // U+3002 ideographic full stop, or "." followed by whitespace.
    // NOTE(review): the original had a mistyped, no-op lookahead `(?!<[^a-zA-Z])`
    // in front of `\.`; it may have been meant as a lookbehind — confirm intent.
    6: /([\u3002]|\.\s)/g,
    // Longer alternatives come first so trailing whitespace stays with the
    // sentence. Full-width forms (U+FF01 U+FF1F, U+FF1B, U+FF0C) are included to
    // match the CJK full stop handled in step 6.
    7: /(!\s|\?\s|[!?\uFF01\uFF1F])/g,
    8: /(;\s|[;\uFF1B])/g,
    9: /(,\s|[,\uFF0C])/g
  };

  /**
   * Split `text` using the separator of `step`, recursing into `step + 1` for
   * pieces that are still too large.
   *
   * `lastChunk` is text carried over from the caller that still has to be
   * emitted; `lastChunkIsOverlap` tells whether it only repeats text that is
   * already part of an emitted chunk (and may therefore be discarded).
   */
  const splitTextRecursively = ({
    text = '',
    step,
    lastChunk,
    overlayChunk,
    lastChunkIsOverlap = false
  }: {
    text: string;
    step: number;
    lastChunk: string;
    overlayChunk: string;
    lastChunkIsOverlap?: boolean;
  }): string[] => {
    if (text.length <= maxLen) {
      if (!lastChunk) return [text];
      // Keep the carried text: merge it when it fits, otherwise emit it on its
      // own. Overlap-only carry is already contained in a previous chunk.
      if (lastChunk.length + text.length <= maxLen) return [lastChunk + text];
      return lastChunkIsOverlap ? [text] : [lastChunk, text];
    }

    const reg = stepReg[step];
    const isMarkdownSplit = step < 4;

    if (!reg) {
      // No separator left: cut fixed-size slices, carried text included.
      const source = lastChunk + text;
      // An overlap outside (0, maxLen) would give a stride <= 0 (endless loop)
      // or a stride > maxLen (gaps between slices); fall back to no overlap.
      const stride =
        overlapLen > 0 && overlapLen < maxLen ? maxLen - overlapLen : Math.max(1, maxLen);
      const slices: string[] = [];
      for (let i = 0; i < source.length; i += stride) {
        slices.push(source.slice(i, i + maxLen));
        // This slice reached the end; a further one would only repeat its tail.
        if (i + maxLen >= source.length) break;
      }
      return slices;
    }

    // split text by special char
    const splitTexts = text
      .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`)
      .split(tempMarker)
      .filter((part) => part);

    let chunks: string[] = [];
    let pending = lastChunk; // text collected for the chunk being built
    let pendingIsOverlap = lastChunkIsOverlap;
    let overlap = overlayChunk; // tail pieces to repeat in the next chunk

    for (const piece of splitTexts) {
      let chunkToken = countPromptTokens(pending, '');
      const textToken = countPromptTokens(piece, '');

      // The piece alone is too large, or merging it would make the chunk too large.
      if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
        // The pending chunk is big enough to stand alone: emit it. Otherwise
        // it is handed down and merged in front of the pieces of `piece`.
        if (chunkToken > maxLen * 0.7) {
          chunks.push(pending);
          pending = '';
          overlap = '';
          pendingIsOverlap = false;
        }
        const innerChunks = splitTextRecursively({
          text: piece,
          step: step + 1,
          lastChunk: pending,
          overlayChunk: overlap,
          lastChunkIsOverlap: pendingIsOverlap
        });
        if (innerChunks.length === 0) continue;
        chunks = chunks.concat(innerChunks);
        pending = '';
        overlap = '';
        pendingIsOverlap = false;
        continue;
      }

      // The piece fits: append it to the pending chunk.
      pending += piece;
      pendingIsOverlap = false;
      chunkToken += textToken; // definitely less than 1.4 * maxLen

      // Near the end of the chunk, remember small pieces as overlap for the next one.
      if (
        overlapLen !== 0 &&
        !isMarkdownSplit &&
        chunkToken >= maxLen - overlapLen &&
        textToken < overlapLen
      ) {
        overlap += piece;
      }
      if (chunkToken >= maxLen) {
        chunks.push(pending);
        pending = overlap;
        pendingIsOverlap = overlap !== '';
        overlap = '';
      }
    }

    // Emit whatever is left, unless it merely repeats the previous chunk's tail.
    // This also covers the case where nothing was emitted so far.
    if (pending && !pendingIsOverlap) {
      chunks.push(pending);
    }

    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import type { Tiktoken } from 'js-tiktoken'; | ||
|
||
// Global-scope augmentation: declares a global `TikToken` variable so it is typed wherever it is read or assigned. | ||
declare global { | ||
// `Tiktoken` is the type imported from 'js-tiktoken' above. Nothing is assigned here; where the value gets set is not visible in this file. | ||
var TikToken: Tiktoken; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// Input of a rerank request (reading based on the type name — confirm against the caller). | ||
export type PostReRankProps = { | ||
// Query text; presumably what the inputs are scored against — verify with the rerank service. | ||
query: string; | ||
// Items to rank; each one carries an `id` and its `text`. | ||
inputs: { id: string; text: string }[]; | ||
}; | ||
// Rerank result: a list of `{ id, score }` entries. NOTE(review): `id` is assumed to refer back to `inputs[].id` — confirm. | ||
export type PostReRankResponse = { id: string; score: number }[]; |
8 changes: 4 additions & 4 deletions
8
...cts/app/src/utils/common/adapt/message.ts → packages/global/core/chat/adapt.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import { DatasetDataIndexItemType } from './type'; | ||
|
||
/* ================= dataset ===================== */ | ||
|
||
/* ================= collection ===================== */ | ||
|
||
/* ================= data ===================== */ | ||
// Shape of one raw search result row. NOTE(review): presumably a row returned by the Postgres vector search, with snake_case column names — confirm against the query. | ||
export type PgSearchRawType = { | ||
id: string; | ||
team_id: string; | ||
tmb_id: string; | ||
collection_id: string; | ||
data_id: string; | ||
// Numeric score of the row; its scale and ordering are not defined in this file. | ||
score: number; | ||
}; | ||
// One data chunk to push into a dataset (reading based on the type name — confirm against the caller). | ||
export type PushDatasetDataChunkProps = { | ||
q: string; // embedding content | ||
a?: string; // bonus content | ||
// Optional index entries; same shape as `DatasetDataIndexItemType` without its `dataId` field. | ||
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[]; | ||
}; |
Oops, something went wrong.