From 5883d586364c0a4debae41cd56c065575eb8125f Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 11:00:02 +0100 Subject: [PATCH 01/17] chore(community): Bump faker dev dep to 8.4.1 --- libs/langchain-community/package.json | 2 +- yarn.lock | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 7b826ad1e106..fafedbaea34f 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -68,7 +68,7 @@ "@cloudflare/workers-types": "^4.20230922.0", "@datastax/astra-db-ts": "^1.0.1", "@elastic/elasticsearch": "^8.4.0", - "@faker-js/faker": "^7.6.0", + "@faker-js/faker": "8.4.1", "@getmetal/metal-sdk": "^4.0.0", "@getzep/zep-cloud": "^1.0.6", "@getzep/zep-js": "^0.9.0", diff --git a/yarn.lock b/yarn.lock index 5e6b6ed60a57..6aa947124f55 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10048,6 +10048,13 @@ __metadata: languageName: node linkType: hard +"@faker-js/faker@npm:8.4.1, @faker-js/faker@npm:^8.4.1": + version: 8.4.1 + resolution: "@faker-js/faker@npm:8.4.1" + checksum: d802d531f8929562715adc279cfec763c9a4bc596ec67b0ce43fd0ae61b285d2b0eec6f1f4aa852452a63721a842fe7e81926dce7bd92acca94b01e2a1f55f5a + languageName: node + linkType: hard + "@faker-js/faker@npm:^7.6.0": version: 7.6.0 resolution: "@faker-js/faker@npm:7.6.0" @@ -10069,13 +10076,6 @@ __metadata: languageName: node linkType: hard -"@faker-js/faker@npm:^8.4.1": - version: 8.4.1 - resolution: "@faker-js/faker@npm:8.4.1" - checksum: d802d531f8929562715adc279cfec763c9a4bc596ec67b0ce43fd0ae61b285d2b0eec6f1f4aa852452a63721a842fe7e81926dce7bd92acca94b01e2a1f55f5a - languageName: node - linkType: hard - "@fastify/busboy@npm:^1.2.1": version: 1.2.1 resolution: "@fastify/busboy@npm:1.2.1" @@ -11709,7 +11709,7 @@ __metadata: "@cloudflare/workers-types": ^4.20230922.0 "@datastax/astra-db-ts": ^1.0.1 "@elastic/elasticsearch": ^8.4.0 - "@faker-js/faker": ^7.6.0 + "@faker-js/faker": 8.4.1 "@getmetal/metal-sdk": ^4.0.0 "@getzep/zep-cloud": ^1.0.6 "@getzep/zep-js": ^0.9.0 From 39f59df34ef4dc6eb6f3c1c098c3bc24338673df Mon Sep 17 00:00:00 2001 From: xteam-ivoneijr Date: Thu, 31 Aug 2023 22:50:03 -0300 Subject: [PATCH 02/17] feat(community): Add jira document loader - langchain-ai#2433 --- .../src/document_loaders/web/jira.ts | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 libs/langchain-community/src/document_loaders/web/jira.ts diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts new file mode 100644 index 000000000000..69cd7366ef32 --- /dev/null +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -0,0 +1,378 @@ +import { Document } from "../../document.js"; +import { BaseDocumentLoader } from "../base.js"; + +type JiraStatusCategory = { + self: string + id: number + key: string + colorName: string + name: string +} + +type JiraStatus = { + self: string + description: string + iconUrl: string + name: string + id: string + statusCategory: JiraStatusCategory +} + +type JiraUser = { + accountId: string + accountType: string + active: boolean + avatarUrls: { + '16x16': string + '24x24': string + '32x32': string + '48x48': string + } + displayName: string + emailAddress: string + self: string + timeZone: string +} + +type JiraIssueType = { + avatarId: number + description: string + entityId: string + hierarchyLevel: number + iconUrl: string + id: string + name: string + self: string + subtask: boolean +} + +type JiraPriority = { + iconUrl: string + id: string + name: string + self: string +} + +type JiraProgress = { + progress: number + total: number + percent?: number +} + +export type JiraProject = { + avatarUrls: { + '16x16': string + '24x24': string + '32x32': string + '48x48': string + } + id: string + key: string + name: string + projectTypeKey: string + self: string + simplified: boolean +} + +type JiraSubTask = { + id: string + key: string + self: string + fields: { + issuetype: JiraIssueType + priority: JiraPriority + status: JiraStatus + summary: string + } +} + +type JiraIssueLinkType = { + id: string + name: string + inward: string + outward: string + self: string +} + +export type JiraBriefIssue = { + id: string + key: string + self: string + fields: { + summary: string + status: JiraStatus + priority: JiraPriority + issuetype: JiraIssueType + } +} + +type JiraIssueLink = { + id: string + self: string + type: JiraIssueLinkType + inwardIssue?: JiraBriefIssue + outwardIssue?: JiraBriefIssue +} + +export type JiraIssue = { + expand: string + id: string + self: string + key: string + fields: { + assignee?: JiraUser + created: string + description: string + issuelinks: JiraIssueLink[] + issuetype: JiraIssueType + labels?: string[] + priority: JiraPriority + progress: JiraProgress + project: JiraProject + reporter?: JiraUser + creator: JiraUser + resolutiondate?: string + status: JiraStatus + subtasks: JiraSubTask[] + summary: string + timeestimate?: number + timespent?: number + updated: string + duedate?: string + parent?: JiraBriefIssue + } +} + +export type JiraAPIResponse = { + expand: string + startAt: number + maxResults: number + total: number + issues: JiraIssue[] +} + +/** + * Interface representing the parameters for configuring the + * JiraProjectLoader. + */ +export interface JiraProjectLoaderParams { + baseUrl: string + projectKey: string + username: string + accessToken: string + limit?: number +} + +const API_ENDPOINTS = { + SEARCH: "/rest/api/2/search" +}; + +/** + * Class representing a document loader for loading pages from Confluence. + */ +export class JiraProjectLoader extends BaseDocumentLoader { + public readonly baseUrl: string; + public readonly projectKey: string; + public readonly username: string; + public readonly accessToken: string; + public readonly authorizationHeader: string; + public readonly limit: number; + + constructor({ + baseUrl, + projectKey, + username, + accessToken, + limit = 100 + }: JiraProjectLoaderParams) { + super(); + this.baseUrl = baseUrl; + this.projectKey = projectKey; + this.username = username; + this.accessToken = accessToken; + this.authorizationHeader = this.buildAuthorizationHeader(); + this.limit = limit; + } + + private buildAuthorizationHeader(): string { + return `Basic ${Buffer.from( + `${this.username}:${this.accessToken}` + ).toString('base64')}`; + } + + public async load(): Promise { + const allIssues: JiraIssue[] = []; + + try { + for await (const issues of this.fetchIssues()) { + allIssues.push(...issues); + } + + return allIssues.map(issue => this.documentFromIssue(issue)); + } catch (error) { + console.error('Error:', error); + return []; + } + } + + protected async *fetchIssues(): AsyncIterable { + const url = `${this.baseUrl}${API_ENDPOINTS.SEARCH}`; + let startAt = 0; + + while (true) { + try { + const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limit}`; + const options = { + method: 'GET', + headers: { + Authorization: this.authorizationHeader, + Accept: 'application/json' + } + }; + + const response = await fetch(pageUrl, options); + const data: JiraAPIResponse = await response.json(); + + if (!data.issues || data.issues.length === 0) break; + + yield data.issues; + startAt += this.limit; + + } catch (error) { + console.error(error); + yield []; + } + } + } + + private documentFromIssue(issue: JiraIssue): Document { + return new Document({ + pageContent: this.formatIssueInfo({ + issue, + baseUrl: this.baseUrl + }), + metadata: { + id: issue.id, + baseUrl: this.baseUrl, + projectKey: this.projectKey + } + }); + } + + private formatIssueInfo({ + issue, + baseUrl, + }: { + issue: JiraIssue + baseUrl: string + }): string { + let text = `Issue: ${this.formatMainIssueInfoText({ issue, baseUrl })}\n` + text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n` + text += `Status: ${issue.fields.status.name}\n` + text += `Priority: ${issue.fields.priority.name}\n` + text += `Type: ${issue.fields.issuetype.name}\n` + text += `Creator: ${issue.fields.creator.displayName}\n` + + if (issue.fields.labels && issue.fields.labels.length > 0) { + text += `Labels: ${issue.fields.labels.join(', ')}\n` + } + + text += `Created: ${issue.fields.created}\n` + text += `Updated: ${issue.fields.updated}\n` + + if (issue.fields.reporter) { + text += `Reporter: ${issue.fields.reporter.displayName}\n` + } + + text += `Assignee: ${issue.fields.assignee?.displayName ?? 'Unassigned'}\n` + + if (issue.fields.duedate) { + text += `Due Date: ${issue.fields.duedate}\n` + } + + if (issue.fields.timeestimate) { + text += `Time Estimate: ${issue.fields.timeestimate}\n` + } + + if (issue.fields.timespent) { + text += `Time Spent: ${issue.fields.timespent}\n` + } + + if (issue.fields.resolutiondate) { + text += `Resolution Date: ${issue.fields.resolutiondate}\n` + } + + if (issue.fields.description) { + text += `Description: ${issue.fields.description}\n` + } + + if (issue.fields.progress.percent) { + text += `Progress: ${issue.fields.progress.percent}%\n` + } + + if (issue.fields.parent) { + text += `Parent Issue: ${this.formatMainIssueInfoText({ + issue: issue.fields.parent, + baseUrl, + })}\n` + } + + if (issue.fields.subtasks.length > 0) { + text += `Subtasks:\n` + issue.fields.subtasks.forEach((subtask) => { + text += ` - ${this.formatMainIssueInfoText({ + issue: subtask, + baseUrl, + })}\n` + }) + } + + if (issue.fields.issuelinks.length > 0) { + text += `Issue Links:\n` + issue.fields.issuelinks.forEach((link) => { + text += ` - ${link.type.name}\n` + if (link.inwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.inwardIssue, + baseUrl, + })}\n` + } + if (link.outwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.outwardIssue, + baseUrl, + })}\n` + } + }) + } + + return text + } + + private getLinkToIssue({ + issueKey, + baseUrl, + }: { + issueKey: string + baseUrl: string + }): string { + return `${baseUrl}/browse/${issueKey}` + } + + private formatMainIssueInfoText({ + issue, + baseUrl, + }: { + issue: JiraIssue | JiraBriefIssue + baseUrl: string + }): string { + const link = this.getLinkToIssue({ + issueKey: issue.key, + baseUrl, + }) + + const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})` + + return text + } +} From b8849e3db839a45fa99ee690d98a88c7f7415704 Mon Sep 17 00:00:00 2001 From: xteam-ivoneijr Date: Thu, 31 Aug 2023 23:13:48 -0300 Subject: [PATCH 03/17] docs(community): add jira document loader documentation --- .../document_loaders/web_loaders/jira.mdx | 23 +++++++++++++++++++ examples/src/document_loaders/jira.ts | 20 ++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx create mode 100644 examples/src/document_loaders/jira.ts diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx new file mode 100644 index 000000000000..26478514ca30 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx @@ -0,0 +1,23 @@ +--- +sidebar_class_name: node-only +--- + +# Jira + +:::tip Compatibility +Only available on Node.js. +::: + +This covers how to load document objects from issues in a Jira projects. + +## Credentials + +- You'll need to set up an access token and provide it along with your jira username in order to authenticate the request +- You'll also need the `project key` for the project containing the issues to load as documents. + +## Usage + +import CodeBlock from "@theme/CodeBlock"; +import Example from "@examples/document_loaders/jira.ts"; + +{Example} diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts new file mode 100644 index 000000000000..1fb904bf560e --- /dev/null +++ b/examples/src/document_loaders/jira.ts @@ -0,0 +1,20 @@ +import { JiraProjectLoader } from "langchain/document_loaders/web/jira"; + +const username = process.env.CONFLUENCE_USERNAME; +const accessToken = process.env.CONFLUENCE_ACCESS_TOKEN; + +if (username && accessToken) { + const loader = new JiraProjectLoader({ + baseUrl: "https://example.atlassian.net/wiki", + projectKey: "PI", + username, + accessToken, + }); + + const documents = await loader.load(); + console.log(documents); +} else { + console.log( + "You must provide a username and access token to run this example." + ); +} From 23cac7ebcbab484461afe71c612379000c1f6ef2 Mon Sep 17 00:00:00 2001 From: xteam-ivoneijr Date: Thu, 31 Aug 2023 23:14:44 -0300 Subject: [PATCH 04/17] chore(community): change environment variables example naming --- examples/src/document_loaders/jira.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index 1fb904bf560e..0d9747903192 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -1,7 +1,7 @@ import { JiraProjectLoader } from "langchain/document_loaders/web/jira"; -const username = process.env.CONFLUENCE_USERNAME; -const accessToken = process.env.CONFLUENCE_ACCESS_TOKEN; +const username = process.env.JIRA_USERNAME; +const accessToken = process.env.JIRA_ACCESS_TOKEN; if (username && accessToken) { const loader = new JiraProjectLoader({ From 0cea35dafbdfc9ae3b21696ebc1bc7aaa1d8461f Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Wed, 13 Nov 2024 14:49:36 +0100 Subject: [PATCH 05/17] chore(community): Fix jira document loader after rebase --- examples/src/document_loaders/jira.ts | 2 +- libs/langchain-community/.gitignore | 4 ++++ libs/langchain-community/langchain.config.js | 1 + libs/langchain-community/package.json | 13 +++++++++++++ .../src/document_loaders/web/jira.ts | 4 ++-- libs/langchain-community/src/load/import_map.ts | 1 + 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index 0d9747903192..12af6fb5777c 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -1,4 +1,4 @@ -import { JiraProjectLoader } from "langchain/document_loaders/web/jira"; +import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira"; const username = process.env.JIRA_USERNAME; const accessToken = process.env.JIRA_ACCESS_TOKEN; diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index e6ae5fa54a4f..def745b41f80 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -902,6 +902,10 @@ document_loaders/web/imsdb.cjs document_loaders/web/imsdb.js document_loaders/web/imsdb.d.ts document_loaders/web/imsdb.d.cts +document_loaders/web/jira.cjs +document_loaders/web/jira.js +document_loaders/web/jira.d.ts +document_loaders/web/jira.d.cts document_loaders/web/figma.cjs document_loaders/web/figma.js document_loaders/web/figma.d.ts diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index 4a402c6941e8..7d735fa968ba 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -281,6 +281,7 @@ export const config = { "document_loaders/web/gitbook": "document_loaders/web/gitbook", "document_loaders/web/hn": "document_loaders/web/hn", "document_loaders/web/imsdb": "document_loaders/web/imsdb", + "document_loaders/web/jira": "document_loaders/web/jira", "document_loaders/web/figma": "document_loaders/web/figma", "document_loaders/web/firecrawl": "document_loaders/web/firecrawl", "document_loaders/web/github": "document_loaders/web/github", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index fafedbaea34f..423341bee8dc 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -2746,6 +2746,15 @@ "import": "./document_loaders/web/imsdb.js", "require": "./document_loaders/web/imsdb.cjs" }, + "./document_loaders/web/jira": { + "types": { + "import": "./document_loaders/web/jira.d.ts", + "require": "./document_loaders/web/jira.d.cts", + "default": "./document_loaders/web/jira.d.ts" + }, + "import": "./document_loaders/web/jira.js", + "require": "./document_loaders/web/jira.cjs" + }, "./document_loaders/web/figma": { "types": { "import": "./document_loaders/web/figma.d.ts", @@ -4005,6 +4014,10 @@ "document_loaders/web/imsdb.js", "document_loaders/web/imsdb.d.ts", "document_loaders/web/imsdb.d.cts", + "document_loaders/web/jira.cjs", + "document_loaders/web/jira.js", + "document_loaders/web/jira.d.ts", + "document_loaders/web/jira.d.cts", "document_loaders/web/figma.cjs", "document_loaders/web/figma.js", "document_loaders/web/figma.d.ts", diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 69cd7366ef32..95e583f855e2 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -1,5 +1,5 @@ -import { Document } from "../../document.js"; -import { BaseDocumentLoader } from "../base.js"; +import { Document } from "@langchain/core/documents"; +import { BaseDocumentLoader } from "@langchain/core/document_loaders/base"; type JiraStatusCategory = { self: string diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 8b3b734a82c1..26aa80ca068b 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -73,6 +73,7 @@ export * as indexes__base from "../indexes/base.js"; export * as indexes__memory from "../indexes/memory.js"; export * as document_loaders__web__airtable from "../document_loaders/web/airtable.js"; export * as document_loaders__web__html from "../document_loaders/web/html.js"; +export * as document_loaders__web__jira from "../document_loaders/web/jira.js"; export * as document_loaders__web__searchapi from "../document_loaders/web/searchapi.js"; export * as document_loaders__web__serpapi from "../document_loaders/web/serpapi.js"; export * as document_loaders__web__sort_xyz_blockchain from "../document_loaders/web/sort_xyz_blockchain.js"; From 9d5526155397bd19bfb28b8b136ece0ff494a92a Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Wed, 13 Nov 2024 18:08:19 +0100 Subject: [PATCH 06/17] chore(community): Reduce visibility of access token in JiraProjectLoader --- libs/langchain-community/src/document_loaders/web/jira.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 95e583f855e2..174415edd587 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -171,11 +171,10 @@ const API_ENDPOINTS = { * Class representing a document loader for loading pages from Confluence. */ export class JiraProjectLoader extends BaseDocumentLoader { + private readonly accessToken: string; public readonly baseUrl: string; public readonly projectKey: string; public readonly username: string; - public readonly accessToken: string; - public readonly authorizationHeader: string; public readonly limit: number; constructor({ @@ -190,7 +189,6 @@ export class JiraProjectLoader extends BaseDocumentLoader { this.projectKey = projectKey; this.username = username; this.accessToken = accessToken; - this.authorizationHeader = this.buildAuthorizationHeader(); this.limit = limit; } @@ -216,6 +214,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { } protected async *fetchIssues(): AsyncIterable { + const authorizationHeader = this.buildAuthorizationHeader(); const url = `${this.baseUrl}${API_ENDPOINTS.SEARCH}`; let startAt = 0; @@ -225,7 +224,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { const options = { method: 'GET', headers: { - Authorization: this.authorizationHeader, + Authorization: authorizationHeader, Accept: 'application/json' } }; From de3646869b8010bec90d0d2f4bf8f1722d006322 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Wed, 13 Nov 2024 18:21:32 +0100 Subject: [PATCH 07/17] chore(community): Reformat document loader jira.ts --- .../src/document_loaders/web/jira.ts | 350 +++++++++--------- 1 file changed, 177 insertions(+), 173 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 174415edd587..6fffa0d3dcac 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -2,179 +2,184 @@ import { Document } from "@langchain/core/documents"; import { BaseDocumentLoader } from "@langchain/core/document_loaders/base"; type JiraStatusCategory = { - self: string - id: number - key: string - colorName: string - name: string -} + self: string; + id: number; + key: string; + colorName: string; + name: string; +}; type JiraStatus = { - self: string - description: string - iconUrl: string - name: string - id: string - statusCategory: JiraStatusCategory -} + self: string; + description: string; + iconUrl: string; + name: string; + id: string; + statusCategory: JiraStatusCategory; +}; type JiraUser = { - accountId: string - accountType: string - active: boolean + accountId: string; + accountType: string; + active: boolean; avatarUrls: { - '16x16': string - '24x24': string - '32x32': string - '48x48': string - } - displayName: string - emailAddress: string - self: string - timeZone: string -} + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; + }; + displayName: string; + emailAddress: string; + self: string; + timeZone: string; +}; type JiraIssueType = { - avatarId: number - description: string - entityId: string - hierarchyLevel: number - iconUrl: string - id: string - name: string - self: string - subtask: boolean -} + avatarId: number; + description: string; + entityId: string; + hierarchyLevel: number; + iconUrl: string; + id: string; + name: string; + self: string; + subtask: boolean; +}; type JiraPriority = { - iconUrl: string - id: string - name: string - self: string -} + iconUrl: string; + id: string; + name: string; + self: string; +}; type JiraProgress = { - progress: number - total: number - percent?: number -} + progress: number; + total: number; + percent?: number; +}; export type JiraProject = { avatarUrls: { - '16x16': string - '24x24': string - '32x32': string - '48x48': string - } - id: string - key: string - name: string - projectTypeKey: string - self: string - simplified: boolean -} + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; + }; + id: string; + key: string; + name: string; + projectTypeKey: string; + self: string; + simplified: boolean; +}; type JiraSubTask = { - id: string - key: string - self: string + id: string; + key: string; + self: string; fields: { - issuetype: JiraIssueType - priority: JiraPriority - status: JiraStatus - summary: string - } -} + issuetype: JiraIssueType; + priority: JiraPriority; + status: JiraStatus; + summary: string; + }; +}; type JiraIssueLinkType = { - id: string - name: string - inward: string - outward: string - self: string -} + id: string; + name: string; + inward: string; + outward: string; + self: string; +}; export type JiraBriefIssue = { - id: string - key: string - self: string + id: string; + key: string; + self: string; fields: { - summary: string - status: JiraStatus - priority: JiraPriority - issuetype: JiraIssueType - } -} + summary: string; + status: JiraStatus; + priority: JiraPriority; + issuetype: JiraIssueType; + }; +}; type JiraIssueLink = { - id: string - self: string - type: JiraIssueLinkType - inwardIssue?: JiraBriefIssue - outwardIssue?: JiraBriefIssue -} + id: string; + self: string; + type: JiraIssueLinkType; + inwardIssue?: JiraBriefIssue; + outwardIssue?: JiraBriefIssue; +}; export type JiraIssue = { - expand: string - id: string - self: string - key: string + expand: string; + id: string; + self: string; + key: string; fields: { - assignee?: JiraUser - created: string - description: string - issuelinks: JiraIssueLink[] - issuetype: JiraIssueType - labels?: string[] - priority: JiraPriority - progress: JiraProgress - project: JiraProject - reporter?: JiraUser - creator: JiraUser - resolutiondate?: string - status: JiraStatus - subtasks: JiraSubTask[] - summary: string - timeestimate?: number - timespent?: number - updated: string - duedate?: string - parent?: JiraBriefIssue - } -} + assignee?: JiraUser; + created: string; + description: string; + issuelinks: JiraIssueLink[]; + issuetype: JiraIssueType; + labels?: string[]; + priority: JiraPriority; + progress: JiraProgress; + project: JiraProject; + reporter?: JiraUser; + creator: JiraUser; + resolutiondate?: string; + status: JiraStatus; + subtasks: JiraSubTask[]; + summary: string; + timeestimate?: number; + timespent?: number; + updated: string; + duedate?: string; + parent?: JiraBriefIssue; + }; +}; export type JiraAPIResponse = { - expand: string - startAt: number - maxResults: number - total: number - issues: JiraIssue[] -} + expand: string; + startAt: number; + maxResults: number; + total: number; + issues: JiraIssue[]; +}; /** * Interface representing the parameters for configuring the * JiraProjectLoader. */ export interface JiraProjectLoaderParams { - baseUrl: string - projectKey: string - username: string - accessToken: string - limit?: number + baseUrl: string; + projectKey: string; + username: string; + accessToken: string; + limit?: number; } const API_ENDPOINTS = { - SEARCH: "/rest/api/2/search" + SEARCH: "/rest/api/2/search", }; /** * Class representing a document loader for loading pages from Confluence. */ export class JiraProjectLoader extends BaseDocumentLoader { + private readonly accessToken: string; + public readonly baseUrl: string; + public readonly projectKey: string; + public readonly username: string; + public readonly limit: number; constructor({ @@ -182,7 +187,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { projectKey, username, accessToken, - limit = 100 + limit = 100, }: JiraProjectLoaderParams) { super(); this.baseUrl = baseUrl; @@ -195,7 +200,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { private buildAuthorizationHeader(): string { return `Basic ${Buffer.from( `${this.username}:${this.accessToken}` - ).toString('base64')}`; + ).toString("base64")}`; } public async load(): Promise { @@ -206,9 +211,9 @@ export class JiraProjectLoader extends BaseDocumentLoader { allIssues.push(...issues); } - return allIssues.map(issue => this.documentFromIssue(issue)); + return allIssues.map((issue) => this.documentFromIssue(issue)); } catch (error) { - console.error('Error:', error); + console.error("Error:", error); return []; } } @@ -222,11 +227,11 @@ export class JiraProjectLoader extends BaseDocumentLoader { try { const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limit}`; const options = { - method: 'GET', + method: "GET", headers: { Authorization: authorizationHeader, - Accept: 'application/json' - } + Accept: "application/json", + }, }; const response = await fetch(pageUrl, options); @@ -236,7 +241,6 @@ export class JiraProjectLoader extends BaseDocumentLoader { yield data.issues; startAt += this.limit; - } catch (error) { console.error(error); yield []; @@ -248,13 +252,13 @@ export class JiraProjectLoader extends BaseDocumentLoader { return new Document({ pageContent: this.formatIssueInfo({ issue, - baseUrl: this.baseUrl + baseUrl: this.baseUrl, }), metadata: { id: issue.id, baseUrl: this.baseUrl, - projectKey: this.projectKey - } + projectKey: this.projectKey, + }, }); } @@ -262,116 +266,116 @@ export class JiraProjectLoader extends BaseDocumentLoader { issue, baseUrl, }: { - issue: JiraIssue - baseUrl: string + issue: JiraIssue; + baseUrl: string; }): string { - let text = `Issue: ${this.formatMainIssueInfoText({ issue, baseUrl })}\n` - text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n` - text += `Status: ${issue.fields.status.name}\n` - text += `Priority: ${issue.fields.priority.name}\n` - text += `Type: ${issue.fields.issuetype.name}\n` - text += `Creator: ${issue.fields.creator.displayName}\n` + let text = `Issue: ${this.formatMainIssueInfoText({ issue, baseUrl })}\n`; + text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n`; + text += `Status: ${issue.fields.status.name}\n`; + text += `Priority: ${issue.fields.priority.name}\n`; + text += `Type: ${issue.fields.issuetype.name}\n`; + text += `Creator: ${issue.fields.creator.displayName}\n`; if (issue.fields.labels && issue.fields.labels.length > 0) { - text += `Labels: ${issue.fields.labels.join(', ')}\n` + text += `Labels: ${issue.fields.labels.join(", ")}\n`; } - text += `Created: ${issue.fields.created}\n` - text += `Updated: ${issue.fields.updated}\n` + text += `Created: ${issue.fields.created}\n`; + text += `Updated: ${issue.fields.updated}\n`; if (issue.fields.reporter) { - text += `Reporter: ${issue.fields.reporter.displayName}\n` + text += `Reporter: ${issue.fields.reporter.displayName}\n`; } - text += `Assignee: ${issue.fields.assignee?.displayName ?? 'Unassigned'}\n` + text += `Assignee: ${issue.fields.assignee?.displayName ?? "Unassigned"}\n`; if (issue.fields.duedate) { - text += `Due Date: ${issue.fields.duedate}\n` + text += `Due Date: ${issue.fields.duedate}\n`; } if (issue.fields.timeestimate) { - text += `Time Estimate: ${issue.fields.timeestimate}\n` + text += `Time Estimate: ${issue.fields.timeestimate}\n`; } if (issue.fields.timespent) { - text += `Time Spent: ${issue.fields.timespent}\n` + text += `Time Spent: ${issue.fields.timespent}\n`; } if (issue.fields.resolutiondate) { - text += `Resolution Date: ${issue.fields.resolutiondate}\n` + text += `Resolution Date: ${issue.fields.resolutiondate}\n`; } if (issue.fields.description) { - text += `Description: ${issue.fields.description}\n` + text += `Description: ${issue.fields.description}\n`; } if (issue.fields.progress.percent) { - text += `Progress: ${issue.fields.progress.percent}%\n` + text += `Progress: ${issue.fields.progress.percent}%\n`; } if (issue.fields.parent) { text += `Parent Issue: ${this.formatMainIssueInfoText({ issue: issue.fields.parent, baseUrl, - })}\n` + })}\n`; } if (issue.fields.subtasks.length > 0) { - text += `Subtasks:\n` + text += `Subtasks:\n`; issue.fields.subtasks.forEach((subtask) => { text += ` - ${this.formatMainIssueInfoText({ issue: subtask, baseUrl, - })}\n` - }) + })}\n`; + }); } if (issue.fields.issuelinks.length > 0) { - text += `Issue Links:\n` + text += `Issue Links:\n`; issue.fields.issuelinks.forEach((link) => { - text += ` - ${link.type.name}\n` + text += ` - ${link.type.name}\n`; if (link.inwardIssue) { text += ` - ${this.formatMainIssueInfoText({ issue: link.inwardIssue, baseUrl, - })}\n` + })}\n`; } if (link.outwardIssue) { text += ` - ${this.formatMainIssueInfoText({ issue: link.outwardIssue, baseUrl, - })}\n` + })}\n`; } - }) + }); } - return text + return text; } private getLinkToIssue({ issueKey, baseUrl, }: { - issueKey: string - baseUrl: string + issueKey: string; + baseUrl: string; }): string { - return `${baseUrl}/browse/${issueKey}` + return `${baseUrl}/browse/${issueKey}`; } private formatMainIssueInfoText({ issue, baseUrl, }: { - issue: JiraIssue | JiraBriefIssue - baseUrl: string + issue: JiraIssue | JiraBriefIssue; + baseUrl: string; }): string { const link = this.getLinkToIssue({ issueKey: issue.key, baseUrl, - }) + }); - const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})` + const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})`; - return text + return text; } } From 7635eb372d081595701def4e5e1385788a520325 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 15 Nov 2024 17:07:14 +0100 Subject: [PATCH 08/17] =?UTF-8?q?chore(community):=20Rename=20baseUrl=20?= =?UTF-8?q?=E2=86=92=20host=20in=20jira=20document=20loader?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/src/document_loaders/jira.ts | 2 +- .../src/document_loaders/web/jira.ts | 42 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index 12af6fb5777c..e43ebcb4363a 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -5,7 +5,7 @@ const accessToken = process.env.JIRA_ACCESS_TOKEN; if (username && accessToken) { const loader = new JiraProjectLoader({ - baseUrl: "https://example.atlassian.net/wiki", + host: "https://example.atlassian.net/wiki", projectKey: "PI", username, accessToken, diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 6fffa0d3dcac..a1d19f1dfb6d 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -156,7 +156,7 @@ export type JiraAPIResponse = { * JiraProjectLoader. */ export interface JiraProjectLoaderParams { - baseUrl: string; + host: string; projectKey: string; username: string; accessToken: string; @@ -174,23 +174,23 @@ export class JiraProjectLoader extends BaseDocumentLoader { private readonly accessToken: string; - public readonly baseUrl: string; + public readonly host: string; public readonly projectKey: string; public readonly username: string; - + public readonly limit: number; constructor({ - baseUrl, + host, projectKey, username, accessToken, limit = 100, }: JiraProjectLoaderParams) { super(); - this.baseUrl = baseUrl; + this.host = host; this.projectKey = projectKey; this.username = username; this.accessToken = accessToken; @@ -220,7 +220,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { protected async *fetchIssues(): AsyncIterable { const authorizationHeader = this.buildAuthorizationHeader(); - const url = `${this.baseUrl}${API_ENDPOINTS.SEARCH}`; + const url = `${this.host}${API_ENDPOINTS.SEARCH}`; let startAt = 0; while (true) { @@ -252,11 +252,11 @@ export class JiraProjectLoader extends BaseDocumentLoader { return new Document({ pageContent: this.formatIssueInfo({ issue, - baseUrl: this.baseUrl, + host: this.host, }), metadata: { id: issue.id, - baseUrl: this.baseUrl, + host: this.host, projectKey: this.projectKey, }, }); @@ -264,12 +264,12 @@ export class JiraProjectLoader extends BaseDocumentLoader { private formatIssueInfo({ issue, - baseUrl, + host, }: { issue: JiraIssue; - baseUrl: string; + host: string; }): string { - let text = `Issue: ${this.formatMainIssueInfoText({ issue, baseUrl })}\n`; + let text = `Issue: ${this.formatMainIssueInfoText({ issue, host })}\n`; text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n`; text += `Status: ${issue.fields.status.name}\n`; text += `Priority: ${issue.fields.priority.name}\n`; @@ -316,7 +316,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { if (issue.fields.parent) { text += `Parent Issue: ${this.formatMainIssueInfoText({ issue: issue.fields.parent, - baseUrl, + host, })}\n`; } @@ -325,7 +325,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { issue.fields.subtasks.forEach((subtask) => { text += ` - ${this.formatMainIssueInfoText({ issue: subtask, - baseUrl, + host, })}\n`; }); } @@ -337,13 +337,13 @@ export class JiraProjectLoader extends BaseDocumentLoader { if (link.inwardIssue) { text += ` - ${this.formatMainIssueInfoText({ issue: link.inwardIssue, - baseUrl, + host, })}\n`; } if (link.outwardIssue) { text += ` - ${this.formatMainIssueInfoText({ issue: link.outwardIssue, - baseUrl, + host, })}\n`; } }); @@ -354,24 +354,24 @@ export class JiraProjectLoader extends BaseDocumentLoader { private getLinkToIssue({ issueKey, - baseUrl, + host, }: { issueKey: string; - baseUrl: string; + host: string; }): string { - return `${baseUrl}/browse/${issueKey}`; + return `${host}/browse/${issueKey}`; } private formatMainIssueInfoText({ issue, - baseUrl, + host, }: { issue: JiraIssue | JiraBriefIssue; - baseUrl: string; + host: string; }): string { const link = this.getLinkToIssue({ issueKey: issue.key, - baseUrl, + host, }); const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})`; From 7054b183bf355e18f7e9fdc2df92e59fd6a7a107 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 15 Nov 2024 17:44:54 +0100 Subject: [PATCH 09/17] chore(community): Make Jira document loader more modular (for test) --- .../src/document_loaders/web/jira.ts | 191 +++++++++++------- 1 file changed, 113 insertions(+), 78 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index a1d19f1dfb6d..e12e5f05cf59 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -153,101 +153,34 @@ export type JiraAPIResponse = { /** * Interface representing the parameters for configuring the - * JiraProjectLoader. + * JiraDocumentConverter. */ -export interface JiraProjectLoaderParams { +export interface JiraDocumentConverterParams { host: string; projectKey: string; - username: string; - accessToken: string; - limit?: number; } -const API_ENDPOINTS = { - SEARCH: "/rest/api/2/search", -}; - /** - * Class representing a document loader for loading pages from Confluence. + * Class responsible for converting Jira issues to Document objects */ -export class JiraProjectLoader extends BaseDocumentLoader { - - private readonly accessToken: string; - +export class JiraDocumentConverter { + public readonly host: string; - + public readonly projectKey: string; - public readonly username: string; - - public readonly limit: number; - constructor({ host, - projectKey, - username, - accessToken, - limit = 100, - }: JiraProjectLoaderParams) { - super(); + projectKey + }: JiraDocumentConverterParams) { this.host = host; this.projectKey = projectKey; - this.username = username; - this.accessToken = accessToken; - this.limit = limit; } - private buildAuthorizationHeader(): string { - return `Basic ${Buffer.from( - `${this.username}:${this.accessToken}` - ).toString("base64")}`; + public convertToDocuments(issues: JiraIssue[]): Document[] { + return issues.map((issue) => this.documentFromIssue(issue)); } - - public async load(): Promise { - const allIssues: JiraIssue[] = []; - - try { - for await (const issues of this.fetchIssues()) { - allIssues.push(...issues); - } - - return allIssues.map((issue) => this.documentFromIssue(issue)); - } catch (error) { - console.error("Error:", error); - return []; - } - } - - protected async *fetchIssues(): AsyncIterable { - const authorizationHeader = this.buildAuthorizationHeader(); - const url = `${this.host}${API_ENDPOINTS.SEARCH}`; - let startAt = 0; - - while (true) { - try { - const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limit}`; - const options = { - method: "GET", - headers: { - Authorization: authorizationHeader, - Accept: "application/json", - }, - }; - - const response = await fetch(pageUrl, options); - const data: JiraAPIResponse = await response.json(); - - if (!data.issues || data.issues.length === 0) break; - - yield data.issues; - startAt += this.limit; - } catch (error) { - console.error(error); - yield []; - } - } - } - + private documentFromIssue(issue: JiraIssue): Document { return new Document({ pageContent: this.formatIssueInfo({ @@ -379,3 +312,105 @@ export class JiraProjectLoader extends BaseDocumentLoader { return text; } } + +/** + * Interface representing the parameters for configuring the + * JiraProjectLoader. + */ +export interface JiraProjectLoaderParams { + host: string; + projectKey: string; + username: string; + accessToken: string; + limit?: number; +} + +const API_ENDPOINTS = { + SEARCH: "/rest/api/2/search", +}; + +/** + * Class representing a document loader for loading pages from Confluence. + */ +export class JiraProjectLoader extends BaseDocumentLoader { + + private readonly accessToken: string; + + public readonly host: string; + + public readonly projectKey: string; + + public readonly username: string; + + public readonly limit: number; + + private readonly documentConverter: JiraDocumentConverter; + + constructor({ + host, + projectKey, + username, + accessToken, + limit = 100, + }: JiraProjectLoaderParams) { + super(); + this.host = host; + this.projectKey = projectKey; + this.username = username; + this.accessToken = accessToken; + this.limit = limit; + this.documentConverter = new JiraDocumentConverter({host, projectKey}); + } + + private buildAuthorizationHeader(): string { + return `Basic ${Buffer.from( + `${this.username}:${this.accessToken}` + ).toString("base64")}`; + } + + public async load(): Promise { + const allIssues: JiraIssue[] = []; + + try { + for await (const issues of this.fetchIssues()) { + allIssues.push(...issues); + } + + return this.documentConverter.convertToDocuments(allIssues); + } catch (error) { + console.error("Error:", error); + return []; + } + } + + protected async *fetchIssues(): AsyncIterable { + const authorizationHeader = this.buildAuthorizationHeader(); + const url = `${this.host}${API_ENDPOINTS.SEARCH}`; + let startAt = 0; + + while (true) { + try { + const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limit}`; + const options = { + method: "GET", + headers: { + Authorization: authorizationHeader, + Accept: "application/json", + }, + }; + + const response = await fetch(pageUrl, options); + const data: JiraAPIResponse = await response.json(); + + if (!data.issues || data.issues.length === 0) break; + + yield data.issues; + startAt += this.limit; + } catch (error) { + console.error(error); + yield []; + } + } + } + +} From c7dcdf4e092d7dd0648cd9b3ca7c88dfd7f85596 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 15:54:48 +0100 Subject: [PATCH 10/17] =?UTF-8?q?chore(community):=20Rename=20JiraProjectL?= =?UTF-8?q?oader=20arg=20limit{=20=E2=86=92=20PerRequest}?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/document_loaders/web/jira.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index e12e5f05cf59..49d93140e8e2 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -322,7 +322,7 @@ export interface JiraProjectLoaderParams { projectKey: string; username: string; accessToken: string; - limit?: number; + limitPerRequest?: number; } const API_ENDPOINTS = { @@ -342,7 +342,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { public readonly username: string; - public readonly limit: number; + public readonly limitPerRequest: number; private readonly documentConverter: JiraDocumentConverter; @@ -351,14 +351,14 @@ export class JiraProjectLoader extends BaseDocumentLoader { projectKey, username, accessToken, - limit = 100, + limitPerRequest = 100, }: JiraProjectLoaderParams) { super(); this.host = host; this.projectKey = projectKey; this.username = username; this.accessToken = accessToken; - this.limit = limit; + this.limitPerRequest = limitPerRequest; this.documentConverter = new JiraDocumentConverter({host, projectKey}); } @@ -390,7 +390,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { while (true) { try { - const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limit}`; + const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limitPerRequest}`; const options = { method: "GET", headers: { @@ -405,7 +405,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { if (!data.issues || data.issues.length === 0) break; yield data.issues; - startAt += this.limit; + startAt += this.limitPerRequest; } catch (error) { console.error(error); yield []; From dd88695b2841060f53fa624c0a1778c9af145ea6 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 15:55:10 +0100 Subject: [PATCH 11/17] chore(community): Export all types for Jira document loader --- .../src/document_loaders/web/jira.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 49d93140e8e2..09af70d46fd2 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -1,7 +1,7 @@ import { Document } from "@langchain/core/documents"; import { BaseDocumentLoader } from "@langchain/core/document_loaders/base"; -type JiraStatusCategory = { +export type JiraStatusCategory = { self: string; id: number; key: string; @@ -9,7 +9,7 @@ type JiraStatusCategory = { name: string; }; -type JiraStatus = { +export type JiraStatus = { self: string; description: string; iconUrl: string; @@ -18,7 +18,7 @@ type JiraStatus = { statusCategory: JiraStatusCategory; }; -type JiraUser = { +export type JiraUser = { accountId: string; accountType: string; active: boolean; @@ -34,7 +34,7 @@ type JiraUser = { timeZone: string; }; -type JiraIssueType = { +export type JiraIssueType = { avatarId: number; description: string; entityId: string; @@ -46,14 +46,14 @@ type JiraIssueType = { subtask: boolean; }; -type JiraPriority = { +export type JiraPriority = { iconUrl: string; id: string; name: string; self: string; }; -type JiraProgress = { +export type JiraProgress = { progress: number; total: number; percent?: number; @@ -74,7 +74,7 @@ export type JiraProject = { simplified: boolean; }; -type JiraSubTask = { +export type JiraSubTask = { id: string; key: string; self: string; @@ -86,7 +86,7 @@ type JiraSubTask = { }; }; -type JiraIssueLinkType = { +export type JiraIssueLinkType = { id: string; name: string; inward: string; @@ -106,7 +106,7 @@ export type JiraBriefIssue = { }; }; -type JiraIssueLink = { +export type JiraIssueLink = { id: string; self: string; type: JiraIssueLinkType; From 0a736184bf98c25bafc23c1b1b0290b2fc23251f Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 16:15:26 +0100 Subject: [PATCH 12/17] chore(community): Add tests on jira document loader --- .../document_loaders/tests/jira.int.test.ts | 82 +++++++ .../src/document_loaders/tests/jira.test.ts | 214 ++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 libs/langchain-community/src/document_loaders/tests/jira.int.test.ts create mode 100644 libs/langchain-community/src/document_loaders/tests/jira.test.ts diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts new file mode 100644 index 000000000000..58e5d6967214 --- /dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -0,0 +1,82 @@ +/** + * NOTE: Env var should be set, and configured project should exist + */ +import { expect, test } from "@jest/globals"; +import { JiraProjectLoader } from "../web/jira.js"; + +describe("JiraProjectLoader Integration Tests", () => { + const JIRA_HOST = requireEnvVar("JIRA_HOST"); + const JIRA_USERNAME = requireEnvVar("JIRA_USERNAME"); + const JIRA_ACCESS_TOKEN = requireEnvVar("JIRA_ACCESS_TOKEN"); + const JIRA_PROJECT_KEY = requireEnvVar("JIRA_PROJECT_KEY"); + + function requireEnvVar(name: string): string { + // eslint-disable-next-line no-process-env + const value = process.env[name]; + if (!value) { + throw new Error(`environment variable "${name}" must be set`); + } + return value; + } + + test("should load Jira project issues successfully", async () => { + const loader = new JiraProjectLoader({ + host: JIRA_HOST, + projectKey: JIRA_PROJECT_KEY, + username: JIRA_USERNAME, + accessToken: JIRA_ACCESS_TOKEN, + limitPerRequest: 20 + }); + + const docs = await loader.load(); + + expect(docs).toBeDefined(); + expect(Array.isArray(docs)).toBe(true); + + if (docs.length > 0) { + const firstDoc = docs[0]; + + // Check document structure + expect(firstDoc).toHaveProperty("pageContent"); + expect(firstDoc).toHaveProperty("metadata"); + + // Check metadata + expect(firstDoc.metadata).toHaveProperty("id"); + expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST); + expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY); + + // Check pageContent contains essential Jira issue information + const content = firstDoc.pageContent; + expect(content).toContain("Issue:"); + expect(content).toContain("Project:"); + expect(content).toContain("Status:"); + expect(content).toContain("Priority:"); + expect(content).toContain("Type:"); + expect(content).toContain("Creator:"); + } + }); + + test("should handle invalid credentials", async () => { + const loader = new JiraProjectLoader({ + host: JIRA_HOST, + projectKey: JIRA_PROJECT_KEY, + username: "invalid_username", + accessToken: "invalid_token" + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); + + test("should handle invalid project key", async () => { + const loader = new JiraProjectLoader({ + host: JIRA_HOST, + projectKey: "INVALID_PROJECT_KEY", + username: JIRA_USERNAME, + accessToken: JIRA_ACCESS_TOKEN + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); +}); diff --git a/libs/langchain-community/src/document_loaders/tests/jira.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.test.ts new file mode 100644 index 000000000000..b2fe7741e90b --- /dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.test.ts @@ -0,0 +1,214 @@ +import { faker } from "@faker-js/faker"; +import { JiraDocumentConverter, JiraIssue, JiraUser, JiraIssueType, JiraPriority, JiraProgress, JiraProject, JiraStatus, JiraStatusCategory } from "../web/jira.js"; + + +describe("JiraDocumentConverter Unit Tests", () => { + + function getConverter() { + return new JiraDocumentConverter({ + projectKey: "PROJ", + host: "https://example.com" + }); + } + + it("should handle missing optional fields", () => { + const issue: JiraIssue = someJiraIssue(); + delete issue.fields.assignee; + delete issue.fields.duedate + + const converter = getConverter(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document).toBeDefined(); + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain("Assignee: Unassigned"); + expect(document.pageContent).not.toMatch(/.*^Due Date: .*/m); + expect(document.metadata).toEqual({ + id: issue.id, + host: converter.host, + projectKey: converter.projectKey + }); + }); + + it("should format the document content properly", () => { + const converter = getConverter(); + const issue = someJiraIssue(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain(issue.fields.description); + expect(document.pageContent).toContain(issue.fields.labels?.join(", ") || ""); + expect(document.pageContent).toContain(issue.fields.reporter?.displayName || ""); + expect(document.pageContent).toContain(issue.fields.assignee?.displayName || "Unassigned"); + expect(document.pageContent).toContain(issue.fields.duedate || ""); + expect(document.pageContent).toContain(issue.fields.timeestimate?.toString() || ""); + expect(document.pageContent).toContain(issue.fields.timespent?.toString() || ""); + expect(document.pageContent).toContain(issue.fields.resolutiondate || ""); + expect(document.pageContent).toContain(issue.fields.progress.percent?.toString() || ""); + }); +}); + +export function someJiraIssueType(overrides: Partial = {}): JiraIssueType { + const baseIssueType: JiraIssueType = { + avatarId: faker.number.int({ min: 1, max: 100 }), + description: faker.lorem.sentence(), + entityId: faker.string.uuid(), + hierarchyLevel: faker.number.int({ min: 1, max: 5 }), + iconUrl: faker.image.url(), + id: faker.string.numeric(5), + name: faker.helpers.arrayElement(['Bug', 'Task', 'Story', 'Epic']), + self: faker.internet.url(), + subtask: false, + }; + + return { + ...baseIssueType, + ...overrides, + }; +} + + +export function someJiraUser(overrides: Partial = {}): JiraUser { + const baseUser = { + accountId: faker.string.uuid(), + accountType: "atlassian", + active: true, + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + displayName: faker.person.fullName(), + emailAddress: faker.internet.email(), + self: faker.internet.url(), + timeZone: faker.location.timeZone(), + }; + + return { + ...baseUser, + ...overrides, + }; +} + +export function someJiraPriority(overrides: Partial = {}): JiraPriority { + const basePriority: JiraPriority = { + iconUrl: faker.image.url(), + id: faker.string.numeric(2), + name: faker.helpers.arrayElement(['Highest', 'High', 'Medium', 'Low', 'Lowest']), + self: faker.internet.url(), + }; + + return { + ...basePriority, + ...overrides, + }; +} + +export function someJiraProgress(overrides: Partial = {}): JiraProgress { + const baseProgress: JiraProgress = { + progress: faker.number.int({ min: 0, max: 100 }), + total: 100, + percent: faker.number.int({ min: 0, max: 100 }), + }; + + return { + ...baseProgress, + ...overrides, + }; +} + +export function someJiraProject(overrides: Partial = {}): JiraProject { + const baseProject: JiraProject = { + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + id: faker.string.numeric(5), + key: faker.string.alpha(4).toUpperCase(), + name: faker.company.name(), + projectTypeKey: "software", + self: faker.internet.url(), + simplified: false, + }; + + return { + ...baseProject, + ...overrides, + }; +} + +export function someJiraStatusCategory(overrides: Partial = {}): JiraStatusCategory { + const baseStatusCategory: JiraStatusCategory = { + self: faker.internet.url(), + id: faker.number.int({ min: 1, max: 5 }), + key: faker.helpers.arrayElement(['new', 'indeterminate', 'done']), + colorName: faker.helpers.arrayElement(['blue-gray', 'yellow', 'green']), + name: faker.helpers.arrayElement(['To Do', 'In Progress', 'Done']), + }; + + return { + ...baseStatusCategory, + ...overrides, + }; +} + + +export function someJiraStatus(overrides: Partial = {}): JiraStatus { + const baseStatus: JiraStatus = { + self: faker.internet.url(), + description: faker.lorem.sentence(), + iconUrl: faker.image.url(), + name: faker.helpers.arrayElement(['To Do', 'In Progress', 'Done', 'Blocked']), + id: faker.string.numeric(2), + statusCategory: someJiraStatusCategory(), + }; + + return { + ...baseStatus, + ...overrides, + }; +} + + +export function someJiraIssue(overrides: Partial = {}): JiraIssue { + const issueKey = `${faker.string.alpha(4).toUpperCase()}-${faker.number.int({ min: 1, max: 9999 })}`; + + const baseIssue: JiraIssue = { + expand: "renderedFields", + id: faker.string.numeric(5), + self: `https://${faker.internet.domainName()}/rest/api/2/issue/${issueKey}`, + key: issueKey, + fields: { + assignee: faker.datatype.boolean() ? someJiraUser() : undefined, + created: faker.date.past().toISOString(), + description: faker.lorem.paragraph(), + issuelinks: [], + issuetype: someJiraIssueType(), + labels: faker.datatype.boolean() ? + Array.from({ length: faker.number.int({ min: 1, max: 5 }) }, () => faker.word.noun()) : + undefined, + priority: someJiraPriority(), + progress: someJiraProgress(), + project: someJiraProject(), + reporter: faker.datatype.boolean() ? someJiraUser() : undefined, + creator: someJiraUser(), + resolutiondate: faker.datatype.boolean() ? faker.date.recent().toISOString() : undefined, + status: someJiraStatus(), + subtasks: [], + summary: faker.lorem.sentence(), + timeestimate: faker.datatype.boolean() ? faker.number.int({ min: 1, max: 100 }) * 3600 : undefined, + timespent: faker.datatype.boolean() ? faker.number.int({ min: 1, max: 100 }) * 3600 : undefined, + updated: faker.date.recent().toISOString(), + duedate: faker.datatype.boolean() ? faker.date.future().toISOString() : undefined, + }, + }; + console.log(baseIssue.fields.duedate); + + return { + ...baseIssue, + ...overrides, + }; +} \ No newline at end of file From 359518c0d2284a1a604fb456ac470283af20d1d9 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 16:21:07 +0100 Subject: [PATCH 13/17] chore(community): Reformat jira document loader --- .../document_loaders/tests/jira.int.test.ts | 6 +- .../src/document_loaders/tests/jira.test.ts | 127 +++++++++++++----- .../src/document_loaders/web/jira.ts | 18 +-- 3 files changed, 99 insertions(+), 52 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts index 58e5d6967214..acc66898949d 100644 --- a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -25,7 +25,7 @@ describe("JiraProjectLoader Integration Tests", () => { projectKey: JIRA_PROJECT_KEY, username: JIRA_USERNAME, accessToken: JIRA_ACCESS_TOKEN, - limitPerRequest: 20 + limitPerRequest: 20, }); const docs = await loader.load(); @@ -61,7 +61,7 @@ describe("JiraProjectLoader Integration Tests", () => { host: JIRA_HOST, projectKey: JIRA_PROJECT_KEY, username: "invalid_username", - accessToken: "invalid_token" + accessToken: "invalid_token", }); const docs = await loader.load(); @@ -73,7 +73,7 @@ describe("JiraProjectLoader Integration Tests", () => { host: JIRA_HOST, projectKey: "INVALID_PROJECT_KEY", username: JIRA_USERNAME, - accessToken: JIRA_ACCESS_TOKEN + accessToken: JIRA_ACCESS_TOKEN, }); const docs = await loader.load(); diff --git a/libs/langchain-community/src/document_loaders/tests/jira.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.test.ts index b2fe7741e90b..92b1224446e0 100644 --- a/libs/langchain-community/src/document_loaders/tests/jira.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/jira.test.ts @@ -1,20 +1,28 @@ import { faker } from "@faker-js/faker"; -import { JiraDocumentConverter, JiraIssue, JiraUser, JiraIssueType, JiraPriority, JiraProgress, JiraProject, JiraStatus, JiraStatusCategory } from "../web/jira.js"; - +import { + JiraDocumentConverter, + JiraIssue, + JiraUser, + JiraIssueType, + JiraPriority, + JiraProgress, + JiraProject, + JiraStatus, + JiraStatusCategory, +} from "../web/jira.js"; describe("JiraDocumentConverter Unit Tests", () => { - function getConverter() { return new JiraDocumentConverter({ projectKey: "PROJ", - host: "https://example.com" + host: "https://example.com", }); } it("should handle missing optional fields", () => { const issue: JiraIssue = someJiraIssue(); delete issue.fields.assignee; - delete issue.fields.duedate + delete issue.fields.duedate; const converter = getConverter(); const document = converter.convertToDocuments([issue])[0]; @@ -26,7 +34,7 @@ describe("JiraDocumentConverter Unit Tests", () => { expect(document.metadata).toEqual({ id: issue.id, host: converter.host, - projectKey: converter.projectKey + projectKey: converter.projectKey, }); }); @@ -37,18 +45,32 @@ describe("JiraDocumentConverter Unit Tests", () => { expect(document.pageContent).toContain(issue.fields.summary); expect(document.pageContent).toContain(issue.fields.description); - expect(document.pageContent).toContain(issue.fields.labels?.join(", ") || ""); - expect(document.pageContent).toContain(issue.fields.reporter?.displayName || ""); - expect(document.pageContent).toContain(issue.fields.assignee?.displayName || "Unassigned"); + expect(document.pageContent).toContain( + issue.fields.labels?.join(", ") || "" + ); + expect(document.pageContent).toContain( + issue.fields.reporter?.displayName || "" + ); + expect(document.pageContent).toContain( + issue.fields.assignee?.displayName || "Unassigned" + ); expect(document.pageContent).toContain(issue.fields.duedate || ""); - expect(document.pageContent).toContain(issue.fields.timeestimate?.toString() || ""); - expect(document.pageContent).toContain(issue.fields.timespent?.toString() || ""); + expect(document.pageContent).toContain( + issue.fields.timeestimate?.toString() || "" + ); + expect(document.pageContent).toContain( + issue.fields.timespent?.toString() || "" + ); expect(document.pageContent).toContain(issue.fields.resolutiondate || ""); - expect(document.pageContent).toContain(issue.fields.progress.percent?.toString() || ""); + expect(document.pageContent).toContain( + issue.fields.progress.percent?.toString() || "" + ); }); }); -export function someJiraIssueType(overrides: Partial = {}): JiraIssueType { +export function someJiraIssueType( + overrides: Partial = {} +): JiraIssueType { const baseIssueType: JiraIssueType = { avatarId: faker.number.int({ min: 1, max: 100 }), description: faker.lorem.sentence(), @@ -56,7 +78,7 @@ export function someJiraIssueType(overrides: Partial = {}): JiraI hierarchyLevel: faker.number.int({ min: 1, max: 5 }), iconUrl: faker.image.url(), id: faker.string.numeric(5), - name: faker.helpers.arrayElement(['Bug', 'Task', 'Story', 'Epic']), + name: faker.helpers.arrayElement(["Bug", "Task", "Story", "Epic"]), self: faker.internet.url(), subtask: false, }; @@ -67,7 +89,6 @@ export function someJiraIssueType(overrides: Partial = {}): JiraI }; } - export function someJiraUser(overrides: Partial = {}): JiraUser { const baseUser = { accountId: faker.string.uuid(), @@ -91,11 +112,19 @@ export function someJiraUser(overrides: Partial = {}): JiraUser { }; } -export function someJiraPriority(overrides: Partial = {}): JiraPriority { +export function someJiraPriority( + overrides: Partial = {} +): JiraPriority { const basePriority: JiraPriority = { iconUrl: faker.image.url(), id: faker.string.numeric(2), - name: faker.helpers.arrayElement(['Highest', 'High', 'Medium', 'Low', 'Lowest']), + name: faker.helpers.arrayElement([ + "Highest", + "High", + "Medium", + "Low", + "Lowest", + ]), self: faker.internet.url(), }; @@ -105,7 +134,9 @@ export function someJiraPriority(overrides: Partial = {}): JiraPri }; } -export function someJiraProgress(overrides: Partial = {}): JiraProgress { +export function someJiraProgress( + overrides: Partial = {} +): JiraProgress { const baseProgress: JiraProgress = { progress: faker.number.int({ min: 0, max: 100 }), total: 100, @@ -118,7 +149,9 @@ export function someJiraProgress(overrides: Partial = {}): JiraPro }; } -export function someJiraProject(overrides: Partial = {}): JiraProject { +export function someJiraProject( + overrides: Partial = {} +): JiraProject { const baseProject: JiraProject = { avatarUrls: { "16x16": faker.image.avatar(), @@ -140,13 +173,15 @@ export function someJiraProject(overrides: Partial = {}): JiraProje }; } -export function someJiraStatusCategory(overrides: Partial = {}): JiraStatusCategory { +export function someJiraStatusCategory( + overrides: Partial = {} +): JiraStatusCategory { const baseStatusCategory: JiraStatusCategory = { self: faker.internet.url(), id: faker.number.int({ min: 1, max: 5 }), - key: faker.helpers.arrayElement(['new', 'indeterminate', 'done']), - colorName: faker.helpers.arrayElement(['blue-gray', 'yellow', 'green']), - name: faker.helpers.arrayElement(['To Do', 'In Progress', 'Done']), + key: faker.helpers.arrayElement(["new", "indeterminate", "done"]), + colorName: faker.helpers.arrayElement(["blue-gray", "yellow", "green"]), + name: faker.helpers.arrayElement(["To Do", "In Progress", "Done"]), }; return { @@ -155,13 +190,19 @@ export function someJiraStatusCategory(overrides: Partial = }; } - -export function someJiraStatus(overrides: Partial = {}): JiraStatus { +export function someJiraStatus( + overrides: Partial = {} +): JiraStatus { const baseStatus: JiraStatus = { self: faker.internet.url(), description: faker.lorem.sentence(), iconUrl: faker.image.url(), - name: faker.helpers.arrayElement(['To Do', 'In Progress', 'Done', 'Blocked']), + name: faker.helpers.arrayElement([ + "To Do", + "In Progress", + "Done", + "Blocked", + ]), id: faker.string.numeric(2), statusCategory: someJiraStatusCategory(), }; @@ -172,10 +213,12 @@ export function someJiraStatus(overrides: Partial = {}): JiraStatus }; } - export function someJiraIssue(overrides: Partial = {}): JiraIssue { - const issueKey = `${faker.string.alpha(4).toUpperCase()}-${faker.number.int({ min: 1, max: 9999 })}`; - + const issueKey = `${faker.string.alpha(4).toUpperCase()}-${faker.number.int({ + min: 1, + max: 9999, + })}`; + const baseIssue: JiraIssue = { expand: "renderedFields", id: faker.string.numeric(5), @@ -187,22 +230,32 @@ export function someJiraIssue(overrides: Partial = {}): JiraIssue { description: faker.lorem.paragraph(), issuelinks: [], issuetype: someJiraIssueType(), - labels: faker.datatype.boolean() ? - Array.from({ length: faker.number.int({ min: 1, max: 5 }) }, () => faker.word.noun()) : - undefined, + labels: faker.datatype.boolean() + ? Array.from({ length: faker.number.int({ min: 1, max: 5 }) }, () => + faker.word.noun() + ) + : undefined, priority: someJiraPriority(), progress: someJiraProgress(), project: someJiraProject(), reporter: faker.datatype.boolean() ? someJiraUser() : undefined, creator: someJiraUser(), - resolutiondate: faker.datatype.boolean() ? faker.date.recent().toISOString() : undefined, + resolutiondate: faker.datatype.boolean() + ? faker.date.recent().toISOString() + : undefined, status: someJiraStatus(), subtasks: [], summary: faker.lorem.sentence(), - timeestimate: faker.datatype.boolean() ? faker.number.int({ min: 1, max: 100 }) * 3600 : undefined, - timespent: faker.datatype.boolean() ? faker.number.int({ min: 1, max: 100 }) * 3600 : undefined, + timeestimate: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, + timespent: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, updated: faker.date.recent().toISOString(), - duedate: faker.datatype.boolean() ? faker.date.future().toISOString() : undefined, + duedate: faker.datatype.boolean() + ? faker.date.future().toISOString() + : undefined, }, }; console.log(baseIssue.fields.duedate); @@ -211,4 +264,4 @@ export function someJiraIssue(overrides: Partial = {}): JiraIssue { ...baseIssue, ...overrides, }; -} \ No newline at end of file +} diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 09af70d46fd2..13ce2ce3a32b 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -164,15 +164,11 @@ export interface JiraDocumentConverterParams { * Class responsible for converting Jira issues to Document objects */ export class JiraDocumentConverter { - public readonly host: string; - + public readonly projectKey: string; - constructor({ - host, - projectKey - }: JiraDocumentConverterParams) { + constructor({ host, projectKey }: JiraDocumentConverterParams) { this.host = host; this.projectKey = projectKey; } @@ -180,7 +176,7 @@ export class JiraDocumentConverter { public convertToDocuments(issues: JiraIssue[]): Document[] { return issues.map((issue) => this.documentFromIssue(issue)); } - + private documentFromIssue(issue: JiraIssue): Document { return new Document({ pageContent: this.formatIssueInfo({ @@ -333,7 +329,6 @@ const API_ENDPOINTS = { * Class representing a document loader for loading pages from Confluence. */ export class JiraProjectLoader extends BaseDocumentLoader { - private readonly accessToken: string; public readonly host: string; @@ -345,7 +340,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { public readonly limitPerRequest: number; private readonly documentConverter: JiraDocumentConverter; - + constructor({ host, projectKey, @@ -359,7 +354,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { this.username = username; this.accessToken = accessToken; this.limitPerRequest = limitPerRequest; - this.documentConverter = new JiraDocumentConverter({host, projectKey}); + this.documentConverter = new JiraDocumentConverter({ host, projectKey }); } private buildAuthorizationHeader(): string { @@ -375,7 +370,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { for await (const issues of this.fetchIssues()) { allIssues.push(...issues); } - + return this.documentConverter.convertToDocuments(allIssues); } catch (error) { console.error("Error:", error); @@ -412,5 +407,4 @@ export class JiraProjectLoader extends BaseDocumentLoader { } } } - } From bc1f3b9354d132173f3c1b42d3fc25d3d0f7ae64 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 16:22:43 +0100 Subject: [PATCH 14/17] chore(community): Add .env examples for jira document loader --- examples/.env.example | 6 +++++- libs/langchain-community/.env.example | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 libs/langchain-community/.env.example diff --git a/examples/.env.example b/examples/.env.example index 2abb8d8e6912..9aae33991e92 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -83,4 +83,8 @@ FRIENDLI_TEAM=ADD_YOURS_HERE # https://suite.friendli.ai/ HANA_HOST=HANA_DB_ADDRESS HANA_PORT=HANA_DB_PORT HANA_UID=HANA_DB_USER -HANA_PWD=HANA_DB_PASSWORD \ No newline at end of file +HANA_PWD=HANA_DB_PASSWORD +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE +JIRA_PROJECT_KEY=ADD_YOURS_HERE \ No newline at end of file diff --git a/libs/langchain-community/.env.example b/libs/langchain-community/.env.example new file mode 100644 index 000000000000..2c36f95558b3 --- /dev/null +++ b/libs/langchain-community/.env.example @@ -0,0 +1,4 @@ +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE +JIRA_PROJECT_KEY=ADD_YOURS_HERE From e98b141a8558f7bf12516162610938df5668d012 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Fri, 29 Nov 2024 18:56:16 +0100 Subject: [PATCH 15/17] chore(community): jira doc loader example: host and projectKey as env var --- examples/src/document_loaders/jira.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index e43ebcb4363a..3da14560e658 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -1,12 +1,14 @@ import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira"; +const host = process.env.JIRA_HOST; const username = process.env.JIRA_USERNAME; const accessToken = process.env.JIRA_ACCESS_TOKEN; +const projectKey = process.env.JIRA_PROJECT_KEY; if (username && accessToken) { const loader = new JiraProjectLoader({ - host: "https://example.atlassian.net/wiki", - projectKey: "PI", + host, + projectKey, username, accessToken, }); From cc18630a6913ef1a6b643d00249575134fe42caa Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Mon, 2 Dec 2024 10:43:33 +0100 Subject: [PATCH 16/17] feat(community) Add 'createdAfter' filter on jira document loader --- examples/src/document_loaders/jira.ts | 4 ++++ .../document_loaders/tests/jira.int.test.ts | 23 +++++++++++++++++-- .../src/document_loaders/web/jira.ts | 14 ++++++++++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index 3da14560e658..0d412203623a 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -6,11 +6,15 @@ const accessToken = process.env.JIRA_ACCESS_TOKEN; const projectKey = process.env.JIRA_PROJECT_KEY; if (username && accessToken) { + // Created within last 30 days + const createdAfter = new Date(); + createdAfter.setDate(createdAfter.getDate() - 30); const loader = new JiraProjectLoader({ host, projectKey, username, accessToken, + createdAfter }); const documents = await loader.load(); diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts index acc66898949d..42ec79c7395a 100644 --- a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -1,6 +1,7 @@ /** * NOTE: Env var should be set, and configured project should exist */ +import { Document } from "@langchain/core/documents"; import { expect, test } from "@jest/globals"; import { JiraProjectLoader } from "../web/jira.js"; @@ -19,16 +20,34 @@ describe("JiraProjectLoader Integration Tests", () => { return value; } - test("should load Jira project issues successfully", async () => { + async function loadJiraDocs({createdAfter = undefined}: {createdAfter?: Date} = {}): Promise { const loader = new JiraProjectLoader({ host: JIRA_HOST, projectKey: JIRA_PROJECT_KEY, username: JIRA_USERNAME, accessToken: JIRA_ACCESS_TOKEN, limitPerRequest: 20, + createdAfter }); - const docs = await loader.load(); + return loader.load(); + } + + test("should load Jira project issues successfully", async () => { + const now = new Date(); + let months = 1; + + let docs: Document[] = []; + while (docs.length === 0 && months < 120) { + const createdAfter = new Date(now); + createdAfter.setDate(now.getDate() - months * 30); + docs = await loadJiraDocs({createdAfter}); + months *= 1.2; + } + + if (months >= 10) { + docs = await loadJiraDocs({}); + } expect(docs).toBeDefined(); expect(Array.isArray(docs)).toBe(true); diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 13ce2ce3a32b..631bbbf31eb9 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -319,6 +319,7 @@ export interface JiraProjectLoaderParams { username: string; accessToken: string; limitPerRequest?: number; + createdAfter?: Date; } const API_ENDPOINTS = { @@ -339,6 +340,8 @@ export class JiraProjectLoader extends BaseDocumentLoader { public readonly limitPerRequest: number; + private readonly createdAfter?: Date; + private readonly documentConverter: JiraDocumentConverter; constructor({ @@ -347,6 +350,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { username, accessToken, limitPerRequest = 100, + createdAfter, }: JiraProjectLoaderParams) { super(); this.host = host; @@ -354,6 +358,7 @@ export class JiraProjectLoader extends BaseDocumentLoader { this.username = username; this.accessToken = accessToken; this.limitPerRequest = limitPerRequest; + this.createdAfter = createdAfter; this.documentConverter = new JiraDocumentConverter({ host, projectKey }); } @@ -385,7 +390,14 @@ export class JiraProjectLoader extends BaseDocumentLoader { while (true) { try { - const pageUrl = `${url}?jql=project=${this.projectKey}&startAt=${startAt}&maxResults=${this.limitPerRequest}`; + const jqlProps = [ + `project=${this.projectKey}`, + `startAt=${startAt}`, + `maxResults=${this.limitPerRequest}`, + ...(this.createdAfter ? [`created>${this.createdAfter.toISOString()}`] : []) + ]; + const pageUrl = `${url}?jql=${jqlProps.join('&')}`; + const options = { method: "GET", headers: { From 195a6d7f27902c786a49a67b0025e8f1d71e4033 Mon Sep 17 00:00:00 2001 From: Mathieu Giorgino Date: Mon, 2 Dec 2024 21:03:27 +0100 Subject: [PATCH 17/17] feat(community): Add jira document loader filtering on creation date --- examples/src/document_loaders/jira.ts | 6 +- .../document_loaders/tests/jira.int.test.ts | 230 +++++++++++++----- .../src/document_loaders/web/jira.ts | 41 +++- 3 files changed, 202 insertions(+), 75 deletions(-) diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts index 0d412203623a..73d52efdf76f 100644 --- a/examples/src/document_loaders/jira.ts +++ b/examples/src/document_loaders/jira.ts @@ -1,9 +1,9 @@ import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira"; -const host = process.env.JIRA_HOST; +const host = process.env.JIRA_HOST || 'https://jira.example.com'; const username = process.env.JIRA_USERNAME; const accessToken = process.env.JIRA_ACCESS_TOKEN; -const projectKey = process.env.JIRA_PROJECT_KEY; +const projectKey = process.env.JIRA_PROJECT_KEY || 'PROJ'; if (username && accessToken) { // Created within last 30 days @@ -18,7 +18,7 @@ if (username && accessToken) { }); const documents = await loader.load(); - console.log(documents); + console.log(`Loaded ${documents.length} Jira document(s)`); } else { console.log( "You must provide a username and access token to run this example." diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts index 42ec79c7395a..e01d1d65663b 100644 --- a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -3,82 +3,118 @@ */ import { Document } from "@langchain/core/documents"; import { expect, test } from "@jest/globals"; -import { JiraProjectLoader } from "../web/jira.js"; +import { + JiraIssue, + JiraProjectLoader, + JiraProjectLoaderParams, +} from "../web/jira.js"; describe("JiraProjectLoader Integration Tests", () => { const JIRA_HOST = requireEnvVar("JIRA_HOST"); const JIRA_USERNAME = requireEnvVar("JIRA_USERNAME"); const JIRA_ACCESS_TOKEN = requireEnvVar("JIRA_ACCESS_TOKEN"); const JIRA_PROJECT_KEY = requireEnvVar("JIRA_PROJECT_KEY"); + const jiraConf: JiraProjectLoaderParams = { + host: JIRA_HOST, + projectKey: JIRA_PROJECT_KEY, + username: JIRA_USERNAME, + accessToken: JIRA_ACCESS_TOKEN, + limitPerRequest: 20, + }; - function requireEnvVar(name: string): string { - // eslint-disable-next-line no-process-env - const value = process.env[name]; - if (!value) { - throw new Error(`environment variable "${name}" must be set`); - } - return value; - } + test("should load Jira project issues as documents successfully", async () => { + const docs = await loadJiraDocsUntil((docs) => docs.length > 0); - async function loadJiraDocs({createdAfter = undefined}: {createdAfter?: Date} = {}): Promise { - const loader = new JiraProjectLoader({ - host: JIRA_HOST, - projectKey: JIRA_PROJECT_KEY, - username: JIRA_USERNAME, - accessToken: JIRA_ACCESS_TOKEN, - limitPerRequest: 20, - createdAfter - }); - - return loader.load(); - } - - test("should load Jira project issues successfully", async () => { - const now = new Date(); - let months = 1; + expect(docs).toBeDefined(); + expect(Array.isArray(docs)).toBe(true); - let docs: Document[] = []; - while (docs.length === 0 && months < 120) { - const createdAfter = new Date(now); - createdAfter.setDate(now.getDate() - months * 30); - docs = await loadJiraDocs({createdAfter}); - months *= 1.2; + if (docs.length < 1) { + // Skip test if not enough issues available + return; } + const firstDoc = docs[0]; + + // Check document structure + expect(firstDoc).toHaveProperty("pageContent"); + expect(firstDoc).toHaveProperty("metadata"); + + // Check metadata + expect(firstDoc.metadata).toHaveProperty("id"); + expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST); + expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY); + + // Check pageContent contains essential Jira issue information + const content = firstDoc.pageContent; + expect(content).toContain("Issue:"); + expect(content).toContain("Project:"); + expect(content).toContain("Status:"); + expect(content).toContain("Priority:"); + expect(content).toContain("Type:"); + expect(content).toContain("Creator:"); + }); - if (months >= 10) { - docs = await loadJiraDocs({}); + test("should filter issues based on createdAfter date", async () => { + // First load at least 2 issues with different creation dates (ignoring time) + const baseIssues = await loadJiraIssuesUntil(haveTwoDifferentCreationDates); + if (baseIssues.length < 2) { + // Skip test if not enough issues available + return; } - expect(docs).toBeDefined(); - expect(Array.isArray(docs)).toBe(true); + // Create a map from date string without time to list of issues + const dateToIssueMap = new Map(); + baseIssues.forEach((issue) => { + const date = asStringWithoutTime(new Date(issue.fields.created)); + dateToIssueMap.set(date, (dateToIssueMap.get(date) ?? []).concat(issue)); + }); + // Convert map to list of {date, issues} + const issuesGroupedByDate = Array.from( + dateToIssueMap, + ([date, issues]) => ({ date, issues }) + ); + issuesGroupedByDate.sort((a, b) => a.date.localeCompare(b.date)); + + // Pick middle date to split issues in two groups + const middleIndex = Math.floor(issuesGroupedByDate.length / 2); + const middleDate = new Date(issuesGroupedByDate[middleIndex].date); + const issuesAfterMiddle = issuesGroupedByDate + .slice(middleIndex) + .flatMap(({ issues }) => issues); + + // Load issues created after middle date + const loader = new JiraProjectLoader({ + ...jiraConf, + createdAfter: middleDate, + }); - if (docs.length > 0) { - const firstDoc = docs[0]; - - // Check document structure - expect(firstDoc).toHaveProperty("pageContent"); - expect(firstDoc).toHaveProperty("metadata"); - - // Check metadata - expect(firstDoc.metadata).toHaveProperty("id"); - expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST); - expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY); - - // Check pageContent contains essential Jira issue information - const content = firstDoc.pageContent; - expect(content).toContain("Issue:"); - expect(content).toContain("Project:"); - expect(content).toContain("Status:"); - expect(content).toContain("Priority:"); - expect(content).toContain("Type:"); - expect(content).toContain("Creator:"); - } + const filteredDocs = await loader.load(); + + // Verify we got the expected issues + expect(filteredDocs.length).toBeGreaterThan(0); + expect(filteredDocs.length).toBeLessThan(baseIssues.length); + + // Verify all returned issues are created after our cutoff date + const middleDateTimestamp = middleDate.getTime(); + filteredDocs.forEach((doc) => { + const issueDateString = doc.pageContent + .split("\n") + .filter((line) => /^Created: /.test(line))[0] + .replace("Created: ", ""); + const issueDateTimestamp = new Date( + asStringWithoutTime(new Date(issueDateString)) + ).getTime(); + expect(issueDateTimestamp).toBeGreaterThanOrEqual(middleDateTimestamp); + }); + + // Verify we got the same issues as in our original set + const filteredIds = new Set(filteredDocs.map((d) => d.metadata.id)); + const expectedIds = new Set(issuesAfterMiddle.map((issue) => issue.id)); + expect(filteredIds).toEqual(expectedIds); }); test("should handle invalid credentials", async () => { const loader = new JiraProjectLoader({ - host: JIRA_HOST, - projectKey: JIRA_PROJECT_KEY, + ...jiraConf, username: "invalid_username", accessToken: "invalid_token", }); @@ -89,13 +125,85 @@ describe("JiraProjectLoader Integration Tests", () => { test("should handle invalid project key", async () => { const loader = new JiraProjectLoader({ - host: JIRA_HOST, + ...jiraConf, projectKey: "INVALID_PROJECT_KEY", - username: JIRA_USERNAME, - accessToken: JIRA_ACCESS_TOKEN, }); const docs = await loader.load(); expect(docs).toEqual([]); }); + + function requireEnvVar(name: string): string { + // eslint-disable-next-line no-process-env + const value = process.env[name]; + if (!value) { + throw new Error(`environment variable "${name}" must be set`); + } + return value; + } + + function asStringWithoutTime(date: Date): string { + return date.toISOString().split("T")[0]; + } + + function sameDate(a: Date, b: Date) { + return asStringWithoutTime(a) === asStringWithoutTime(b); + } + + function haveTwoDifferentCreationDates(issues: JiraIssue[]): boolean { + return ( + issues.length >= 2 && + issues + .slice(1) + .some( + (issue) => + !sameDate( + new Date(issue.fields.created), + new Date(issues[0].fields.created) + ) + ) + ); + } + + async function loadJiraDocsUntil(predicate: (docs: Document[]) => boolean) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).load(); + return loadUntil(load, predicate); + } + + async function loadJiraIssuesUntil( + predicate: (docs: JiraIssue[]) => boolean + ) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).loadAsIssues(); + return loadUntil(load, predicate); + } + + async function loadUntil( + loadCreatedAfter: (date: Date) => Promise, + predicate: (loaded: T[]) => boolean + ): Promise { + const now = new Date(); + let months = 1; + const maxMonths = 120; + + let loaded: T[] = []; + while (!predicate(loaded) && months < maxMonths) { + const createdAfter = new Date(now); + createdAfter.setDate(now.getDate() - months * 30); + loaded = await loadCreatedAfter(createdAfter); + months *= 1.2; + } + + if (months >= maxMonths) { + return []; + } + return loaded; + } }); diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts index 631bbbf31eb9..59e0879d2ab9 100644 --- a/libs/langchain-community/src/document_loaders/web/jira.ts +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -369,34 +369,53 @@ export class JiraProjectLoader extends BaseDocumentLoader { } public async load(): Promise { - const allIssues: JiraIssue[] = []; - try { - for await (const issues of this.fetchIssues()) { - allIssues.push(...issues); - } - - return this.documentConverter.convertToDocuments(allIssues); + const allJiraIssues = await this.loadAsIssues(); + return this.documentConverter.convertToDocuments(allJiraIssues); } catch (error) { console.error("Error:", error); return []; } } + public async loadAsIssues(): Promise { + const allIssues: JiraIssue[] = []; + + for await (const issues of this.fetchIssues()) { + allIssues.push(...issues); + } + + return allIssues; + } + + protected toJiraDateString(date: Date | undefined): string | undefined { + if (!date) { + return undefined; + } + const year = date.getFullYear(); + const month = String(date.getMonth() + 1).padStart(2, "0"); + const dayOfMonth = String(date.getDate()).padStart(2, "0"); + return `${year}-${month}-${dayOfMonth}`; + } + protected async *fetchIssues(): AsyncIterable { const authorizationHeader = this.buildAuthorizationHeader(); const url = `${this.host}${API_ENDPOINTS.SEARCH}`; + const createdAfterAsString = this.toJiraDateString(this.createdAfter); let startAt = 0; while (true) { try { const jqlProps = [ `project=${this.projectKey}`, - `startAt=${startAt}`, - `maxResults=${this.limitPerRequest}`, - ...(this.createdAfter ? [`created>${this.createdAfter.toISOString()}`] : []) + ...(createdAfterAsString ? [`created>=${createdAfterAsString}`] : []), ]; - const pageUrl = `${url}?jql=${jqlProps.join('&')}`; + const params = new URLSearchParams({ + jql: jqlProps.join(" AND "), + startAt: `${startAt}`, + maxResults: `${this.limitPerRequest}`, + }); + const pageUrl = `${url}?${params}`; const options = { method: "GET",