Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/jira document loader #7294

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
5883d58
chore(community): Bump faker dev dep to 8.4.1
mgiorgino-iobeya Nov 29, 2024
39f59df
feat(community): Add jira document loader - langchain-ai#2433
ivoneijr Sep 1, 2023
b8849e3
docs(community): add jira document loader documentation
ivoneijr Sep 1, 2023
23cac7e
chore(community): change environment variables example naming
ivoneijr Sep 1, 2023
0cea35d
chore(community): Fix jira document loader after rebase
mgiorgino-iobeya Nov 13, 2024
9d55261
chore(community): Reduce visibility of access token in JiraProjectLoader
mgiorgino-iobeya Nov 13, 2024
de36468
chore(community): Reformat document loader jira.ts
mgiorgino-iobeya Nov 13, 2024
7635eb3
chore(community): Rename baseUrl → host in jira document loader
mgiorgino-iobeya Nov 15, 2024
7054b18
chore(community): Make Jira document loader more modular (for test)
mgiorgino-iobeya Nov 15, 2024
c7dcdf4
chore(community): Rename JiraProjectLoader arg limit{ → PerRequest}
mgiorgino-iobeya Nov 29, 2024
dd88695
chore(community): Export all types for Jira document loader
mgiorgino-iobeya Nov 29, 2024
0a73618
chore(community): Add tests on jira document loader
mgiorgino-iobeya Nov 29, 2024
359518c
chore(community): Reformat jira document loader
mgiorgino-iobeya Nov 29, 2024
bc1f3b9
chore(community): Add .env examples for jira document loader
mgiorgino-iobeya Nov 29, 2024
e98b141
chore(community): jira doc loader example: host and projectKey as env…
mgiorgino-iobeya Nov 29, 2024
cc18630
feat(community) Add 'createdAfter' filter on jira document loader
mgiorgino-iobeya Dec 2, 2024
195a6d7
feat(community): Add jira document loader filtering on creation date
mgiorgino-iobeya Dec 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
sidebar_class_name: node-only
---

# Jira

:::tip Compatibility
Only available on Node.js.
:::

This covers how to load document objects from the issues in a Jira project.

## Credentials

- You'll need to set up an access token and provide it along with your Jira username in order to authenticate the request.
- You'll also need the `project key` for the project containing the issues to load as documents.

## Usage

import CodeBlock from "@theme/CodeBlock";
import Example from "@examples/document_loaders/jira.ts";

<CodeBlock language="typescript">{Example}</CodeBlock>
6 changes: 5 additions & 1 deletion examples/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,8 @@ FRIENDLI_TEAM=ADD_YOURS_HERE # https://suite.friendli.ai/
HANA_HOST=HANA_DB_ADDRESS
HANA_PORT=HANA_DB_PORT
HANA_UID=HANA_DB_USER
HANA_PWD=HANA_DB_PASSWORD
HANA_PWD=HANA_DB_PASSWORD
JIRA_HOST=ADD_YOURS_HERE
JIRA_USERNAME=ADD_YOURS_HERE
JIRA_ACCESS_TOKEN=ADD_YOURS_HERE
JIRA_PROJECT_KEY=ADD_YOURS_HERE
26 changes: 26 additions & 0 deletions examples/src/document_loaders/jira.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira";

// Connection settings come from the environment; host and project key fall
// back to placeholder values, credentials have no fallback.
const host = process.env.JIRA_HOST || "https://jira.example.com";
const projectKey = process.env.JIRA_PROJECT_KEY || "PROJ";
const username = process.env.JIRA_USERNAME;
const accessToken = process.env.JIRA_ACCESS_TOKEN;

if (!username || !accessToken) {
  // Without credentials there is nothing to load, so only print a hint.
  console.log(
    "You must provide a username and access token to run this example."
  );
} else {
  // Restrict the load to issues created within the last 30 days.
  const createdAfter = new Date();
  createdAfter.setDate(createdAfter.getDate() - 30);

  const documents = await new JiraProjectLoader({
    host,
    projectKey,
    username,
    accessToken,
    createdAfter,
  }).load();
  console.log(`Loaded ${documents.length} Jira document(s)`);
}
4 changes: 4 additions & 0 deletions libs/langchain-community/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
JIRA_HOST=ADD_YOURS_HERE
JIRA_USERNAME=ADD_YOURS_HERE
JIRA_ACCESS_TOKEN=ADD_YOURS_HERE
JIRA_PROJECT_KEY=ADD_YOURS_HERE
4 changes: 4 additions & 0 deletions libs/langchain-community/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,10 @@ document_loaders/web/imsdb.cjs
document_loaders/web/imsdb.js
document_loaders/web/imsdb.d.ts
document_loaders/web/imsdb.d.cts
document_loaders/web/jira.cjs
document_loaders/web/jira.js
document_loaders/web/jira.d.ts
document_loaders/web/jira.d.cts
document_loaders/web/figma.cjs
document_loaders/web/figma.js
document_loaders/web/figma.d.ts
Expand Down
1 change: 1 addition & 0 deletions libs/langchain-community/langchain.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ export const config = {
"document_loaders/web/gitbook": "document_loaders/web/gitbook",
"document_loaders/web/hn": "document_loaders/web/hn",
"document_loaders/web/imsdb": "document_loaders/web/imsdb",
"document_loaders/web/jira": "document_loaders/web/jira",
"document_loaders/web/figma": "document_loaders/web/figma",
"document_loaders/web/firecrawl": "document_loaders/web/firecrawl",
"document_loaders/web/github": "document_loaders/web/github",
Expand Down
15 changes: 14 additions & 1 deletion libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"@cloudflare/workers-types": "^4.20230922.0",
"@datastax/astra-db-ts": "^1.0.1",
"@elastic/elasticsearch": "^8.4.0",
"@faker-js/faker": "^7.6.0",
"@faker-js/faker": "8.4.1",
"@getmetal/metal-sdk": "^4.0.0",
"@getzep/zep-cloud": "^1.0.6",
"@getzep/zep-js": "^0.9.0",
Expand Down Expand Up @@ -2746,6 +2746,15 @@
"import": "./document_loaders/web/imsdb.js",
"require": "./document_loaders/web/imsdb.cjs"
},
"./document_loaders/web/jira": {
"types": {
"import": "./document_loaders/web/jira.d.ts",
"require": "./document_loaders/web/jira.d.cts",
"default": "./document_loaders/web/jira.d.ts"
},
"import": "./document_loaders/web/jira.js",
"require": "./document_loaders/web/jira.cjs"
},
"./document_loaders/web/figma": {
"types": {
"import": "./document_loaders/web/figma.d.ts",
Expand Down Expand Up @@ -4005,6 +4014,10 @@
"document_loaders/web/imsdb.js",
"document_loaders/web/imsdb.d.ts",
"document_loaders/web/imsdb.d.cts",
"document_loaders/web/jira.cjs",
"document_loaders/web/jira.js",
"document_loaders/web/jira.d.ts",
"document_loaders/web/jira.d.cts",
"document_loaders/web/figma.cjs",
"document_loaders/web/figma.js",
"document_loaders/web/figma.d.ts",
Expand Down
209 changes: 209 additions & 0 deletions libs/langchain-community/src/document_loaders/tests/jira.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/**
* NOTE: The JIRA_HOST, JIRA_USERNAME, JIRA_ACCESS_TOKEN and JIRA_PROJECT_KEY
* environment variables must be set, and the configured project must exist.
*/
import { Document } from "@langchain/core/documents";
import { expect, test } from "@jest/globals";
import {
JiraIssue,
JiraProjectLoader,
JiraProjectLoaderParams,
} from "../web/jira.js";

describe("JiraProjectLoader Integration Tests", () => {
// Resolve every mandatory setting up front (host, username, token, project
// key — in that order) so a missing variable aborts the suite immediately.
const requiredEnv = {
  host: requireEnvVar("JIRA_HOST"),
  username: requireEnvVar("JIRA_USERNAME"),
  accessToken: requireEnvVar("JIRA_ACCESS_TOKEN"),
  projectKey: requireEnvVar("JIRA_PROJECT_KEY"),
};
// Kept as standalone constants because the tests assert against them.
const JIRA_HOST = requiredEnv.host;
const JIRA_PROJECT_KEY = requiredEnv.projectKey;
// Base loader configuration shared (and selectively overridden) by all tests.
const jiraConf: JiraProjectLoaderParams = {
  host: requiredEnv.host,
  projectKey: requiredEnv.projectKey,
  username: requiredEnv.username,
  accessToken: requiredEnv.accessToken,
  limitPerRequest: 20,
};

test("should load Jira project issues as documents successfully", async () => {
  // Widen the look-back window until at least one document is returned.
  const docs = await loadJiraDocsUntil((loaded) => loaded.length > 0);

  expect(docs).toBeDefined();
  expect(Array.isArray(docs)).toBe(true);

  // Skip test if not enough issues available
  if (docs.length === 0) {
    return;
  }
  const [firstDoc] = docs;

  // Check document structure
  for (const property of ["pageContent", "metadata"]) {
    expect(firstDoc).toHaveProperty(property);
  }

  // Check metadata
  const { metadata } = firstDoc;
  expect(metadata).toHaveProperty("id");
  expect(metadata).toHaveProperty("host", JIRA_HOST);
  expect(metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY);

  // Check pageContent contains essential Jira issue information
  const expectedLabels = [
    "Issue:",
    "Project:",
    "Status:",
    "Priority:",
    "Type:",
    "Creator:",
  ];
  for (const label of expectedLabels) {
    expect(firstDoc.pageContent).toContain(label);
  }
});

test("should filter issues based on createdAfter date", async () => {
  // Load a baseline containing issues from at least two distinct calendar
  // days (time of day is ignored when comparing).
  const allIssues = await loadJiraIssuesUntil(haveTwoDifferentCreationDates);
  if (allIssues.length < 2) {
    // Skip test if not enough issues available
    return;
  }

  // Bucket the issues by creation day (date string without time).
  const issuesByDay = new Map<string, JiraIssue[]>();
  for (const issue of allIssues) {
    const day = asStringWithoutTime(new Date(issue.fields.created));
    const bucket = issuesByDay.get(day) ?? [];
    issuesByDay.set(day, bucket.concat(issue));
  }

  // Turn the buckets into a list of {date, issues} ordered by day.
  const dayGroups = Array.from(issuesByDay, ([date, issues]) => ({
    date,
    issues,
  }));
  dayGroups.sort((left, right) => left.date.localeCompare(right.date));

  // The middle day is the cutoff; issues from that day onwards are expected.
  const cutoffIndex = Math.floor(dayGroups.length / 2);
  const cutoffDate = new Date(dayGroups[cutoffIndex].date);
  const expectedIssues = dayGroups
    .slice(cutoffIndex)
    .flatMap((group) => group.issues);

  // Load issues created after the cutoff day.
  const filteredDocs = await new JiraProjectLoader({
    ...jiraConf,
    createdAfter: cutoffDate,
  }).load();

  // The filter must keep something, but strictly less than the baseline.
  expect(filteredDocs.length).toBeGreaterThan(0);
  expect(filteredDocs.length).toBeLessThan(allIssues.length);

  // Every returned document must report a creation day on/after the cutoff.
  const cutoffTimestamp = cutoffDate.getTime();
  for (const doc of filteredDocs) {
    const createdLines = doc.pageContent
      .split("\n")
      .filter((line) => /^Created: /.test(line));
    const createdText = createdLines[0].replace("Created: ", "");
    const createdDay = new Date(asStringWithoutTime(new Date(createdText)));
    expect(createdDay.getTime()).toBeGreaterThanOrEqual(cutoffTimestamp);
  }

  // The returned ids must match the expected part of the baseline exactly.
  const actualIds = new Set(filteredDocs.map((doc) => doc.metadata.id));
  const expectedIds = new Set(expectedIssues.map((issue) => issue.id));
  expect(actualIds).toEqual(expectedIds);
});

test("should handle invalid credentials", async () => {
  // Same configuration as the other tests, but with bogus credentials.
  const docs = await new JiraProjectLoader({
    ...jiraConf,
    username: "invalid_username",
    accessToken: "invalid_token",
  }).load();

  // The loader is expected to yield no documents.
  expect(docs).toEqual([]);
});

test("should handle invalid project key", async () => {
  // Valid credentials, but a project key that should not exist.
  const docs = await new JiraProjectLoader({
    ...jiraConf,
    projectKey: "INVALID_PROJECT_KEY",
  }).load();

  // The loader is expected to yield no documents.
  expect(docs).toEqual([]);
});

/**
 * Reads a mandatory environment variable.
 *
 * @param name Name of the environment variable to look up.
 * @returns The variable's non-empty value.
 * @throws Error when the variable is unset or set to an empty string.
 */
function requireEnvVar(name: string): string {
  // eslint-disable-next-line no-process-env
  const value = process.env[name] ?? "";
  if (value.length === 0) {
    throw new Error(`environment variable "${name}" must be set`);
  }
  return value;
}

/**
 * Formats a date as the date part of its ISO-8601 (UTC) representation,
 * i.e. everything before the "T" separator.
 *
 * @param date Date to format.
 * @returns The ISO date portion, e.g. "2024-11-29".
 */
function asStringWithoutTime(date: Date): string {
  const iso = date.toISOString();
  // Cut at the separator rather than a fixed width: extended years are longer.
  return iso.substring(0, iso.indexOf("T"));
}

/**
 * Tells whether two dates share the same date-only string, ignoring the
 * time of day.
 *
 * @param a First date.
 * @param b Second date.
 * @returns True when both dates format to the same date string.
 */
function sameDate(a: Date, b: Date) {
  const dayOfA = asStringWithoutTime(a);
  const dayOfB = asStringWithoutTime(b);
  return dayOfA === dayOfB;
}

/**
 * Checks whether the issues span at least two distinct creation days.
 *
 * @param issues Issues to inspect.
 * @returns True when there are two or more issues and at least one of them
 *   was created on a different day than the first issue.
 */
function haveTwoDifferentCreationDates(issues: JiraIssue[]): boolean {
  if (issues.length < 2) {
    return false;
  }
  // Compare every later issue against the first one; one mismatch is enough.
  for (let index = 1; index < issues.length; index += 1) {
    const differs = !sameDate(
      new Date(issues[index].fields.created),
      new Date(issues[0].fields.created)
    );
    if (differs) {
      return true;
    }
  }
  return false;
}

/**
 * Loads Jira issues as documents through `loadUntil`, which retries with
 * different `createdAfter` cutoffs until `predicate` accepts the result.
 *
 * @param predicate Returns true once the loaded documents are sufficient.
 */
async function loadJiraDocsUntil(predicate: (docs: Document[]) => boolean) {
  return loadUntil((createdAfter: Date) => {
    const loader = new JiraProjectLoader({ ...jiraConf, createdAfter });
    return loader.load();
  }, predicate);
}

/**
 * Loads raw Jira issues through `loadUntil`, which retries with different
 * `createdAfter` cutoffs until `predicate` accepts the result.
 *
 * @param predicate Returns true once the loaded issues are sufficient.
 */
async function loadJiraIssuesUntil(
  predicate: (docs: JiraIssue[]) => boolean
) {
  return loadUntil((createdAfter: Date) => {
    const loader = new JiraProjectLoader({ ...jiraConf, createdAfter });
    return loader.loadAsIssues();
  }, predicate);
}

/**
 * Repeatedly calls `loadCreatedAfter` with a cutoff date that moves further
 * into the past until `predicate` accepts the loaded items or the look-back
 * window is exhausted.
 *
 * The window starts at one 30-day period and grows by 20% after every
 * attempt; no further attempt is made once it reaches 120 periods.
 *
 * @param loadCreatedAfter Loads the items created after the given date.
 * @param predicate Returns true once the loaded items are sufficient.
 * @returns The first result accepted by `predicate`, or an empty array when
 *   no attempt produced an accepted result.
 */
async function loadUntil<T>(
  loadCreatedAfter: (date: Date) => Promise<T[]>,
  predicate: (loaded: T[]) => boolean
): Promise<T[]> {
  const now = new Date();
  const maxMonths = 120;
  let months = 1;

  let loaded: T[] = [];
  let satisfied = predicate(loaded);
  while (!satisfied && months < maxMonths) {
    const createdAfter = new Date(now);
    // Negative/overflowing day numbers roll back into earlier months.
    createdAfter.setDate(now.getDate() - months * 30);
    loaded = await loadCreatedAfter(createdAfter);
    months *= 1.2;
    satisfied = predicate(loaded);
  }

  // Decide on the predicate outcome, not on the window size: the last attempt
  // pushes `months` past the limit even when its result is acceptable.
  return satisfied ? loaded : [];
}
});
Loading