-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(community): Add AirtableLoader to load documents from Airtable with retry and pagination handling (#7106)
Co-authored-by: Jacob Lee <[email protected]>
- Loading branch information
1 parent
306f31e
commit 18b1810
Showing
9 changed files
with
504 additions
and
5 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
docs/core_docs/docs/integrations/document_loaders/web_loaders/airtable.mdx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
--- | ||
hide_table_of_contents: true | ||
--- | ||
|
||
import loadExample from "@examples/document_loaders/airtable_load"; | ||
import CodeBlock from "@theme/CodeBlock"; | ||
|
||
# AirtableLoader | ||
|
||
The `AirtableLoader` class provides functionality to load documents from Airtable tables. It supports two main methods: | ||
|
||
1. `load()`: Retrieves all records at once, ideal for small to moderate datasets. | ||
2. `loadLazy()`: Fetches records one by one, which is more memory-efficient for large datasets. | ||
|
||
## Prerequisites | ||
|
||
Ensure that your Airtable API token is available as an environment variable: | ||
|
||
```typescript | ||
process.env.AIRTABLE_API_TOKEN = "YOUR_AIRTABLE_API_TOKEN"; | ||
``` | ||
|
||
## Usage | ||
|
||
<CodeBlock language="typescript">{loadExample}</CodeBlock> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import { AirtableLoader } from "@langchain/community/document_loaders/web/airtable"; | ||
import { Document } from "@langchain/core/documents"; | ||
|
||
// Default Airtable loader: `load()` resolves with all documents at once.
const loader = new AirtableLoader({
  tableId: "YOUR_TABLE_ID",
  baseId: "YOUR_BASE_ID",
});

try {
  const documents: Document[] = await loader.load();
  console.log("Loaded documents:", documents);
} catch (error) {
  console.error("Error loading documents:", error);
}

// Lazy Airtable loader: `loadLazy()` is consumed with `for await`, one document per iteration.
const loaderLazy = new AirtableLoader({
  tableId: "YOUR_TABLE_ID",
  baseId: "YOUR_BASE_ID",
});

try {
  console.log("Lazily loading documents:");
  // Iterate the instance created for this section (not the default `loader`).
  for await (const document of loaderLazy.loadLazy()) {
    console.log("Loaded document:", document);
  }
} catch (error) {
  console.error("Error loading documents lazily:", error);
}

// Airtable loader restricted to a specific view via `kwargs`.
const loaderView = new AirtableLoader({
  tableId: "YOUR_TABLE_ID",
  baseId: "YOUR_BASE_ID",
  kwargs: { view: "YOUR_VIEW_NAME" },
});

try {
  // Load through `loaderView` so the `view` option configured above is actually used.
  const documents: Document[] = await loaderView.load();
  console.log("Loaded documents with view:", documents);
} catch (error) {
  console.error("Error loading documents with view:", error);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
libs/langchain-community/src/document_loaders/tests/airtable.int.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/** | ||
* NOTE: AIRTABLE_API_TOKEN should be set in environment variables | ||
*/ | ||
import { Document } from "@langchain/core/documents"; | ||
import { AirtableLoader } from "../web/airtable.js"; | ||
|
||
describe("AirtableLoader Integration Tests", () => {
  // These tests expect AIRTABLE_API_TOKEN to be set in the environment.
  // NOTE(review): the IDs below look like placeholders — presumably they must be
  // replaced with a real base/table before the suite can pass; confirm.
  const baseId = "BASE_ID";
  const tableId = "TABLE_ID";

  // Per-test timeout in milliseconds.
  const TEST_TIMEOUT_MS = 20000;

  /**
   * Shared assertions: the list is defined and non-empty, and every entry is a
   * Document with defined content and metadata that points at the configured
   * base and table.
   */
  const expectDocumentsFromTable = (docs: Document[]): void => {
    expect(docs).toBeDefined();
    expect(docs.length).toBeGreaterThan(0);

    for (const doc of docs) {
      expect(doc).toBeInstanceOf(Document);
      expect(doc.pageContent).toBeDefined();
      expect(doc.metadata).toMatchObject({
        source: `${baseId}_${tableId}`,
        base_id: baseId,
        table_id: tableId,
      });
    }
  };

  // Integration tests for the load method
  describe("load", () => {
    it(
      "should load documents from Airtable",
      async () => {
        const airtable = new AirtableLoader({ tableId, baseId });

        const loaded = await airtable.load();

        expectDocumentsFromTable(loaded);
      },
      TEST_TIMEOUT_MS
    );
  });

  // Integration tests for the loadLazy method
  describe("loadLazy", () => {
    it(
      "should lazily load documents from Airtable",
      async () => {
        const airtable = new AirtableLoader({ tableId, baseId });

        // Drain the async iterator into an array so it can be asserted on as a whole.
        const collected: Document[] = [];
        for await (const item of airtable.loadLazy()) {
          collected.push(item);
        }

        expectDocumentsFromTable(collected);
      },
      TEST_TIMEOUT_MS
    );
  });
});
177 changes: 177 additions & 0 deletions
177
libs/langchain-community/src/document_loaders/tests/airtable.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
/* eslint-disable @typescript-eslint/no-explicit-any */ | ||
/* eslint-disable no-process-env */ | ||
import { Document } from "@langchain/core/documents"; | ||
import { expect, jest } from "@jest/globals"; | ||
import { AirtableLoader } from "../web/airtable.js"; | ||
|
||
// Replace the global fetch with a Jest mock function.
(global as any).fetch = jest.fn();

describe("AirtableLoader", () => {
  /**
   * Builds the n-th mock Airtable record (n = 1 or 2 in these tests).
   * Key order (id, fields, createdTime) is kept stable because the assertions
   * compare against JSON.stringify output.
   */
  const makeRecord = (n: number) => ({
    id: `rec${n}`,
    fields: { Name: `Record ${n}` },
    createdTime: `2021-01-0${n}T00:00:00.000Z`,
  });

  beforeEach(() => {
    jest.clearAllMocks();
    process.env.AIRTABLE_API_TOKEN = "foobar";
  });

  // Tests for the load method
  describe("load", () => {
    it("should load documents correctly", async () => {
      const airtable = new AirtableLoader({
        tableId: "tableId",
        baseId: "baseId",
        kwargs: { view: "test-view" },
      });

      // Spy on fetchRecords (reached through an `any` cast) and make it
      // resolve with a single page holding two records.
      const fetchSpy = jest.spyOn(airtable as any, "fetchRecords");
      const records = [makeRecord(1), makeRecord(2)];
      fetchSpy.mockResolvedValue({ records });

      const result = await airtable.load();

      expect(result).toHaveLength(2);
      expect(result[0].pageContent).toBe(JSON.stringify(records[0]));
      expect(result[1].pageContent).toBe(JSON.stringify(records[1]));
      expect(fetchSpy).toHaveBeenCalledTimes(1);
    });

    it("should handle pagination correctly", async () => {
      const airtable = new AirtableLoader({
        tableId: "tableId",
        baseId: "baseId",
      });

      const fetchSpy = jest.spyOn(airtable as any, "fetchRecords");
      const firstPage = [makeRecord(1)];
      const secondPage = [makeRecord(2)];

      // First response carries an offset, the second does not.
      fetchSpy
        .mockResolvedValueOnce({ records: firstPage, offset: "next-page" })
        .mockResolvedValueOnce({ records: secondPage });

      const result = await airtable.load();

      expect(result).toHaveLength(2);
      expect(result[0].pageContent).toBe(JSON.stringify(firstPage[0]));
      expect(result[1].pageContent).toBe(JSON.stringify(secondPage[0]));
      expect(fetchSpy).toHaveBeenCalledTimes(2);
    });

    it("should retry fetchRecords on failure", async () => {
      const airtable = new AirtableLoader({
        tableId: "tableId",
        baseId: "baseId",
      });

      const fetchSpy = jest.spyOn(airtable as any, "fetchRecords");
      const failure = new Error("Network Error");
      const records = [makeRecord(1)];

      // Reject on the first call, succeed on the second.
      fetchSpy
        .mockRejectedValueOnce(failure)
        .mockResolvedValueOnce({ records });

      const result = await airtable.load();

      expect(result).toHaveLength(1);
      expect(result[0].pageContent).toBe(JSON.stringify(records[0]));
      expect(fetchSpy).toHaveBeenCalledTimes(2);
    });
  });

  // Tests for the loadLazy method
  describe("loadLazy", () => {
    it("should yield documents correctly", async () => {
      const airtable = new AirtableLoader({
        tableId: "tableId",
        baseId: "baseId",
      });

      const fetchSpy = jest.spyOn(airtable as any, "fetchRecords");
      const records = [makeRecord(1), makeRecord(2)];
      fetchSpy.mockResolvedValue({ records });

      // Drain the async iterator so the yielded documents can be inspected.
      const collected: Document[] = [];
      for await (const item of airtable.loadLazy()) {
        collected.push(item);
      }

      expect(collected).toHaveLength(2);
      expect(collected[0].pageContent).toBe(JSON.stringify(records[0]));
      expect(collected[1].pageContent).toBe(JSON.stringify(records[1]));
      expect(fetchSpy).toHaveBeenCalledTimes(1);
    });

    it("should handle errors in loadLazy", async () => {
      const airtable = new AirtableLoader({
        tableId: "tableId",
        baseId: "baseId",
      });

      const fetchSpy = jest.spyOn(airtable as any, "fetchRecords");
      fetchSpy.mockRejectedValue(new Error("Network Error"));

      // Pulling the first item must reject with the loader's lazy-load error message.
      await expect(airtable.loadLazy().next()).rejects.toThrow(
        "Failed to load Airtable records lazily"
      );
      expect(fetchSpy).toHaveBeenCalled();
    });
  });
});
Oops, something went wrong.