about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/client/views/nodes/ChatBox/Agent.ts 1
-rw-r--r-- src/client/views/nodes/ChatBox/ChatBox.tsx 2
-rw-r--r-- src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts 30
-rw-r--r-- src/client/views/nodes/ChatBox/tools/RAGTool.ts 4
-rw-r--r-- src/client/views/nodes/ChatBox/types.ts 15
-rw-r--r-- src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts 47
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 2
7 files changed, 71 insertions, 30 deletions
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts
index 7b3703449..69b83c1b5 100644
--- a/src/client/views/nodes/ChatBox/Agent.ts
+++ b/src/client/views/nodes/ChatBox/Agent.ts
@@ -75,6 +75,7 @@ export class Agent {
console.log(`Action: ${currentAction}`);
if (this.tools[currentAction]) {
i++;
+ console.log(builder.build({ action_rules: this.tools[currentAction].getActionRule(true) }));
const nextPrompt = [
{
type: 'text',
diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx
index 13c418b32..56c1e37f8 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.tsx
+++ b/src/client/views/nodes/ChatBox/ChatBox.tsx
@@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent';
import { FieldView, FieldViewProps } from '../FieldView';
import './ChatBox.scss';
import MessageComponentBox from './MessageComponent';
-import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType, TEXT_TYPE } from './types';
+import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE } from './types';
import { Vectorstore } from './vectorstore/Vectorstore';
import { Agent } from './Agent';
import dotenv from 'dotenv';
diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts
new file mode 100644
index 000000000..d2edc4847
--- /dev/null
+++ b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts
@@ -0,0 +1,30 @@
+import { BaseTool } from './BaseTool';
+
+export class DataAnalysisTool extends BaseTool<{ csv_file_name: string }> {
+ private csv_files_function: () => { [filename: string]: string };
+ constructor(csv_files: () => { [filename: string]: string }) {
+ super(
+ 'dataAnalysis',
+ 'Analyzes, and provides insights, from a CSV file',
+ {
+ csv_file_name: {
+ type: 'string',
+ description: 'Name of the CSV file to analyze',
+ required: 'true',
+ },
+ },
+ 'Provide the name of the CSV file to analyze based on the user query and whichever available CSV file may be relevant.',
+ 'Provides the full CSV file text for your analysis based on the user query and the available CSV file. '
+ );
+ this.csv_files_function = csv_files;
+ }
+
+ getFileContent(filename: string): string | undefined {
+ const files = this.csv_files_function();
+ return files[filename];
+ }
+
+ async execute(args: { csv_file_name: string }): Promise<any> {
+ return [{ type: 'text', text: this.getFileContent(args.csv_file_name) }];
+ }
+}
diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts
index be591fa9a..26fa2adc5 100644
--- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts
+++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts
@@ -1,6 +1,6 @@
import { BaseTool } from './BaseTool';
import { Vectorstore } from '../vectorstore/Vectorstore';
-import { Chunk } from '../types';
+import { RAGChunk } from '../types';
import * as fs from 'fs';
import { Networking } from '../../../../Network';
import { file } from 'jszip';
@@ -117,7 +117,7 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> {
return formatted_chunks;
}
- async getFormattedChunks(relevantChunks: Chunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> {
+ async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> {
try {
const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts
index bc3585a5b..4a0a9cfce 100644
--- a/src/client/views/nodes/ChatBox/types.ts
+++ b/src/client/views/nodes/ChatBox/types.ts
@@ -15,6 +15,7 @@ export enum CHUNK_TYPE {
TEXT = 'text',
IMAGE = 'image',
TABLE = 'table',
+ URL = 'url',
}
export function getChunkType(type: string): CHUNK_TYPE {
@@ -25,6 +26,8 @@ export function getChunkType(type: string): CHUNK_TYPE {
return CHUNK_TYPE.IMAGE;
case 'table':
return CHUNK_TYPE.TABLE;
+ case 'url':
+ return CHUNK_TYPE.URL;
default:
return CHUNK_TYPE.TEXT;
}
@@ -51,7 +54,7 @@ export interface Citation {
citation_id: string;
}
-export interface Chunk {
+export interface RAGChunk {
id: string;
values: number[];
metadata: {
@@ -69,12 +72,20 @@ export interface Chunk {
};
}
+export interface SimplifiedChunk {
+ chunkId: string;
+ startPage: number;
+ endPage: number;
+ location?: string;
+ chunkType: CHUNK_TYPE;
+}
+
export interface AI_Document {
purpose: string;
file_name: string;
num_pages: number;
summary: string;
- chunks: Chunk[];
+ chunks: RAGChunk[];
type: string;
}
diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
index 25aec751f..8e7be6eec 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
@@ -4,7 +4,7 @@ import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
import axios from 'axios';
-import { Chunk, AI_Document, CHUNK_TYPE } from '../types';
+import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types';
import { Doc } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
@@ -44,7 +44,7 @@ export class Vectorstore {
if (!indexList.indexes?.some(index => index.name === this.indexName)) {
await this.pinecone.createIndex({
name: this.indexName,
- dimension: 768,
+ dimension: 1024,
metric: 'cosine',
spec: {
serverless: {
@@ -98,7 +98,7 @@ export class Vectorstore {
image_chunks: [],
};
- document_json.chunks.forEach((chunk: Chunk) => {
+ document_json.chunks.forEach((chunk: RAGChunk) => {
let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[];
switch (chunk.metadata.type) {
case CHUNK_TYPE.TEXT:
@@ -125,7 +125,7 @@ export class Vectorstore {
private async indexDocument(document: any) {
console.log('Uploading vectors to content namespace...');
- const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as Chunk[]).map(
+ const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as RAGChunk[]).map(
chunk =>
({
id: chunk.id,
@@ -136,29 +136,28 @@ export class Vectorstore {
await this.index.upsert(pineconeRecords);
}
- async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
+ async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
console.log(`Retrieving chunks for query: ${query}`);
try {
- const url = 'https://api.jina.ai/v1/embeddings';
- const headers = {
- 'Content-Type': 'application/json',
- Authorization: `Bearer ${process.env.JINA_API_KEY}`,
- };
- const data = {
- model: 'jina-clip-v1',
- normalized: true,
- embedding_type: 'float',
- input: [{ text: query }],
- };
-
- const response = await axios.post(url, data, { headers });
- const embeddings = response.data?.data?.[0]?.embedding;
-
- if (!embeddings || !Array.isArray(embeddings)) {
- throw new Error('Invalid embedding response format from Jina API');
+ const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
+ texts: [query],
+ model: 'embed-english-v3.0',
+ inputType: 'search_query',
+ });
+
+ let queryEmbedding: number[];
+
+ if (Array.isArray(queryEmbeddingResponse.embeddings)) {
+ queryEmbedding = queryEmbeddingResponse.embeddings[0];
+ } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
+ queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
+ } else {
+ throw new Error('Invalid embedding response format');
}
- const queryEmbedding = embeddings;
+ if (!Array.isArray(queryEmbedding)) {
+ throw new Error('Query embedding is not an array');
+ }
const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({
vector: queryEmbedding,
@@ -176,7 +175,7 @@ export class Vectorstore {
id: match.id,
values: match.values as number[],
metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number },
- }) as Chunk
+ }) as RAGChunk
);
} catch (error) {
console.error(`Error retrieving chunks: ${error}`);
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index f69ca1383..91185e042 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -8,7 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData';
import { Method } from '../RouteManager';
import ApiManager, { Registration } from './ApiManager';
import axios from 'axios';
-import { Chunk } from '../../client/views/nodes/ChatBox/types';
+import { RAGChunk } from '../../client/views/nodes/ChatBox/types';
import { UnstructuredClient } from 'unstructured-client';
import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';