From 0c8001c61a55540cdeeb6ae249fdd2835580121c Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 15 Aug 2024 08:47:46 -0400 Subject: currently works --- src/client/views/nodes/ChatBox/Agent.ts | 1 + src/client/views/nodes/ChatBox/ChatBox.tsx | 2 +- .../views/nodes/ChatBox/tools/DataAnalysisTool.ts | 30 ++++++++++++++ src/client/views/nodes/ChatBox/tools/RAGTool.ts | 4 +- src/client/views/nodes/ChatBox/types.ts | 15 ++++++- .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 47 +++++++++++----------- src/server/ApiManagers/AssistantManager.ts | 2 +- 7 files changed, 71 insertions(+), 30 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 7b3703449..69b83c1b5 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -75,6 +75,7 @@ export class Agent { console.log(`Action: ${currentAction}`); if (this.tools[currentAction]) { i++; + console.log(builder.build({ action_rules: this.tools[currentAction].getActionRule(true) })); const nextPrompt = [ { type: 'text', diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 13c418b32..56c1e37f8 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType, TEXT_TYPE } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE } from './types'; import { Vectorstore } from './vectorstore/Vectorstore'; import { Agent } from './Agent'; import dotenv from 'dotenv'; diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts new file mode 100644 index 000000000..d2edc4847 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts @@ -0,0 +1,30 @@ +import { BaseTool } from './BaseTool'; + +export class DataAnalysisTool extends BaseTool<{ csv_file_name: string }> { + private csv_files_function: () => { [filename: string]: string }; + constructor(csv_files: () => { [filename: string]: string }) { + super( + 'dataAnalysis', + 'Analyzes, and provides insights, from a CSV file', + { + csv_file_name: { + type: 'string', + description: 'Name of the CSV file to analyze', + required: 'true', + }, + }, + 'Provide the name of the CSV file to analyze based on the user query and whichever available CSV file may be relevant.', + 'Provides the full CSV file text for your analysis based on the user query and the available CSV file. ' + ); + this.csv_files_function = csv_files; + } + + getFileContent(filename: string): string | undefined { + const files = this.csv_files_function(); + return files[filename]; + } + + async execute(args: { csv_file_name: string }): Promise { + return [{ type: 'text', text: this.getFileContent(args.csv_file_name) }]; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index be591fa9a..26fa2adc5 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -1,6 +1,6 @@ import { BaseTool } from './BaseTool'; import { Vectorstore } from '../vectorstore/Vectorstore'; -import { Chunk } from '../types'; +import { RAGChunk } from '../types'; import * as fs from 'fs'; import { Networking } from '../../../../Network'; import { file } from 'jszip'; @@ -117,7 +117,7 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { return formatted_chunks; } - async getFormattedChunks(relevantChunks: Chunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> { + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index bc3585a5b..4a0a9cfce 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -15,6 +15,7 @@ export enum CHUNK_TYPE { TEXT = 'text', IMAGE = 'image', TABLE = 'table', + URL = 'url', } export function getChunkType(type: string): CHUNK_TYPE { @@ -25,6 +26,8 @@ export function getChunkType(type: string): CHUNK_TYPE { return CHUNK_TYPE.IMAGE; case 'table': return CHUNK_TYPE.TABLE; + case 'url': + return CHUNK_TYPE.URL; default: return CHUNK_TYPE.TEXT; } @@ -51,7 +54,7 @@ export interface Citation { citation_id: string; } -export interface Chunk { +export interface RAGChunk { id: string; values: number[]; metadata: { @@ -69,12 +72,20 @@ export interface Chunk { }; } +export interface SimplifiedChunk { + chunkId: string; + startPage: number; + endPage: number; + location?: string; + chunkType: CHUNK_TYPE; +} + export interface AI_Document { purpose: string; file_name: string; num_pages: number; summary: string; - chunks: Chunk[]; + chunks: RAGChunk[]; type: string; } diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index 25aec751f..8e7be6eec 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -4,7 +4,7 @@ import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; import axios from 'axios'; -import { Chunk, AI_Document, CHUNK_TYPE } from '../types'; +import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -44,7 +44,7 @@ export class Vectorstore { if (!indexList.indexes?.some(index => index.name === this.indexName)) { await this.pinecone.createIndex({ name: this.indexName, - dimension: 768, + dimension: 1024, metric: 'cosine', spec: { serverless: { @@ -98,7 +98,7 @@ export class Vectorstore { image_chunks: [], }; - document_json.chunks.forEach((chunk: Chunk) => { + document_json.chunks.forEach((chunk: RAGChunk) => { let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[]; switch (chunk.metadata.type) { case CHUNK_TYPE.TEXT: @@ -125,7 +125,7 @@ export class Vectorstore { private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord[] = (document.chunks as Chunk[]).map( + const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map( chunk => ({ id: chunk.id, @@ -136,29 +136,28 @@ export class Vectorstore { await this.index.upsert(pineconeRecords); } - async retrieve(query: string, topK: number = 10): Promise { + async retrieve(query: string, topK: number = 10): Promise { console.log(`Retrieving chunks for query: ${query}`); try { - const url = 'https://api.jina.ai/v1/embeddings'; - const headers = { - 'Content-Type': 'application/json', - Authorization: `Bearer ${process.env.JINA_API_KEY}`, - }; - const data = { - model: 'jina-clip-v1', - normalized: true, - embedding_type: 'float', - input: [{ text: query }], - }; - - const response = await axios.post(url, data, { headers }); - const embeddings = response.data?.data?.[0]?.embedding; - - if (!embeddings || !Array.isArray(embeddings)) { - throw new Error('Invalid embedding response format from Jina API'); + const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ + texts: [query], + model: 'embed-english-v3.0', + inputType: 'search_query', + }); + + let queryEmbedding: number[]; + + if (Array.isArray(queryEmbeddingResponse.embeddings)) { + queryEmbedding = queryEmbeddingResponse.embeddings[0]; + } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { + queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0]; + } else { + throw new Error('Invalid embedding response format'); } - const queryEmbedding = embeddings; + if (!Array.isArray(queryEmbedding)) { + throw new Error('Query embedding is not an array'); + } const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, @@ -176,7 +175,7 @@ export class Vectorstore { id: match.id, values: match.values as number[], metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number }, - }) as Chunk + }) as RAGChunk ); } catch (error) { console.error(`Error retrieving chunks: ${error}`); diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index f69ca1383..91185e042 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -8,7 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; import axios from 'axios'; -import { Chunk } from '../../client/views/nodes/ChatBox/types'; +import { RAGChunk } from '../../client/views/nodes/ChatBox/types'; import { UnstructuredClient } from 'unstructured-client'; import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared'; -- cgit v1.2.3-70-g09d2