diff options
-rw-r--r-- | src/client/views/nodes/ChatBox/Agent.ts | 8 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.tsx | 86 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/tools/RAGTool.ts | 43 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/types.ts | 6 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 47 | ||||
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 180 |
6 files changed, 198 insertions, 172 deletions
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 210d3c804..bada4b146 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -17,12 +17,12 @@ export class Agent { private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; - private history: () => string; + private _history: () => string; - constructor(_vectorstore: Vectorstore, summaries: () => string, _history: () => string) { + constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; - this.history = _history; + this._history = history; this.tools = { wikipedia: new WikipediaTool(), calculate: new CalculateTool(), @@ -33,7 +33,7 @@ export class Agent { async askAgent(question: string, maxTurns: number = 8): Promise<string> { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); - const chatHistory = this.history(); + const chatHistory = this._history(); console.log(`Chat history: ${chatHistory}`); const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); console.log(`System prompt: ${systemPrompt}`); diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index a47e9a95b..5d0a16b4f 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -63,11 +63,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id); - this.agent = new Agent( - this.vectorstore, - () => this.summaries, - () => this.formattedHistory - ); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory); reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), @@ -162,18 +158,66 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - const doc_chunk_ids: string[] = JSON.parse(StrCast(doc.chunk_ids)); - if (!doc_chunk_ids.includes(chunk_id)) continue; - const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url); - //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); + console.log(JSON.parse(StrCast(doc.chunk_simpl))); + const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }]; + const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }]; + + const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_text_chunk) { + const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; + console.log('URL: ' + doc_url); + + //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); + } + + const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_image_chunk) { + const location_string: string = found_image_chunk.location; + + // Extract variables from location_string + const values = location_string.replace(/[\[\]]/g, '').split(','); + + // Ensure we have exactly 4 values + if (values.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; // or handle this error as appropriate + } + + const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); + const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); + const y2 = parseInt(values[3]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + + // Parse values to numbers + // const [x1, y1, x2, y2] = values.map(Number); + + // Check if any parsing resulted in NaN + if ([x1, y1, x2, y2].some(isNaN)) { + console.error('All values in location string must be valid numbers'); + return; // or handle this error as appropriate + } + + const highlight_doc = Docs.Create.FreeformDocument([], { + x: x1, + y: y1, + _width: x2 - x1, + _height: y2 - y1, + backgroundColor: 'rgba(255, 255, 0, 0.5)', + }); + + Doc.AddDocToList(doc[DocData], Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc); + highlight_doc.annotationOn = doc; + Doc.SetContainer(highlight_doc, doc); + DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + } } // You can implement additional functionality here, such as showing a modal with the full citation content }; @@ -296,9 +340,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { return history; } - retrieveSummaries(): string { + retrieveSummaries = () => { return this.summaries; - } + }; + + retrieveFormattedHistory = () => { + return this.formattedHistory; + }; @action handleFollowUpClick = (question: string) => { diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 90f7bebfe..0a4529974 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -2,6 +2,7 @@ import { BaseTool } from './BaseTool'; import { Vectorstore } from '../vectorstore/VectorstoreUpload'; import { Chunk } from '../types'; import * as fs from 'fs'; +import { Networking } from '../../../../Network'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor( @@ -52,42 +53,22 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { async execute(args: { hypothetical_document_chunk: string }): Promise<any> { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); - return this.getFormattedChunks(relevantChunks); + const formatted_chunks = await this.getFormattedChunks(relevantChunks); + return formatted_chunks; } - private getFormattedChunks(relevantChunks: Chunk[]): { type: string; text?: string; image_url?: { url: string } }[] { - const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }]; + async getFormattedChunks(relevantChunks: Chunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> { + try { + const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); - for (const chunk of relevantChunks) { - content.push({ - type: 'text', - text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type === 'image' || chunk.metadata.type === 'table' ? 'image' : 'text'}>`, - }); - - if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { - try { - const imageBuffer = fs.readFileSync(chunk.metadata.file_path); - const base64Image = imageBuffer.toString('base64'); - if (base64Image) { - content.push({ - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${base64Image}`, - }, - }); - } else { - console.log(`Failed to encode image for chunk ${chunk.id}`); - } - } catch (error) { - console.error(`Error reading image file for chunk ${chunk.id}:`, error); - } + if (!formattedChunks) { + throw new Error('Failed to format chunks'); } - content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` }); + return formattedChunks; + } catch (error) { + console.error('Error formatting chunks:', error); + throw error; } - - content.push({ type: 'text', text: '</chunks>' }); - - return content; } } diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index e510837c8..783610d6d 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -50,6 +50,9 @@ export interface Chunk { location: string; start_page: number; end_page: number; + base64_data?: string; + page_width: number; + page_height: number; }; } @@ -93,6 +96,9 @@ export function convertToAIDocument(json: any): AI_Document { location: chunk.metadata.location, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page, + base64_data: chunk.metadata.base64_data ?? undefined, + width: chunk.metadata.width ?? undefined, + height: chunk.metadata.height ?? undefined, }, })); diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index b47e276e7..b3e3f8679 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { Chunk, AI_Document, convertToAIDocument } from '../types'; +import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -74,24 +74,47 @@ export class Vectorstore { if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') { const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); console.log('Document JSON:', document_json); - const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(ai_document); - await this.indexDocument(ai_document); - console.log(`Document added: ${ai_document.file_name}`); - doc.summary = ai_document.summary; - doc.ai_purpose = ai_document.purpose; + //const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(document_json); + await this.indexDocument(convertToAIDocument(document_json)); + console.log(`Document added: ${document_json.file_name}`); + doc.summary = document_json.summary; + doc.ai_purpose = document_json.purpose; if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { doc.vectorstore_id = JSON.stringify([this.id]); } else { doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id])); } - if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') { - doc.chunk_ids = JSON.stringify([]); + if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') { + doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] }); } - ai_document.chunks.forEach(chunk => { - console.log(doc.chunk_ids); - doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id])); + let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = { + text_chunks: [], + image_chunks: [], + }; + + document_json.chunks.forEach((chunk: Chunk) => { + let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[]; + switch (chunk.metadata.type) { + case CHUNK_TYPE.TEXT: + chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.text_chunks = new_chunk_simpl.text_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + case CHUNK_TYPE.IMAGE: + case CHUNK_TYPE.TABLE: + console.log('Location:', chunk.metadata.location); + console.log('Height:', chunk.metadata.page_height); + console.log('Width:', chunk.metadata.page_width); + chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + } }); + doc.ai_document_status = 'COMPLETED'; } } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index d5a8ebeb3..36468157a 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -8,6 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; import axios from 'axios'; +import { Chunk } from '../../client/views/nodes/ChatBox/types'; export enum Directory { parsed_files = 'parsed_files', @@ -44,112 +45,6 @@ export default class AssistantManager extends ApiManager { register({ method: Method.POST, - subscription: '/uploadPDFToVectorStore', - secureHandler: async ({ req, res }) => { - const { urls, threadID, assistantID, vector_store_id } = req.body; - - const csvFilesIds: string[] = []; - const otherFileIds: string[] = []; - const allFileIds: string[] = []; - - const fileProcesses = urls.map(async (source: string) => { - const fullPath = path.join(publicDirectory, source); - const fileData = await openai.files.create({ file: createReadStream(fullPath), purpose: 'assistants' }); - allFileIds.push(fileData.id); - if (source.endsWith('.csv')) { - console.log(source); - csvFilesIds.push(fileData.id); - } else { - openai.beta.vectorStores.files.create(vector_store_id, { file_id: fileData.id }); - otherFileIds.push(fileData.id); - } - }); - try { - await Promise.all(fileProcesses).then(() => { - res.send({ vector_store_id: vector_store_id, openai_file_ids: allFileIds }); - }); - } catch (error) { - res.status(500).send({ error: 'Failed to process files' + error }); - } - }, - }); - - register({ - method: Method.POST, - subscription: '/downloadFileFromOpenAI', - secureHandler: async ({ req, res }) => { - const { file_id, file_name } = req.body; - //let files_directory: string; - let files_directory = '/files/openAIFiles/'; - switch (file_name.split('.').pop()) { - case 'pdf': - files_directory = '/files/pdfs/'; - break; - case 'csv': - files_directory = '/files/csv/'; - break; - case 'png': - case 'jpg': - case 'jpeg': - files_directory = '/files/images/'; - break; - default: - break; - } - - const directory = path.join(publicDirectory, files_directory); - - if (!fs.existsSync(directory)) { - fs.mkdirSync(directory); - } - const file = await openai.files.content(file_id); - const new_file_name = `${uuid.v4()}-${file_name}`; - const file_path = path.join(directory, new_file_name); - const file_array_buffer = await file.arrayBuffer(); - const bufferView = new Uint8Array(file_array_buffer); - try { - const written_file = await writeFileAsync(file_path, bufferView); - console.log(written_file); - console.log(file_path); - console.log(file_array_buffer); - console.log(bufferView); - const file_object = new File([bufferView], file_name); - //DashUploadUtils.upload(file_object, 'openAIFiles'); - res.send({ file_path: path.join(files_directory, new_file_name) }); - /* res.send( { - source: "file", - result: { - accessPaths: { - agnostic: {client: path.join('/files/openAIFiles/', `${uuid.v4()}-${file_name}`)} - }, - rawText: "", - duration: 0, - }, - } ); */ - } catch (error) { - res.status(500).send({ error: 'Failed to write file' + error }); - } - }, - }); - - register({ - method: Method.POST, - subscription: '/askAgent', - secureHandler: async ({ req, res }) => { - const { input } = req.body; - - try { - const response = await axios.post('http://localhost:8080/ask', { input }); - res.send({ response: response.data.response }); - } catch (error: any) { - console.error('Error communicating with chatbot:', error); - res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message }); - } - }, - }); - - register({ - method: Method.POST, subscription: '/getWikipediaSummary', secureHandler: async ({ req, res }) => { const { title } = req.body; @@ -212,6 +107,37 @@ export default class AssistantManager extends ApiManager { } } + if (result.chunks && Array.isArray(result.chunks)) { + for (const chunk of result.chunks) { + if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) { + let files_directory = '/files/chunk_images/'; + const directory = path.join(publicDirectory, files_directory); + + if (!fs.existsSync(directory)) { + fs.mkdirSync(directory); + } + + const fileName = path.basename(chunk.metadata.file_path); + const filePath = path.join(directory, fileName); + + // Check if base64_data exists + if (chunk.metadata.base64_data) { + // Decode Base64 and save as file + const buffer = Buffer.from(chunk.metadata.base64_data, 'base64'); + await fs.promises.writeFile(filePath, buffer); + + // Update the file path in the chunk + chunk.metadata.file_path = path.join(files_directory, fileName); + chunk.metadata.base64_data = undefined; + } else { + console.warn(`No base64_data found for chunk: ${fileName}`); + } + } + } + } else { + console.warn("Result does not contain an iterable 'chunks' property"); + } + res.send({ document_json: result }); } catch (error: any) { console.error('Error communicating with chatbot:', error); @@ -219,5 +145,47 @@ export default class AssistantManager extends ApiManager { } }, }); + + register({ + method: Method.POST, + subscription: '/formatChunks', + secureHandler: async ({ req, res }) => { + const { relevantChunks } = req.body; + const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }]; + + for (const chunk of relevantChunks) { + content.push({ + type: 'text', + text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type === 'image' || chunk.metadata.type === 'table' ? 'image' : 'text'}>`, + }); + + if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { + try { + const filePath = serverPathToFile(Directory.parsed_files, chunk.metadata.file_path); + const imageBuffer = await readFileAsync(filePath); + const base64Image = imageBuffer.toString('base64'); + if (base64Image) { + content.push({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${base64Image}`, + }, + }); + } else { + console.log(`Failed to encode image for chunk ${chunk.id}`); + } + } catch (error) { + console.error(`Error reading image file for chunk ${chunk.id}:`, error); + } + } + + content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` }); + } + + content.push({ type: 'text', text: '</chunks>' }); + + res.send({ formattedChunks: content }); + }, + }); } } |