aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/client/views/nodes/ChatBox/Agent.ts8
-rw-r--r--src/client/views/nodes/ChatBox/ChatBox.tsx86
-rw-r--r--src/client/views/nodes/ChatBox/tools/RAGTool.ts43
-rw-r--r--src/client/views/nodes/ChatBox/types.ts6
-rw-r--r--src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts47
-rw-r--r--src/server/ApiManagers/AssistantManager.ts180
6 files changed, 198 insertions, 172 deletions
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts
index 210d3c804..bada4b146 100644
--- a/src/client/views/nodes/ChatBox/Agent.ts
+++ b/src/client/views/nodes/ChatBox/Agent.ts
@@ -17,12 +17,12 @@ export class Agent {
private messages: AgentMessage[] = [];
private interMessages: AgentMessage[] = [];
private vectorstore: Vectorstore;
- private history: () => string;
+ private _history: () => string;
- constructor(_vectorstore: Vectorstore, summaries: () => string, _history: () => string) {
+ constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string) {
this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
this.vectorstore = _vectorstore;
- this.history = _history;
+ this._history = history;
this.tools = {
wikipedia: new WikipediaTool(),
calculate: new CalculateTool(),
@@ -33,7 +33,7 @@ export class Agent {
async askAgent(question: string, maxTurns: number = 8): Promise<string> {
console.log(`Starting query: ${question}`);
this.messages.push({ role: 'user', content: question });
- const chatHistory = this.history();
+ const chatHistory = this._history();
console.log(`Chat history: ${chatHistory}`);
const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory);
console.log(`System prompt: ${systemPrompt}`);
diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx
index a47e9a95b..5d0a16b4f 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.tsx
+++ b/src/client/views/nodes/ChatBox/ChatBox.tsx
@@ -63,11 +63,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id);
}
this.vectorstore = new Vectorstore(this.vectorstore_id);
- this.agent = new Agent(
- this.vectorstore,
- () => this.summaries,
- () => this.formattedHistory
- );
+ this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory);
reaction(
() => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })),
@@ -162,18 +158,66 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
const currentLinkedDocs: Doc[] = this.linkedDocs;
const chunk_id = citation.chunk_id;
for (let doc of currentLinkedDocs) {
- const doc_chunk_ids: string[] = JSON.parse(StrCast(doc.chunk_ids));
- if (!doc_chunk_ids.includes(chunk_id)) continue;
- const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname;
- console.log('URL: ' + doc_url);
- //const ai_field_id = doc[this.Document[Id] + '_ai_field_id'];
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
- console.log(doc.data);
- //look at context path for each docview and choose the doc view that has as
- //its parent the same collection view the chatbox is in
- const first_view = Array.from(doc[DocViews])[0];
- first_view.ComponentView?.search?.(citation.direct_text);
- });
+ console.log(JSON.parse(StrCast(doc.chunk_simpl)));
+ const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }];
+ const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }];
+
+ const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id);
+ if (found_text_chunk) {
+ const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname;
+ console.log('URL: ' + doc_url);
+
+ //const ai_field_id = doc[this.Document[Id] + '_ai_field_id'];
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ console.log(doc.data);
+ //look at context path for each docview and choose the doc view that has as
+ //its parent the same collection view the chatbox is in
+ const first_view = Array.from(doc[DocViews])[0];
+ first_view.ComponentView?.search?.(citation.direct_text);
+ });
+ }
+
+ const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id);
+ if (found_image_chunk) {
+ const location_string: string = found_image_chunk.location;
+
+ // Extract variables from location_string
+ const values = location_string.replace(/[\[\]]/g, '').split(',');
+
+ // Ensure we have exactly 4 values
+ if (values.length !== 4) {
+ console.error('Location string must contain exactly 4 numbers');
+ return; // or handle this error as appropriate
+ }
+
+ const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width);
+ const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height);
+ const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width);
+ const y2 = parseInt(values[3]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height);
+
+ // Parse values to numbers
+ // const [x1, y1, x2, y2] = values.map(Number);
+
+ // Check if any parsing resulted in NaN
+ if ([x1, y1, x2, y2].some(isNaN)) {
+ console.error('All values in location string must be valid numbers');
+ return; // or handle this error as appropriate
+ }
+
+ const highlight_doc = Docs.Create.FreeformDocument([], {
+ x: x1,
+ y: y1,
+ _width: x2 - x1,
+ _height: y2 - y1,
+ backgroundColor: 'rgba(255, 255, 0, 0.5)',
+ });
+
+ Doc.AddDocToList(doc[DocData], Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc);
+ highlight_doc.annotationOn = doc;
+ Doc.SetContainer(highlight_doc, doc);
+ DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {});
+ }
}
// You can implement additional functionality here, such as showing a modal with the full citation content
};
@@ -296,9 +340,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
return history;
}
- retrieveSummaries(): string {
+ retrieveSummaries = () => {
return this.summaries;
- }
+ };
+
+ retrieveFormattedHistory = () => {
+ return this.formattedHistory;
+ };
@action
handleFollowUpClick = (question: string) => {
diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts
index 90f7bebfe..0a4529974 100644
--- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts
+++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts
@@ -2,6 +2,7 @@ import { BaseTool } from './BaseTool';
import { Vectorstore } from '../vectorstore/VectorstoreUpload';
import { Chunk } from '../types';
import * as fs from 'fs';
+import { Networking } from '../../../../Network';
export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> {
constructor(
@@ -52,42 +53,22 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> {
async execute(args: { hypothetical_document_chunk: string }): Promise<any> {
const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
- return this.getFormattedChunks(relevantChunks);
+ const formatted_chunks = await this.getFormattedChunks(relevantChunks);
+ return formatted_chunks;
}
- private getFormattedChunks(relevantChunks: Chunk[]): { type: string; text?: string; image_url?: { url: string } }[] {
- const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }];
+ async getFormattedChunks(relevantChunks: Chunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> {
+ try {
+ const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
- for (const chunk of relevantChunks) {
- content.push({
- type: 'text',
- text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type === 'image' || chunk.metadata.type === 'table' ? 'image' : 'text'}>`,
- });
-
- if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
- try {
- const imageBuffer = fs.readFileSync(chunk.metadata.file_path);
- const base64Image = imageBuffer.toString('base64');
- if (base64Image) {
- content.push({
- type: 'image_url',
- image_url: {
- url: `data:image/jpeg;base64,${base64Image}`,
- },
- });
- } else {
- console.log(`Failed to encode image for chunk ${chunk.id}`);
- }
- } catch (error) {
- console.error(`Error reading image file for chunk ${chunk.id}:`, error);
- }
+ if (!formattedChunks) {
+ throw new Error('Failed to format chunks');
}
- content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` });
+ return formattedChunks;
+ } catch (error) {
+ console.error('Error formatting chunks:', error);
+ throw error;
}
-
- content.push({ type: 'text', text: '</chunks>' });
-
- return content;
}
}
diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts
index e510837c8..783610d6d 100644
--- a/src/client/views/nodes/ChatBox/types.ts
+++ b/src/client/views/nodes/ChatBox/types.ts
@@ -50,6 +50,9 @@ export interface Chunk {
location: string;
start_page: number;
end_page: number;
+ base64_data?: string;
+ page_width: number;
+ page_height: number;
};
}
@@ -93,6 +96,9 @@ export function convertToAIDocument(json: any): AI_Document {
location: chunk.metadata.location,
start_page: chunk.metadata.start_page,
end_page: chunk.metadata.end_page,
+ base64_data: chunk.metadata.base64_data ?? undefined,
+ width: chunk.metadata.width ?? undefined,
+ height: chunk.metadata.height ?? undefined,
},
}));
diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
index b47e276e7..b3e3f8679 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
@@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai';
import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
-import { Chunk, AI_Document, convertToAIDocument } from '../types';
+import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types';
import { Doc } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
@@ -74,24 +74,47 @@ export class Vectorstore {
if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') {
const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
console.log('Document JSON:', document_json);
- const ai_document: AI_Document = convertToAIDocument(document_json);
- this.documents.push(ai_document);
- await this.indexDocument(ai_document);
- console.log(`Document added: ${ai_document.file_name}`);
- doc.summary = ai_document.summary;
- doc.ai_purpose = ai_document.purpose;
+ //const ai_document: AI_Document = convertToAIDocument(document_json);
+ this.documents.push(document_json);
+ await this.indexDocument(convertToAIDocument(document_json));
+ console.log(`Document added: ${document_json.file_name}`);
+ doc.summary = document_json.summary;
+ doc.ai_purpose = document_json.purpose;
if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') {
doc.vectorstore_id = JSON.stringify([this.id]);
} else {
doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id]));
}
- if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') {
- doc.chunk_ids = JSON.stringify([]);
+ if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') {
+ doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] });
}
- ai_document.chunks.forEach(chunk => {
- console.log(doc.chunk_ids);
- doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id]));
+ let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = {
+ text_chunks: [],
+ image_chunks: [],
+ };
+
+ document_json.chunks.forEach((chunk: Chunk) => {
+ let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[];
+ switch (chunk.metadata.type) {
+ case CHUNK_TYPE.TEXT:
+ chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }];
+ new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ new_chunk_simpl.text_chunks = new_chunk_simpl.text_chunks.concat(chunk_to_add);
+ doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+ break;
+ case CHUNK_TYPE.IMAGE:
+ case CHUNK_TYPE.TABLE:
+ console.log('Location:', chunk.metadata.location);
+ console.log('Height:', chunk.metadata.page_height);
+ console.log('Width:', chunk.metadata.page_width);
+ chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }];
+ new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add);
+ doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+ break;
+ }
});
+
doc.ai_document_status = 'COMPLETED';
}
}
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index d5a8ebeb3..36468157a 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -8,6 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData';
import { Method } from '../RouteManager';
import ApiManager, { Registration } from './ApiManager';
import axios from 'axios';
+import { Chunk } from '../../client/views/nodes/ChatBox/types';
export enum Directory {
parsed_files = 'parsed_files',
@@ -44,112 +45,6 @@ export default class AssistantManager extends ApiManager {
register({
method: Method.POST,
- subscription: '/uploadPDFToVectorStore',
- secureHandler: async ({ req, res }) => {
- const { urls, threadID, assistantID, vector_store_id } = req.body;
-
- const csvFilesIds: string[] = [];
- const otherFileIds: string[] = [];
- const allFileIds: string[] = [];
-
- const fileProcesses = urls.map(async (source: string) => {
- const fullPath = path.join(publicDirectory, source);
- const fileData = await openai.files.create({ file: createReadStream(fullPath), purpose: 'assistants' });
- allFileIds.push(fileData.id);
- if (source.endsWith('.csv')) {
- console.log(source);
- csvFilesIds.push(fileData.id);
- } else {
- openai.beta.vectorStores.files.create(vector_store_id, { file_id: fileData.id });
- otherFileIds.push(fileData.id);
- }
- });
- try {
- await Promise.all(fileProcesses).then(() => {
- res.send({ vector_store_id: vector_store_id, openai_file_ids: allFileIds });
- });
- } catch (error) {
- res.status(500).send({ error: 'Failed to process files' + error });
- }
- },
- });
-
- register({
- method: Method.POST,
- subscription: '/downloadFileFromOpenAI',
- secureHandler: async ({ req, res }) => {
- const { file_id, file_name } = req.body;
- //let files_directory: string;
- let files_directory = '/files/openAIFiles/';
- switch (file_name.split('.').pop()) {
- case 'pdf':
- files_directory = '/files/pdfs/';
- break;
- case 'csv':
- files_directory = '/files/csv/';
- break;
- case 'png':
- case 'jpg':
- case 'jpeg':
- files_directory = '/files/images/';
- break;
- default:
- break;
- }
-
- const directory = path.join(publicDirectory, files_directory);
-
- if (!fs.existsSync(directory)) {
- fs.mkdirSync(directory);
- }
- const file = await openai.files.content(file_id);
- const new_file_name = `${uuid.v4()}-${file_name}`;
- const file_path = path.join(directory, new_file_name);
- const file_array_buffer = await file.arrayBuffer();
- const bufferView = new Uint8Array(file_array_buffer);
- try {
- const written_file = await writeFileAsync(file_path, bufferView);
- console.log(written_file);
- console.log(file_path);
- console.log(file_array_buffer);
- console.log(bufferView);
- const file_object = new File([bufferView], file_name);
- //DashUploadUtils.upload(file_object, 'openAIFiles');
- res.send({ file_path: path.join(files_directory, new_file_name) });
- /* res.send( {
- source: "file",
- result: {
- accessPaths: {
- agnostic: {client: path.join('/files/openAIFiles/', `${uuid.v4()}-${file_name}`)}
- },
- rawText: "",
- duration: 0,
- },
- } ); */
- } catch (error) {
- res.status(500).send({ error: 'Failed to write file' + error });
- }
- },
- });
-
- register({
- method: Method.POST,
- subscription: '/askAgent',
- secureHandler: async ({ req, res }) => {
- const { input } = req.body;
-
- try {
- const response = await axios.post('http://localhost:8080/ask', { input });
- res.send({ response: response.data.response });
- } catch (error: any) {
- console.error('Error communicating with chatbot:', error);
- res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
- }
- },
- });
-
- register({
- method: Method.POST,
subscription: '/getWikipediaSummary',
secureHandler: async ({ req, res }) => {
const { title } = req.body;
@@ -212,6 +107,37 @@ export default class AssistantManager extends ApiManager {
}
}
+ if (result.chunks && Array.isArray(result.chunks)) {
+ for (const chunk of result.chunks) {
+ if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
+ let files_directory = '/files/chunk_images/';
+ const directory = path.join(publicDirectory, files_directory);
+
+ if (!fs.existsSync(directory)) {
+ fs.mkdirSync(directory);
+ }
+
+ const fileName = path.basename(chunk.metadata.file_path);
+ const filePath = path.join(directory, fileName);
+
+ // Check if base64_data exists
+ if (chunk.metadata.base64_data) {
+ // Decode Base64 and save as file
+ const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
+ await fs.promises.writeFile(filePath, buffer);
+
+ // Update the file path in the chunk
+ chunk.metadata.file_path = path.join(files_directory, fileName);
+ chunk.metadata.base64_data = undefined;
+ } else {
+ console.warn(`No base64_data found for chunk: ${fileName}`);
+ }
+ }
+ }
+ } else {
+ console.warn("Result does not contain an iterable 'chunks' property");
+ }
+
res.send({ document_json: result });
} catch (error: any) {
console.error('Error communicating with chatbot:', error);
@@ -219,5 +145,47 @@ export default class AssistantManager extends ApiManager {
}
},
});
+
+ register({
+ method: Method.POST,
+ subscription: '/formatChunks',
+ secureHandler: async ({ req, res }) => {
+ const { relevantChunks } = req.body;
+ const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }];
+
+ for (const chunk of relevantChunks) {
+ content.push({
+ type: 'text',
+ text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type === 'image' || chunk.metadata.type === 'table' ? 'image' : 'text'}>`,
+ });
+
+ if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
+ try {
+ const filePath = serverPathToFile(Directory.parsed_files, chunk.metadata.file_path);
+ const imageBuffer = await readFileAsync(filePath);
+ const base64Image = imageBuffer.toString('base64');
+ if (base64Image) {
+ content.push({
+ type: 'image_url',
+ image_url: {
+ url: `data:image/jpeg;base64,${base64Image}`,
+ },
+ });
+ } else {
+ console.log(`Failed to encode image for chunk ${chunk.id}`);
+ }
+ } catch (error) {
+ console.error(`Error reading image file for chunk ${chunk.id}:`, error);
+ }
+ }
+
+ content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` });
+ }
+
+ content.push({ type: 'text', text: '</chunks>' });
+
+ res.send({ formattedChunks: content });
+ },
+ });
}
}