about summary refs log tree commit diff
path: root/src/client/views/nodes/ChatBox/vectorstore
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
-rw-r--r-- src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 47
1 files changed, 35 insertions, 12 deletions
diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
index b47e276e7..b3e3f8679 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
@@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai';
import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
-import { Chunk, AI_Document, convertToAIDocument } from '../types';
+import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types';
import { Doc } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
@@ -74,24 +74,47 @@ export class Vectorstore {
if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') {
const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
console.log('Document JSON:', document_json);
- const ai_document: AI_Document = convertToAIDocument(document_json);
- this.documents.push(ai_document);
- await this.indexDocument(ai_document);
- console.log(`Document added: ${ai_document.file_name}`);
- doc.summary = ai_document.summary;
- doc.ai_purpose = ai_document.purpose;
+ //const ai_document: AI_Document = convertToAIDocument(document_json);
+ this.documents.push(document_json);
+ await this.indexDocument(convertToAIDocument(document_json));
+ console.log(`Document added: ${document_json.file_name}`);
+ doc.summary = document_json.summary;
+ doc.ai_purpose = document_json.purpose;
if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') {
doc.vectorstore_id = JSON.stringify([this.id]);
} else {
doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id]));
}
- if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') {
- doc.chunk_ids = JSON.stringify([]);
+ if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') {
+ doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] });
}
- ai_document.chunks.forEach(chunk => {
- console.log(doc.chunk_ids);
- doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id]));
+ let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = {
+ text_chunks: [],
+ image_chunks: [],
+ };
+
+ document_json.chunks.forEach((chunk: Chunk) => {
+ let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[];
+ switch (chunk.metadata.type) {
+ case CHUNK_TYPE.TEXT:
+ chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }];
+ new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ new_chunk_simpl.text_chunks = new_chunk_simpl.text_chunks.concat(chunk_to_add);
+ doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+ break;
+ case CHUNK_TYPE.IMAGE:
+ case CHUNK_TYPE.TABLE:
+ console.log('Location:', chunk.metadata.location);
+ console.log('Height:', chunk.metadata.page_height);
+ console.log('Width:', chunk.metadata.page_width);
+ chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }];
+ new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add);
+ doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+ break;
+ }
});
+
doc.ai_document_status = 'COMPLETED';
}
}