diff options
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
| -rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 47 |
1 files changed, 35 insertions, 12 deletions
diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index b47e276e7..b3e3f8679 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { Chunk, AI_Document, convertToAIDocument } from '../types'; +import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -74,24 +74,47 @@ export class Vectorstore { if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') { const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); console.log('Document JSON:', document_json); - const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(ai_document); - await this.indexDocument(ai_document); - console.log(`Document added: ${ai_document.file_name}`); - doc.summary = ai_document.summary; - doc.ai_purpose = ai_document.purpose; + //const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(document_json); + await this.indexDocument(convertToAIDocument(document_json)); + console.log(`Document added: ${document_json.file_name}`); + doc.summary = document_json.summary; + doc.ai_purpose = document_json.purpose; if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { doc.vectorstore_id = JSON.stringify([this.id]); } else { doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id])); } - if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') { - doc.chunk_ids = JSON.stringify([]); + if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') { + doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] }); } - ai_document.chunks.forEach(chunk => { - console.log(doc.chunk_ids); - doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id])); + let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = { + text_chunks: [], + image_chunks: [], + }; + + document_json.chunks.forEach((chunk: Chunk) => { + let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[]; + switch (chunk.metadata.type) { + case CHUNK_TYPE.TEXT: + chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.text_chunks = new_chunk_simpl.text_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + case CHUNK_TYPE.IMAGE: + case CHUNK_TYPE.TABLE: + console.log('Location:', chunk.metadata.location); + console.log('Height:', chunk.metadata.page_height); + console.log('Width:', chunk.metadata.page_width); + chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + } }); + doc.ai_document_status = 'COMPLETED'; } } |
