diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-07-17 12:06:40 -0400 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-07-17 12:06:40 -0400 |
commit | 0340c24eccce3d90c03934dec14d574128fb32ef (patch) | |
tree | da796e844fd84b5885d161f47f551b1e4145dbce /src/client | |
parent | 6e0dd5cf8b36e66edbced83cf5e6d4e2e272be3f (diff) |
added image citation highlights
Diffstat (limited to 'src/client')
-rw-r--r-- | src/client/views/nodes/ChatBox/AnswerParser.ts | 4 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.scss | 4 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.tsx | 128 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/MessageComponent.tsx | 2 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/types.ts | 69 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 16 |
6 files changed, 115 insertions, 108 deletions
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index f77d2261d..1162d46b0 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -1,4 +1,5 @@ import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types'; +import { v4 as uuid } from 'uuid'; export class AnswerParser { static parse(xml: string): AssistantMessage { @@ -33,7 +34,8 @@ export class AnswerParser { direct_text: direct_text.trim(), type: getChunkType(type), chunk_id: chunk_id, - location: citationPlainStart, + text_location: citationPlainStart, + citation_id: uuid(), }); citationOffset += fullMatch.length; diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index 75171fe56..e39938c4f 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -174,7 +174,7 @@ $follow-up-hover-bg-color: #dee2e6; } } -.initializing-overlay { +.uploading-overlay { position: absolute; top: 0; left: 0; @@ -189,7 +189,7 @@ $follow-up-hover-bg-color: #dee2e6; z-index: 10; &::before { - content: 'Initializing...'; + content: 'Uploading Docs...'; font-weight: bold; } } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index c7ae9a354..8b4a7bd0a 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -2,7 +2,7 @@ import { action, computed, makeObservable, observable, observe, reaction, runInA import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; -import { Doc } from '../../../../fields/Doc'; +import { Doc, DocListCast } from '../../../../fields/Doc'; import { CsvCast, DocCast, PDFCast, StrCast } from '../../../../fields/Types'; import { Networking } from '../../../Network'; import { DocumentType } from '../../../documents/DocumentTypes'; @@ -12,7 +12,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; @@ -26,6 +26,7 @@ import { UUID } from 'bson'; import { v4 as uuidv4 } from 'uuid'; import { aS } from '@fullcalendar/core/internal-common'; import { computeRect } from '@fullcalendar/core/internal'; +import { DocUtils } from '../../../documents/DocUtils'; dotenv.config(); @@ -35,7 +36,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; - @observable isInitializing: boolean = false; + @observable isUploadingDocs: boolean = false; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; @observable private linked_docs_to_add: ObservableSet<Doc> = observable.set(); @@ -75,9 +76,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @action addDocToVectorstore = async (newLinkedDoc: Doc) => { - this.isInitializing = true; await this.vectorstore.addAIDoc(newLinkedDoc); - this.isInitializing = false; }; // @action @@ -158,75 +157,76 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - //console.log(JSON.parse(StrCast(doc.chunk_simpl))); - console.log(JSON.stringify(StrCast(doc.chunk_simpl))); - const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl) as string); - console.log(doc_chunk_simpl); - const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | []; - const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }] | []; - - const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); - if (found_text_chunk) { - const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url); - - //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); - } - - const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); - if (found_image_chunk) { - const location_string: string = found_image_chunk.location; + if (doc.chunk_simpl) { + //console.log(JSON.parse(StrCast(doc.chunk_simpl))); + const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + console.log(doc_chunk_simpl); + const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | []; + const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number }] | []; + + const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_text_chunk) { + const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; + console.log('URL: ' + doc_url); + + //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); + } - // Extract variables from location_string - const values = location_string.replace(/[\[\]]/g, '').split(','); + const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_image_chunk) { + const location_string: string = found_image_chunk.location; - // Ensure we have exactly 4 values - if (values.length !== 4) { - console.error('Location string must contain exactly 4 numbers'); - return; // or handle this error as appropriate - } + // Extract variables from location_string + const values = location_string.replace(/[\[\]]/g, '').split(','); - const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); - const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); - const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); - const y2 = parseInt(values[3]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + // Ensure we have exactly 4 values + if (values.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; // or handle this error as appropriate + } - // Parse values to numbers - // const [x1, y1, x2, y2] = values.map(Number); + const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); + const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc); + const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); + const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc); - // Check if any parsing resulted in NaN - if ([x1, y1, x2, y2].some(isNaN)) { - console.error('All values in location string must be valid numbers'); - return; // or handle this error as appropriate - } + const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; - const highlight_doc = Docs.Create.FreeformDocument([], { - x: x1, - y: y1, - _width: x2 - x1, - _height: y2 - y1, - backgroundColor: 'rgba(255, 255, 0, 0.5)', - }); + const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); + const highlight_doc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); - Doc.AddDocToList(doc[DocData], Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc); - highlight_doc.annotationOn = doc; - Doc.SetContainer(highlight_doc, doc); - DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + } } } // You can implement additional functionality here, such as showing a modal with the full citation content }; + createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => { + const highlight_doc = Docs.Create.FreeformDocument([], { + x: x1, + y: y1, + _width: x2 - x1, + _height: y2 - y1, + backgroundColor: 'rgba(255, 255, 0, 0.5)', + }); + highlight_doc[DocData].citation_id = citation.citation_id; + Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc); + highlight_doc.annotationOn = pdfDoc; + Doc.SetContainer(highlight_doc, pdfDoc); + return highlight_doc; + }; + // @action // uploadLinks = async (linkedDocs: Doc[]) => { - // if (this.isInitializing) { + // if (this.isUploadingDocs) { // console.log('Initialization in progress, upload aborted.'); // return; // } @@ -293,7 +293,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { observe(this.linked_docs_to_add, change => { if (change.type === 'add') { + runInAction(() => { + this.isUploadingDocs = true; + }); this.addDocToVectorstore(change.newValue); + runInAction(() => { + this.isUploadingDocs = false; + }); } else if (change.type === 'delete') { console.log('Deleted docs: ', change.oldValue); } @@ -358,7 +364,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { render() { return ( <div className="chatBox"> - {this.isInitializing && <div className="initializing-overlay">Initializing...</div>} + {this.isUploadingDocs && <div className="uploading-overlay"></div>} <div className="scroll-box chat-content" ref={r => { diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index e18224405..9f3dee990 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -20,7 +20,7 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message let lastIndex = 0; message.citations.forEach((citation, idx) => { - const location = citation.location; + const location = citation.text_location; const textBefore = content.slice(lastIndex, location); const citationButton = ( <button diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 783610d6d..d702d5c41 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -36,7 +36,8 @@ export interface Citation { direct_text?: string; type: CHUNK_TYPE; chunk_id: string; - location: number; + text_location: number; + citation_id: string; } export interface Chunk { @@ -50,9 +51,9 @@ export interface Chunk { location: string; start_page: number; end_page: number; - base64_data?: string; - page_width: number; - page_height: number; + base64_data?: string | undefined; + page_width?: number | undefined; + page_height?: number | undefined; }; } @@ -80,36 +81,36 @@ export interface AgentMessage { content: string | { type: string; text?: string; image_url?: { url: string } }[]; } -export function convertToAIDocument(json: any): AI_Document { - if (!json) { - throw new Error('Invalid JSON object'); - } +// export function convertToAIDocument(json: any): AI_Document { +// if (!json) { +// throw new Error('Invalid JSON object'); +// } - const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ - id: chunk.id, - values: chunk.values, - metadata: { - text: chunk.metadata.text, - type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting - original_document: chunk.metadata.original_document, - file_path: chunk.metadata.file_path, - location: chunk.metadata.location, - start_page: chunk.metadata.start_page, - end_page: chunk.metadata.end_page, - base64_data: chunk.metadata.base64_data ?? undefined, - width: chunk.metadata.width ?? undefined, - height: chunk.metadata.height ?? undefined, - }, - })); +// const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ +// id: chunk.id, +// values: chunk.values, +// metadata: { +// text: chunk.metadata.text, +// type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting +// original_document: chunk.metadata.original_document, +// file_path: chunk.metadata.file_path, +// location: chunk.metadata.location, +// start_page: chunk.metadata.start_page, +// end_page: chunk.metadata.end_page, +// base64_data: chunk.metadata.base64_data, +// width: chunk.metadata.width, +// height: chunk.metadata.height, +// }, +// })); - const aiDocument: AI_Document = { - purpose: json.purpose, - file_name: json.file_name, - num_pages: json.num_pages, - summary: json.summary, - chunks: chunks, - type: json.type, - }; +// const aiDocument: AI_Document = { +// purpose: json.purpose, +// file_name: json.file_name, +// num_pages: json.num_pages, +// summary: json.summary, +// chunks: chunks, +// type: json.type, +// }; - return aiDocument; -} +// return aiDocument; +// } diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index b3e3f8679..ab0b6e617 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types'; +import { Chunk, AI_Document, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -76,7 +76,7 @@ export class Vectorstore { console.log('Document JSON:', document_json); //const ai_document: AI_Document = convertToAIDocument(document_json); this.documents.push(document_json); - await this.indexDocument(convertToAIDocument(document_json)); + await this.indexDocument(JSON.parse(JSON.stringify(document_json, (key, value) => (value === null || value === undefined ? undefined : value)))); console.log(`Document added: ${document_json.file_name}`); doc.summary = document_json.summary; doc.ai_purpose = document_json.purpose; @@ -88,13 +88,13 @@ export class Vectorstore { if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') { doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] }); } - let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = { + let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number }[] } = { text_chunks: [], image_chunks: [], }; document_json.chunks.forEach((chunk: Chunk) => { - let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[]; + let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[]; switch (chunk.metadata.type) { case CHUNK_TYPE.TEXT: chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }]; @@ -105,9 +105,7 @@ export class Vectorstore { case CHUNK_TYPE.IMAGE: case CHUNK_TYPE.TABLE: console.log('Location:', chunk.metadata.location); - console.log('Height:', chunk.metadata.page_height); - console.log('Width:', chunk.metadata.page_width); - chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }]; + chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page }]; new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add); doc.chunk_simpl = JSON.stringify(new_chunk_simpl); @@ -120,9 +118,9 @@ export class Vectorstore { } } - private async indexDocument(document: AI_Document) { + private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map( + const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as Chunk[]).map( chunk => ({ id: chunk.id, |