aboutsummaryrefslogtreecommitdiff
path: root/src/client
diff options
context:
space:
mode:
authorA.J. Shulman <Shulman.aj@gmail.com>2024-07-17 12:06:40 -0400
committerA.J. Shulman <Shulman.aj@gmail.com>2024-07-17 12:06:40 -0400
commit0340c24eccce3d90c03934dec14d574128fb32ef (patch)
treeda796e844fd84b5885d161f47f551b1e4145dbce /src/client
parent6e0dd5cf8b36e66edbced83cf5e6d4e2e272be3f (diff)
added image citation highlights
Diffstat (limited to 'src/client')
-rw-r--r--src/client/views/nodes/ChatBox/AnswerParser.ts4
-rw-r--r--src/client/views/nodes/ChatBox/ChatBox.scss4
-rw-r--r--src/client/views/nodes/ChatBox/ChatBox.tsx128
-rw-r--r--src/client/views/nodes/ChatBox/MessageComponent.tsx2
-rw-r--r--src/client/views/nodes/ChatBox/types.ts69
-rw-r--r--src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts16
6 files changed, 115 insertions, 108 deletions
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts
index f77d2261d..1162d46b0 100644
--- a/src/client/views/nodes/ChatBox/AnswerParser.ts
+++ b/src/client/views/nodes/ChatBox/AnswerParser.ts
@@ -1,4 +1,5 @@
import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types';
+import { v4 as uuid } from 'uuid';
export class AnswerParser {
static parse(xml: string): AssistantMessage {
@@ -33,7 +34,8 @@ export class AnswerParser {
direct_text: direct_text.trim(),
type: getChunkType(type),
chunk_id: chunk_id,
- location: citationPlainStart,
+ text_location: citationPlainStart,
+ citation_id: uuid(),
});
citationOffset += fullMatch.length;
diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss
index 75171fe56..e39938c4f 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.scss
+++ b/src/client/views/nodes/ChatBox/ChatBox.scss
@@ -174,7 +174,7 @@ $follow-up-hover-bg-color: #dee2e6;
}
}
-.initializing-overlay {
+.uploading-overlay {
position: absolute;
top: 0;
left: 0;
@@ -189,7 +189,7 @@ $follow-up-hover-bg-color: #dee2e6;
z-index: 10;
&::before {
- content: 'Initializing...';
+ content: 'Uploading Docs...';
font-weight: bold;
}
}
diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx
index c7ae9a354..8b4a7bd0a 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.tsx
+++ b/src/client/views/nodes/ChatBox/ChatBox.tsx
@@ -2,7 +2,7 @@ import { action, computed, makeObservable, observable, observe, reaction, runInA
import { observer } from 'mobx-react';
import OpenAI, { ClientOptions } from 'openai';
import * as React from 'react';
-import { Doc } from '../../../../fields/Doc';
+import { Doc, DocListCast } from '../../../../fields/Doc';
import { CsvCast, DocCast, PDFCast, StrCast } from '../../../../fields/Types';
import { Networking } from '../../../Network';
import { DocumentType } from '../../../documents/DocumentTypes';
@@ -12,7 +12,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent';
import { FieldView, FieldViewProps } from '../FieldView';
import './ChatBox.scss';
import MessageComponentBox from './MessageComponent';
-import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types';
+import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types';
import { Vectorstore } from './vectorstore/VectorstoreUpload';
import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView';
import { CollectionFreeFormView } from '../../collections/collectionFreeForm';
@@ -26,6 +26,7 @@ import { UUID } from 'bson';
import { v4 as uuidv4 } from 'uuid';
import { aS } from '@fullcalendar/core/internal-common';
import { computeRect } from '@fullcalendar/core/internal';
+import { DocUtils } from '../../../documents/DocUtils';
dotenv.config();
@@ -35,7 +36,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
@observable.deep current_message: AssistantMessage | undefined = undefined;
@observable isLoading: boolean = false;
- @observable isInitializing: boolean = false;
+ @observable isUploadingDocs: boolean = false;
@observable expandedScratchpadIndex: number | null = null;
@observable inputValue: string = '';
@observable private linked_docs_to_add: ObservableSet<Doc> = observable.set();
@@ -75,9 +76,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
@action
addDocToVectorstore = async (newLinkedDoc: Doc) => {
- this.isInitializing = true;
await this.vectorstore.addAIDoc(newLinkedDoc);
- this.isInitializing = false;
};
// @action
@@ -158,75 +157,76 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
const currentLinkedDocs: Doc[] = this.linkedDocs;
const chunk_id = citation.chunk_id;
for (let doc of currentLinkedDocs) {
- //console.log(JSON.parse(StrCast(doc.chunk_simpl)));
- console.log(JSON.stringify(StrCast(doc.chunk_simpl)));
- const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl) as string);
- console.log(doc_chunk_simpl);
- const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | [];
- const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }] | [];
-
- const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id);
- if (found_text_chunk) {
- const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname;
- console.log('URL: ' + doc_url);
-
- //const ai_field_id = doc[this.Document[Id] + '_ai_field_id'];
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
- console.log(doc.data);
- //look at context path for each docview and choose the doc view that has as
- //its parent the same collection view the chatbox is in
- const first_view = Array.from(doc[DocViews])[0];
- first_view.ComponentView?.search?.(citation.direct_text);
- });
- }
-
- const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id);
- if (found_image_chunk) {
- const location_string: string = found_image_chunk.location;
+ if (doc.chunk_simpl) {
+ //console.log(JSON.parse(StrCast(doc.chunk_simpl)));
+ const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ console.log(doc_chunk_simpl);
+ const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | [];
+ const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number }] | [];
+
+ const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id);
+ if (found_text_chunk) {
+ const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname;
+ console.log('URL: ' + doc_url);
+
+ //const ai_field_id = doc[this.Document[Id] + '_ai_field_id'];
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ console.log(doc.data);
+ //look at context path for each docview and choose the doc view that has as
+ //its parent the same collection view the chatbox is in
+ const first_view = Array.from(doc[DocViews])[0];
+ first_view.ComponentView?.search?.(citation.direct_text);
+ });
+ }
- // Extract variables from location_string
- const values = location_string.replace(/[\[\]]/g, '').split(',');
+ const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id);
+ if (found_image_chunk) {
+ const location_string: string = found_image_chunk.location;
- // Ensure we have exactly 4 values
- if (values.length !== 4) {
- console.error('Location string must contain exactly 4 numbers');
- return; // or handle this error as appropriate
- }
+ // Extract variables from location_string
+ const values = location_string.replace(/[\[\]]/g, '').split(',');
- const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width);
- const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height);
- const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width);
- const y2 = parseInt(values[3]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height);
+ // Ensure we have exactly 4 values
+ if (values.length !== 4) {
+ console.error('Location string must contain exactly 4 numbers');
+ return; // or handle this error as appropriate
+ }
- // Parse values to numbers
- // const [x1, y1, x2, y2] = values.map(Number);
+ const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc);
+ const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc);
+ const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc);
+ const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc);
- // Check if any parsing resulted in NaN
- if ([x1, y1, x2, y2].some(isNaN)) {
- console.error('All values in location string must be valid numbers');
- return; // or handle this error as appropriate
- }
+ const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
- const highlight_doc = Docs.Create.FreeformDocument([], {
- x: x1,
- y: y1,
- _width: x2 - x1,
- _height: y2 - y1,
- backgroundColor: 'rgba(255, 255, 0, 0.5)',
- });
+ const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+ const highlight_doc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
- Doc.AddDocToList(doc[DocData], Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc);
- highlight_doc.annotationOn = doc;
- Doc.SetContainer(highlight_doc, doc);
- DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {});
+ DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {});
+ }
}
}
// You can implement additional functionality here, such as showing a modal with the full citation content
};
+ createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => {
+ const highlight_doc = Docs.Create.FreeformDocument([], {
+ x: x1,
+ y: y1,
+ _width: x2 - x1,
+ _height: y2 - y1,
+ backgroundColor: 'rgba(255, 255, 0, 0.5)',
+ });
+ highlight_doc[DocData].citation_id = citation.citation_id;
+ Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc);
+ highlight_doc.annotationOn = pdfDoc;
+ Doc.SetContainer(highlight_doc, pdfDoc);
+ return highlight_doc;
+ };
+
// @action
// uploadLinks = async (linkedDocs: Doc[]) => {
- // if (this.isInitializing) {
+ // if (this.isUploadingDocs) {
// console.log('Initialization in progress, upload aborted.');
// return;
// }
@@ -293,7 +293,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
observe(this.linked_docs_to_add, change => {
if (change.type === 'add') {
+ runInAction(() => {
+ this.isUploadingDocs = true;
+ });
this.addDocToVectorstore(change.newValue);
+ runInAction(() => {
+ this.isUploadingDocs = false;
+ });
} else if (change.type === 'delete') {
console.log('Deleted docs: ', change.oldValue);
}
@@ -358,7 +364,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
render() {
return (
<div className="chatBox">
- {this.isInitializing && <div className="initializing-overlay">Initializing...</div>}
+ {this.isUploadingDocs && <div className="uploading-overlay"></div>}
<div
className="scroll-box chat-content"
ref={r => {
diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx
index e18224405..9f3dee990 100644
--- a/src/client/views/nodes/ChatBox/MessageComponent.tsx
+++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx
@@ -20,7 +20,7 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message
let lastIndex = 0;
message.citations.forEach((citation, idx) => {
- const location = citation.location;
+ const location = citation.text_location;
const textBefore = content.slice(lastIndex, location);
const citationButton = (
<button
diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts
index 783610d6d..d702d5c41 100644
--- a/src/client/views/nodes/ChatBox/types.ts
+++ b/src/client/views/nodes/ChatBox/types.ts
@@ -36,7 +36,8 @@ export interface Citation {
direct_text?: string;
type: CHUNK_TYPE;
chunk_id: string;
- location: number;
+ text_location: number;
+ citation_id: string;
}
export interface Chunk {
@@ -50,9 +51,9 @@ export interface Chunk {
location: string;
start_page: number;
end_page: number;
- base64_data?: string;
- page_width: number;
- page_height: number;
+ base64_data?: string | undefined;
+ page_width?: number | undefined;
+ page_height?: number | undefined;
};
}
@@ -80,36 +81,36 @@ export interface AgentMessage {
content: string | { type: string; text?: string; image_url?: { url: string } }[];
}
-export function convertToAIDocument(json: any): AI_Document {
- if (!json) {
- throw new Error('Invalid JSON object');
- }
+// export function convertToAIDocument(json: any): AI_Document {
+// if (!json) {
+// throw new Error('Invalid JSON object');
+// }
- const chunks: Chunk[] = json.chunks.map((chunk: any) => ({
- id: chunk.id,
- values: chunk.values,
- metadata: {
- text: chunk.metadata.text,
- type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting
- original_document: chunk.metadata.original_document,
- file_path: chunk.metadata.file_path,
- location: chunk.metadata.location,
- start_page: chunk.metadata.start_page,
- end_page: chunk.metadata.end_page,
- base64_data: chunk.metadata.base64_data ?? undefined,
- width: chunk.metadata.width ?? undefined,
- height: chunk.metadata.height ?? undefined,
- },
- }));
+// const chunks: Chunk[] = json.chunks.map((chunk: any) => ({
+// id: chunk.id,
+// values: chunk.values,
+// metadata: {
+// text: chunk.metadata.text,
+// type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting
+// original_document: chunk.metadata.original_document,
+// file_path: chunk.metadata.file_path,
+// location: chunk.metadata.location,
+// start_page: chunk.metadata.start_page,
+// end_page: chunk.metadata.end_page,
+// base64_data: chunk.metadata.base64_data,
+// width: chunk.metadata.width,
+// height: chunk.metadata.height,
+// },
+// }));
- const aiDocument: AI_Document = {
- purpose: json.purpose,
- file_name: json.file_name,
- num_pages: json.num_pages,
- summary: json.summary,
- chunks: chunks,
- type: json.type,
- };
+// const aiDocument: AI_Document = {
+// purpose: json.purpose,
+// file_name: json.file_name,
+// num_pages: json.num_pages,
+// summary: json.summary,
+// chunks: chunks,
+// type: json.type,
+// };
- return aiDocument;
-}
+// return aiDocument;
+// }
diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
index b3e3f8679..ab0b6e617 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
@@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai';
import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
-import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types';
+import { Chunk, AI_Document, CHUNK_TYPE } from '../types';
import { Doc } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
@@ -76,7 +76,7 @@ export class Vectorstore {
console.log('Document JSON:', document_json);
//const ai_document: AI_Document = convertToAIDocument(document_json);
this.documents.push(document_json);
- await this.indexDocument(convertToAIDocument(document_json));
+ await this.indexDocument(JSON.parse(JSON.stringify(document_json, (key, value) => (value === null || value === undefined ? undefined : value))));
console.log(`Document added: ${document_json.file_name}`);
doc.summary = document_json.summary;
doc.ai_purpose = document_json.purpose;
@@ -88,13 +88,13 @@ export class Vectorstore {
if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') {
doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] });
}
- let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = {
+ let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number }[] } = {
text_chunks: [],
image_chunks: [],
};
document_json.chunks.forEach((chunk: Chunk) => {
- let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[];
+ let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[];
switch (chunk.metadata.type) {
case CHUNK_TYPE.TEXT:
chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }];
@@ -105,9 +105,7 @@ export class Vectorstore {
case CHUNK_TYPE.IMAGE:
case CHUNK_TYPE.TABLE:
console.log('Location:', chunk.metadata.location);
- console.log('Height:', chunk.metadata.page_height);
- console.log('Width:', chunk.metadata.page_width);
- chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }];
+ chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page }];
new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add);
doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
@@ -120,9 +118,9 @@ export class Vectorstore {
}
}
- private async indexDocument(document: AI_Document) {
+ private async indexDocument(document: any) {
console.log('Uploading vectors to content namespace...');
- const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map(
+ const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as Chunk[]).map(
chunk =>
({
id: chunk.id,