author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500
---|---|---
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500
commit | 57e3c9b9977228a561e8972a469a67f17f4bcd9c (patch)
tree | 1a4f23921e121ca891b3fa6a49a30a92ea76d233 /src
parent | ad1e0cf62187e0f8bbb19b4720b7681585361de9 (diff)
trying new image generation plus new implementation of video and audio
Diffstat (limited to 'src')
-rw-r--r-- | src/client/documents/Documents.ts | 4
-rw-r--r-- | src/client/util/LinkManager.ts | 4
-rw-r--r-- | src/client/views/nodes/chatbot/agentsystem/Agent.ts | 10
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 137
-rw-r--r-- | src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts | 12
-rw-r--r-- | src/client/views/nodes/chatbot/tools/ImageCreationTool.ts | 74
-rw-r--r-- | src/client/views/nodes/chatbot/types/types.ts | 3
-rw-r--r-- | src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts | 256
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 165
9 files changed, 462 insertions(+), 203 deletions(-)
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index e539e3c65..52cd36401 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -826,8 +826,8 @@ export namespace Docs {
             ...options,
         });
     }
-    export function DiagramDocument(options: DocumentOptions = { title: '' }) {
-        return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), undefined, options);
+    export function DiagramDocument(data?: string, options: DocumentOptions = { title: '' }) {
+        return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), data, options);
     }
     export function AudioDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) {
diff --git a/src/client/util/LinkManager.ts b/src/client/util/LinkManager.ts
index e11482572..d04d41968 100644
--- a/src/client/util/LinkManager.ts
+++ b/src/client/util/LinkManager.ts
@@ -257,10 +257,10 @@ export function UPDATE_SERVER_CACHE() {
     cacheDocumentIds = newCacheUpdate;
     // print out cached docs
-    Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
+    //Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
     const isFiltered = filtered.filter(doc => !Doc.IsSystem(doc));
     const strings = isFiltered.map(doc => StrCast(doc.title) + ' ' + (Doc.IsDataProto(doc) ? '(data)' : '(embedding)'));
-    Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
+    //Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
     rp.post(ClientUtils.prepend('/setCacheDocumentIds'), {
         body: {
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 3c8b30125..1eb5e3963 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -20,6 +20,7 @@ import { Parameter, ParametersType, TypeMap } from '../types/tool_types';
 import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';
 import { DocumentOptions } from '../../../../documents/Documents';
 import { CreateAnyDocumentTool } from '../tools/CreateAnyDocTool';
+import { ImageCreationTool } from '../tools/ImageCreationTool';
 
 dotenv.config();
 
@@ -73,12 +74,13 @@ export class Agent {
             calculate: new CalculateTool(),
             rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
-            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
-            searchTool: new SearchTool(addLinkedUrlDoc),
+            //websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+            //searchTool: new SearchTool(addLinkedUrlDoc),
             createCSV: new CreateCSVTool(createCSVInDash),
             noTool: new NoTool(),
-            createTextDoc: new CreateTextDocTool(addLinkedDoc),
-            //createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
+            imageCreationTool: new ImageCreationTool(addLinkedDoc),
+            //createTextDoc: new CreateTextDocTool(addLinkedDoc),
+            createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
         };
     }
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index b22f2455e..baa4ad521 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -34,6 +34,11 @@ import './ChatBox.scss';
 import MessageComponentBox from './MessageComponent';
 import { ProgressBar } from './ProgressBar';
 import { RichTextField } from '../../../../../fields/RichTextField';
+import { VideoBox } from '../../VideoBox';
+import { AudioBox } from '../../AudioBox';
+import { DiagramBox } from '../../DiagramBox';
+import { ImageField } from '../../../../../fields/URLField';
+import { DashUploadUtils } from '../../../../../server/DashUploadUtils';
 
 dotenv.config();
 
@@ -402,13 +407,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     createDocInDash = async (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => {
-        let doc;
+        let doc: Doc;
         switch (doc_type.toLowerCase()) {
             case 'text':
                 doc = Docs.Create.TextDocument(data || '', options);
                 break;
             case 'image':
+                console.log('imageURL: ' + data);
+                //DashUploadUtils.UploadImage(data!);
                 doc = Docs.Create.ImageDocument(data || '', options);
                 break;
             case 'pdf':
@@ -417,6 +424,13 @@
             case 'video':
                 doc = Docs.Create.VideoDocument(data || '', options);
                 break;
+            case 'mermaid_diagram':
+                doc = Docs.Create.DiagramDocument(data, options);
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as DiagramBox)?.renderMermaid?.(data!);
+                });
+                break;
             case 'audio':
                 doc = Docs.Create.AudioDocument(data || '', options);
                 break;
@@ -426,12 +440,10 @@
             case 'equation':
                 doc = Docs.Create.EquationDocument(data || '', options);
                 break;
-            case 'functionplot':
             case 'function_plot':
                 doc = Docs.Create.FunctionPlotDocument([], options);
                 break;
             case 'dataviz':
-            case 'data_viz':
                 const { fileUrl, id } = await Networking.PostToServer('/createCSV', {
                     filename: (options.title as string).replace(/\s+/g, '') + '.csv',
                     data: data,
@@ -467,12 +479,13 @@
         if (foundChunk) {
             // Handle media chunks specifically
-            if (foundChunk.chunkType === CHUNK_TYPE.MEDIA) {
-                const directMatchSegment = this.getDirectMatchingSegment(doc, citation.direct_text || '');
-                if (directMatchSegment) {
+            if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
+                const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+
+                if (directMatchSegmentStart) {
                     // Navigate to the segment's start time in the media player
-                    await this.goToMediaTimestamp(doc, directMatchSegment.start_time);
+                    await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
                 } else {
                     console.error('No direct matching segment found for the citation.');
                 }
@@ -485,29 +498,53 @@
         }
     };
 
-    /**
-     * Finds the first segment with a direct match to the citation text.
-     * A match occurs if the segment's text is a subset of the citation's direct text or vice versa.
-     * @param doc The document containing media metadata.
-     * @param citationText The citation text to find a matching segment for.
-     * @returns The segment with the direct match or null if no match is found.
-     */
-    getDirectMatchingSegment = (doc: Doc, citationText: string): { start_time: number; end_time: number; text: string } | null => {
-        const mediaMetadata = JSON.parse(StrCast(doc.segments)); // Assuming segments are stored in metadata
+    getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
+        const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
+            index: index.toString(),
+            text: segment.text,
+            start: segment.start,
+            end: segment.end,
+        }));
 
-        if (!Array.isArray(mediaMetadata) || mediaMetadata.length === 0) {
-            return null;
+        if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
+            return 0;
         }
 
-        for (const segment of mediaMetadata) {
-            const segmentText = segment.text || '';
-            // Check if the segment's text is a subset of the citation text or vice versa
-            if (citationText.includes(segmentText) || segmentText.includes(citationText)) {
-                return segment; // Return the first matching segment
+        // Create itemsToSearch array based on indexesOfSegments
+        const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
+            const index = parseInt(indexStr, 10);
+            const segment = originalSegments[index];
+            return { text: segment.text, start: segment.start };
+        });
+
+        console.log('Constructed itemsToSearch:', itemsToSearch);
+
+        // Helper function to calculate word overlap score
+        const calculateWordOverlap = (text1: string, text2: string): number => {
+            const words1 = new Set(text1.toLowerCase().split(/\W+/));
+            const words2 = new Set(text2.toLowerCase().split(/\W+/));
+            const intersection = new Set([...words1].filter(word => words2.has(word)));
+            return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+        };
+
+        // Search for the best matching segment
+        let bestMatchStart = 0;
+        let bestScore = 0;
+
+        console.log(`Searching for best match for query: "${citationText}"`);
+        itemsToSearch.forEach(item => {
+            const score = calculateWordOverlap(citationText, item.text);
+            console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
+            if (score > bestScore) {
+                bestScore = score;
+                bestMatchStart = item.start;
             }
-        }
+        });
 
-        return null; // No match found
+        console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+
+        // Return the start time of the best match
+        return bestMatchStart;
     };
 
     /**
@@ -515,15 +552,20 @@
      * @param doc The document containing the media file.
      * @param timestamp The timestamp to navigate to.
      */
-    goToMediaTimestamp = async (doc: Doc, timestamp: number) => {
+    goToMediaTimestamp = async (doc: Doc, timestamp: number, type: 'video' | 'audio') => {
         try {
             // Show the media document in the viewer
-            await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
-
-            // Simulate navigation to the timestamp
-            const firstView = Array.from(doc[DocViews])[0] as DocumentView;
-            (firstView.ComponentView as any)?.gotoTimestamp?.(timestamp);
-
+            if (type == 'video') {
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as VideoBox)?.Seek?.(timestamp);
+                });
+            } else {
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as AudioBox)?.playFrom?.(timestamp);
+                });
+            }
             console.log(`Navigated to timestamp: ${timestamp}s in document ${doc.id}`);
         } catch (error) {
             console.error('Error navigating to media timestamp:', error);
@@ -538,6 +580,32 @@
      */
     handleOtherChunkTypes = (foundChunk: SimplifiedChunk, citation: Citation, doc: Doc) => {
         switch (foundChunk.chunkType) {
+            case CHUNK_TYPE.IMAGE:
+            case CHUNK_TYPE.TABLE:
+                {
+                    const values = foundChunk.location?.replace(/[[\]]/g, '').split(',');
+
+                    if (values?.length !== 4) {
+                        console.error('Location string must contain exactly 4 numbers');
+                        return;
+                    }
+                    if (foundChunk.startPage === undefined || foundChunk.endPage === undefined) {
+                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                        return;
+                    }
+                    const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc);
+                    const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+                    const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc);
+                    const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+
+                    const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
+
+                    const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+                    const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
+
+                    DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
+                }
+                break;
             case CHUNK_TYPE.TEXT:
                 this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
                 setTimeout(() => (this.citationPopup.visible = false), 3000);
@@ -686,7 +754,10 @@
             .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
             .map(d => DocCast(d?.annotationOn, d))
             .filter(d => d)
-            .filter(d => d.ai_doc_id)
+            .filter(d => {
+                console.log(d.ai_doc_id);
+                return d.ai_doc_id;
+            })
             .map(d => StrCast(d.ai_doc_id));
     }
diff --git a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
index a4871f7fd..4c059177b 100644
--- a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
@@ -7,8 +7,8 @@ import { DocumentOptions, Docs } from '../../../../documents/Documents';
 /**
  * List of supported document types that can be created via text LLM.
  */
-type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'functionPlot' | 'dataviz' | 'noteTaking' | 'rtf' | 'message';
-const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'functionPlot', 'dataviz', 'noteTaking', 'rtf', 'message'];
+type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'function_plot' | 'dataviz' | 'note_taking' | 'rtf' | 'message' | 'mermaid_diagram';
+const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'function_plot', 'dataviz', 'note_taking', 'rtf', 'message', 'mermaid_diagram'];
 
 /**
  * Description of document options and data field for each type.
@@ -26,7 +26,7 @@ const documentTypesInfo = {
         options: ['title', 'backgroundColor', 'fontColor', 'layout'],
         dataDescription: 'The equation content as a string.',
     },
-    functionPlot: {
+    function_plot: {
         options: ['title', 'backgroundColor', 'layout', 'function_definition'],
         dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.',
     },
@@ -34,7 +34,7 @@
         options: ['title', 'backgroundColor', 'layout', 'chartType'],
         dataDescription: 'A string of comma-separated values representing the CSV data.',
     },
-    noteTaking: {
+    note_taking: {
         options: ['title', 'backgroundColor', 'layout'],
         dataDescription: 'The initial content or structure for note-taking.',
     },
@@ -46,6 +46,10 @@
         options: ['title', 'backgroundColor', 'layout'],
         dataDescription: 'The message content of the document.',
     },
+    mermaid_diagram: {
+        options: ['title', 'backgroundColor', 'layout'],
+        dataDescription: 'The Mermaid diagram content.',
+    },
 };
 
 const createAnyDocumentToolParams = [
diff --git a/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
new file mode 100644
index 000000000..cf9e8cfc8
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
@@ -0,0 +1,74 @@
+import { v4 as uuidv4 } from 'uuid';
+import { Networking } from '../../../../Network';
+import { BaseTool } from './BaseTool';
+import { Observation } from '../types/types';
+import { ParametersType, ToolInfo } from '../types/tool_types';
+import { DocumentOptions } from '../../../../documents/Documents';
+
+const imageCreationToolParams = [
+    {
+        name: 'image_prompt',
+        type: 'string',
+        description: 'The prompt for the image to be created. This should be a string that describes the image to be created in extreme detail for an AI image generator.',
+        required: true,
+    },
+] as const;
+
+type ImageCreationToolParamsType = typeof imageCreationToolParams;
+
+const imageCreationToolInfo: ToolInfo<ImageCreationToolParamsType> = {
+    name: 'imageCreationTool',
+    citationRules: 'No citation needed. Cannot cite image generation for a response.',
+    parameterRules: imageCreationToolParams,
+    description: 'Create an image of any style, content, or design, based on a prompt. The prompt should be a detailed description of the image to be created.',
+};
+
+export class ImageCreationTool extends BaseTool<ImageCreationToolParamsType> {
+    private _addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void;
+
+    constructor(addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void) {
+        super(imageCreationToolInfo);
+        this._addLinkedDoc = addLinkedDoc;
+    }
+
+    async execute(args: ParametersType<ImageCreationToolParamsType>): Promise<Observation[]> {
+        const image_prompt = args.image_prompt;
+
+        console.log(`Generating image for prompt: ${image_prompt}`);
+        // Ask the server to generate the image; it saves the file and returns
+        // both the raw base64 data and the client-accessible path.
+        try {
+            const { base64_data, image_path } = await Networking.PostToServer('/generateImage', {
+                image_prompt,
+            });
+            const id = uuidv4();
+
+            this._addLinkedDoc('image', image_path, {}, id);
+            return [
+                {
+                    type: 'image_url',
+                    image_url: { url: `data:image/jpeg;base64,${base64_data}` },
+                },
+            ];
+        } catch (error) {
+            console.log(error);
+            return [
+                {
+                    type: 'text',
+                    text: `An error occurred while generating image.`,
+                },
+            ];
+        }
+    }
+}
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index c15ae4c6e..54fd7c979 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -1,3 +1,4 @@
+import { indexes } from 'd3';
 import { AnyLayer } from 'react-map-gl';
 
 export enum ASSISTANT_ROLE {
@@ -95,6 +96,7 @@ export interface RAGChunk {
         page_height?: number | undefined;
         start_time?: number | undefined;
         end_time?: number | undefined;
+        indexes?: string[] | undefined;
     };
 }
 
@@ -107,6 +109,7 @@ export interface SimplifiedChunk {
     url?: string;
     start_time?: number;
     end_time?: number;
+    indexes?: string[];
 }
 
 export interface AI_Document {
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index af27ebe80..3ed433778 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -15,6 +15,7 @@ import { Networking } from '../../../../Network';
 import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
+import { indexes } from 'd3';
 
 dotenv.config();
 
@@ -28,7 +29,7 @@ export class Vectorstore {
     private cohere: CohereClient; // Cohere client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
     private _id: string; // Unique ID for the Vectorstore instance.
-    private _doc_ids: string[] = []; // List of document IDs handled by this instance.
+    private _doc_ids: () => string[]; // Getter for the document IDs handled by this instance.
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
@@ -48,7 +49,7 @@ export class Vectorstore {
         this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
         this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
         this._id = id;
-        this._doc_ids = doc_ids();
+        this._doc_ids = doc_ids;
         this.initializeIndex();
     }
 
@@ -85,131 +86,155 @@
      * @param progressCallback Callback to track progress.
      */
     async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
-        const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
-
-        if (!local_file_path) {
-            throw new Error('Invalid file path.');
-        }
-
-        const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
-        let result: AI_Document & { doc_id: string };
-
-        if (isAudioOrVideo) {
-            console.log('Processing media file...');
-            const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
-            const segmentedTranscript = response;
+        const ai_document_status: string = StrCast(doc.ai_document_status);
+
+        // Skip if the document is already in progress or completed.
+        if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
+            if (ai_document_status === 'PROGRESS') {
+                console.log('Already in progress.');
+                return;
+            } else if (ai_document_status === 'COMPLETED') {
+                console.log('Already completed.');
+                return;
+            }
+        } else {
+            // Start processing the document.
+            doc.ai_document_status = 'PROGRESS';
+            const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
 
-            // Generate embeddings for each chunk
-            const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+            if (!local_file_path) {
+                console.log('Invalid file path.');
+                return;
+            }
 
-            try {
-                const embeddingsResponse = await this.cohere.v2.embed({
-                    model: 'embed-english-v3.0',
-                    inputType: 'classification',
-                    embeddingTypes: ['float'], // Specify that embeddings should be floats
-                    texts, // Pass the array of chunk texts
-                });
+            const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
+            let result: AI_Document & { doc_id: string };
+            if (isAudioOrVideo) {
+                console.log('Processing media file...');
+                const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
+                const segmentedTranscript = response.condensed;
+                console.log(segmentedTranscript);
+                const summary = response.summary;
+                doc.summary = summary;
+                // Generate embeddings for each chunk
+                const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+
+                try {
+                    const embeddingsResponse = await this.cohere.v2.embed({
+                        model: 'embed-english-v3.0',
+                        inputType: 'classification',
+                        embeddingTypes: ['float'], // Specify that embeddings should be floats
+                        texts, // Pass the array of chunk texts
+                    });
+
+                    if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
+                        throw new Error('Mismatch between embeddings and the number of chunks');
+                    }
 
-                if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
-                    throw new Error('Mismatch between embeddings and the number of chunks');
+                    // Assign embeddings to each chunk
+                    segmentedTranscript.forEach((chunk: any, index: number) => {
+                        if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
+                            throw new Error('Invalid embeddings response');
+                        }
+                    });
+                    doc.original_segments = JSON.stringify(response.full);
+                    doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
+                    const doc_id = uuidv4();
+
+                    // Add transcript and embeddings to metadata
+                    result = {
+                        doc_id,
+                        purpose: '',
+                        file_name: local_file_path,
+                        num_pages: 0,
+                        summary: '',
+                        chunks: segmentedTranscript.map((chunk: any, index: number) => ({
+                            id: uuidv4(),
+                            values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
+                            metadata: {
+                                indexes: chunk.indexes,
+                                original_document: local_file_path,
+                                doc_id: doc_id,
+                                file_path: local_file_path,
+                                start_time: chunk.start,
+                                end_time: chunk.end,
+                                text: chunk.text,
+                                chunkType: 'text',
+                            },
+                        })),
+                        type: 'media',
+                    };
+                } catch (error) {
+                    console.error('Error generating embeddings:', error);
+                    throw new Error('Embedding generation failed');
+                }
 
-                // Assign embeddings to each chunk
-                segmentedTranscript.forEach((chunk: any, index: number) => {
-                    if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
-                        throw new Error('Invalid embeddings response');
-                    }
-                    //chunk.embedding = embeddingsResponse.embeddings.float[index];
-                });
-
-                // Add transcript and embeddings to metadata
-                result = {
-                    purpose: '',
-                    file_name: path.basename(local_file_path),
-                    num_pages: 0,
-                    summary: '',
-                    chunks: segmentedTranscript.map((chunk: any, index: number) => ({
-                        id: uuidv4(),
-                        values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
-                        metadata: {
-                            ...chunk,
-                            original_document: doc.id,
-                            doc_id: doc.id,
-                            file_path: local_file_path,
-                            start_time: chunk.start,
-                            end_time: chunk.end,
-                            text: chunk.text,
-                        },
-                    })),
-                    type: 'media',
-                    doc_id: StrCast(doc.id),
-                };
-            } catch (error) {
-                console.error('Error generating embeddings:', error);
-                throw new Error('Embedding generation failed');
-            }
-
-            doc.segmented_transcript = JSON.stringify(segmentedTranscript);
-        } else {
-            // Existing document processing logic remains unchanged
-            console.log('Processing regular document...');
-            const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
-
-            while (true) {
-                await new Promise(resolve => setTimeout(resolve, 2000));
-                const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
-                const resultResponseJson = JSON.parse(resultResponse);
-                if (resultResponseJson.status === 'completed') {
-                    result = resultResponseJson;
-                    break;
-                }
-                const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
-                const progressResponseJson = JSON.parse(progressResponse);
-                if (progressResponseJson) {
-                    progressCallback(progressResponseJson.progress, progressResponseJson.step);
-                }
+                doc.segmented_transcript = JSON.stringify(segmentedTranscript);
+                // Simplify chunks for storage
+                const simplifiedChunks = result.chunks.map(chunk => ({
+                    chunkId: chunk.id,
+                    start_time: chunk.metadata.start_time,
+                    end_time: chunk.metadata.end_time,
+                    indexes: chunk.metadata.indexes,
+                    chunkType: CHUNK_TYPE.TEXT,
+                    text: chunk.metadata.text,
+                }));
+                doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+            } else {
+                // Existing document processing logic remains unchanged
+                console.log('Processing regular document...');
+                const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+
+                while (true) {
+                    await new Promise(resolve => setTimeout(resolve, 2000));
+                    const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
+                    const resultResponseJson = JSON.parse(resultResponse);
+                    if (resultResponseJson.status === 'completed') {
+                        result = resultResponseJson;
+                        break;
+                    }
+                    const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
+                    const progressResponseJson = JSON.parse(progressResponse);
+                    if (progressResponseJson) {
+                        progressCallback(progressResponseJson.progress, progressResponseJson.step);
+                    }
+                }
+                if (!doc.chunk_simpl) {
+                    doc.chunk_simpl = JSON.stringify({ chunks: [] });
+                }
+                doc.summary = result.summary;
+                doc.ai_purpose = result.purpose;
+
+                result.chunks.forEach((chunk: RAGChunk) => {
+                    const chunkToAdd = {
+                        chunkId: chunk.id,
+                        startPage: chunk.metadata.start_page,
+                        endPage: chunk.metadata.end_page,
+                        location: chunk.metadata.location,
+                        chunkType: chunk.metadata.type as CHUNK_TYPE,
+                        text: chunk.metadata.text,
+                    };
+                    const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+                    new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
+                    doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+                });
             }
-        }
 
-        // Index the document
-        await this.indexDocument(result);
+            // Index the document
+            await this.indexDocument(result);
 
-        // Simplify chunks for storage
-        const simplifiedChunks = result.chunks.map(chunk => ({
-            chunkId: chunk.id,
-            start_time: chunk.metadata.start_time,
-            end_time: chunk.metadata.end_time,
-            chunkType: CHUNK_TYPE.TEXT,
-            text: chunk.metadata.text,
-        }));
-        doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+            // Preserve existing metadata updates
+            if (!doc.vectorstore_id) {
+                doc.vectorstore_id = JSON.stringify([this._id]);
+            } else {
+                doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
+            }
 
-        // Preserve existing metadata updates
-        if (!doc.vectorstore_id) {
-            doc.vectorstore_id = JSON.stringify([this._id]);
-        } else {
-            doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
-        }
+            doc.ai_doc_id = result.doc_id;
 
-        if (!doc.chunk_simpl) {
-            doc.chunk_simpl = JSON.stringify({ chunks: [] });
+            console.log(`Document added: ${result.file_name}`);
+            doc.ai_document_status = 'COMPLETED';
         }
-
-        result.chunks.forEach((chunk: RAGChunk) => {
-            const chunkToAdd = {
-                chunkId: chunk.id,
-                startPage: chunk.metadata.start_page,
-                endPage: chunk.metadata.end_page,
-                location: chunk.metadata.location,
-                chunkType: chunk.metadata.type as CHUNK_TYPE,
-                text: chunk.metadata.text,
-            };
-            const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
-            new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
-            doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
-        });
-
-        console.log(`Document added: ${result.file_name}`);
     }
 
     /**
@@ -294,17 +319,18 @@
         if (!Array.isArray(queryEmbedding)) {
             throw new Error('Query embedding is not an array');
         }
-
+        console.log(this._doc_ids());
         // Query the Pinecone index using the embedding and filter by document IDs.
         const queryResponse: QueryResponse = await this.index.query({
             vector: queryEmbedding,
             filter: {
-                doc_id: { $in: this._doc_ids },
+                doc_id: { $in: this._doc_ids() },
             },
             topK,
             includeValues: true,
             includeMetadata: true,
         });
+        console.log(queryResponse);
 
         // Map the results into RAGChunks and return them.
         return queryResponse.matches.map(
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 1fd88cbd6..83bb1b228 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -29,6 +29,7 @@ import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
 import ffmpeg from 'fluent-ffmpeg';
 import OpenAI from 'openai';
 import * as xmlbuilder from 'xmlbuilder';
+import { last } from 'lodash';
 
 // Enumeration of directories where different file types are stored
 export enum Directory {
@@ -285,60 +286,93 @@
                 // Step 3: Extract concise JSON
                 console.log('Extracting concise JSON...');
-                const conciseJSON = transcription.segments?.map((segment: any) => ({
+                const originalSegments = transcription.segments?.map((segment: any, index: number) => ({
+                    index: index.toString(),
                     text: segment.text,
                     start: segment.start,
                     end: segment.end,
                 }));
 
-                // Step 4: Combine segments with GPT-4
-                console.log('Combining segments with GPT-4...');
-                const schema = {
-                    name: 'combine_segments_schema',
-                    schema: {
-                        type: 'object',
-                        properties: {
-                            combined_segments: {
-                                type: 'array',
-                                items: {
-                                    type: 'object',
-                                    properties: {
-                                        text: { type: 'string' },
-                                        start: { type: 'number' },
-                                        end: { type: 'number' },
-                                    },
-                                    required: ['text', 'start', 'end'],
-                                },
-                            },
-                        },
-                        required: ['combined_segments'],
-                    },
-                };
-
-                const completion = await openai.chat.completions.create({
-                    model: 'gpt-4o-2024-08-06',
-                    messages: [
-                        {
-                            role: 'system',
-                            content: 'Combine text segments into coherent sections, each between 5 and 10 seconds, based on their content. Return the result as JSON that follows the schema.',
-                        },
-                        {
-                            role: 'user',
-                            content: JSON.stringify(conciseJSON),
-                        },
-                    ],
-                    response_format: {
-                        type: 'json_schema',
-                        json_schema: schema,
-                    },
-                });
-
-                const combinedSegments = JSON.parse(completion.choices[0].message?.content ?? '{"combined_segments": []}').combined_segments;
+                interface ConciseSegment {
+                    text: string;
+                    indexes: string[];
+                    start: number | null;
+                    end: number | null;
+                }
+
+                const combinedSegments: ConciseSegment[] = [];
+                let currentGroup: ConciseSegment = { text: '', indexes: [], start: null, end: null };
+                let currentDuration = 0;
+
+                originalSegments?.forEach(segment => {
+                    const segmentDuration = segment.end - segment.start;
+
+                    if (currentDuration + segmentDuration <= 4000) {
+                        // Add segment to the current group
+                        currentGroup.text += (currentGroup.text ? ' ' : '') + segment.text;
+                        currentGroup.indexes.push(segment.index);
+                        if (currentGroup.start === null) {
+                            currentGroup.start = segment.start;
+                        }
+                        currentGroup.end = segment.end;
+                        currentDuration += segmentDuration;
+                    } else {
+                        // Push the current group and start a new one
+                        combinedSegments.push({ ...currentGroup });
+                        currentGroup = {
+                            text: segment.text,
+                            indexes: [segment.index],
+                            start: segment.start,
+                            end: segment.end,
+                        };
+                        currentDuration = segmentDuration;
+                    }
+                });
'{"combined_segments": []}').combined_segments; + // Push the final group if it has content + if (currentGroup.text) { + combinedSegments.push({ ...currentGroup }); + } + const lastSegment = combinedSegments[combinedSegments.length - 1]; + + // Check if the last segment is too short and combine it with the second last + if (combinedSegments.length > 1 && lastSegment.end && lastSegment.start) { + const secondLastSegment = combinedSegments[combinedSegments.length - 2]; + const lastDuration = lastSegment.end - lastSegment.start; + + if (lastDuration < 30) { + // Combine the last segment with the second last + secondLastSegment.text += (secondLastSegment.text ? ' ' : '') + lastSegment.text; + secondLastSegment.indexes = secondLastSegment.indexes.concat(lastSegment.indexes); + secondLastSegment.end = lastSegment.end; + + // Remove the last segment from the array + combinedSegments.pop(); + } + } console.log('Segments combined successfully.'); + console.log('Generating summary using GPT-4...'); + const combinedText = combinedSegments.map(segment => segment.text).join(' '); + + let summary = ''; + try { + const completion = await openai.chat.completions.create({ + messages: [{ role: 'system', content: `Summarize the following text in a concise paragraph:\n\n${combinedText}` }], + model: 'gpt-4o', + }); + console.log('Summary generation complete.'); + summary = completion.choices[0].message.content ?? 'Summary could not be generated.'; + } catch (summaryError) { + console.error('Error generating summary:', summaryError); + summary = 'Summary could not be generated.'; + } + // Step 5: Return the JSON result + res.send({ full: originalSegments, condensed: combinedSegments, summary }); + // Step 5: Return the JSON result - res.send(combinedSegments); + res.send({ full: originalSegments, condensed: combinedSegments, summary: summary }); } catch (error) { console.error('Error processing media file:', error); res.status(500).send({ error: 'Failed to process media file' }); @@ -380,6 +414,51 @@ export default class AssistantManager extends ApiManager { } }; + register({ + method: Method.POST, + subscription: '/generateImage', + secureHandler: async ({ req, res }) => { + const { image_prompt } = req.body; + + if (!image_prompt) { + res.status(400).send({ error: 'No prompt provided' }); + return; + } + + try { + const image = await openai.images.generate({ model: 'dall-e-3', prompt: image_prompt, response_format: 'b64_json' }); + console.log(image); + + const base64String = image.data[0].b64_json; + if (!base64String) { + throw new Error('No base64 data received from image generation'); + } + // Generate a UUID for the file to ensure unique naming + const uuidv4 = uuid.v4(); + const fullFilename = `${uuidv4}.jpg`; // Prefix the file name with the UUID + + // Get the full server path where the file will be saved + const serverFilePath = serverPathToFile(Directory.images, fullFilename); + + const binaryData = Buffer.from(base64String, 'base64'); + + // Write the CSV data (which is a raw string) to the file + await writeFileAsync(serverFilePath, binaryData); + + // Construct the client-accessible URL for the file + const fileUrl = clientPathToFile(Directory.images, fullFilename); + + // Send the file URL and UUID back to the client + res.send({ base64_data: base64String, image_path: fileUrl }); + } catch (error) { + console.error('Error fetching the URL:', error); + res.status(500).send({ + error: 'Failed to fetch the URL', + }); + } + }, + }); + // Register a proxy fetch API route register({ method: 
            method: Method.POST,
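
Two of the techniques in this commit are easier to follow outside the diff. First, `getDirectMatchingSegmentStart` (ChatBox.tsx) picks a playback position by scoring the citation text against candidate transcript segments with a word-overlap measure — intersection size divided by the larger word set, a normalized-overlap variant of the Jaccard index. A minimal standalone sketch (illustrative only; `Segment` and the sample data below are hypothetical, not part of the commit):

```typescript
interface Segment {
    text: string;
    start: number; // start time in seconds
}

// Normalized word overlap: |A ∩ B| / max(|A|, |B|), as in calculateWordOverlap above.
function wordOverlap(a: string, b: string): number {
    const wordsA = new Set(a.toLowerCase().split(/\W+/).filter(Boolean));
    const wordsB = new Set(b.toLowerCase().split(/\W+/).filter(Boolean));
    const shared = [...wordsA].filter(w => wordsB.has(w)).length;
    return shared / Math.max(wordsA.size, wordsB.size);
}

// Return the start time of the best-scoring segment (0 if nothing matches).
function bestMatchStart(citation: string, segments: Segment[]): number {
    let bestStart = 0;
    let bestScore = 0;
    for (const seg of segments) {
        const score = wordOverlap(citation, seg.text);
        if (score > bestScore) {
            bestScore = score;
            bestStart = seg.start;
        }
    }
    return bestStart;
}

// Hypothetical usage: the second segment shares most words with the citation.
const segments: Segment[] = [
    { text: 'welcome to the lecture', start: 0 },
    { text: 'gradient descent updates the weights', start: 42 },
];
console.log(bestMatchStart('the weights are updated by gradient descent', segments)); // 42
```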
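
Second, the server side replaces the GPT-4 "combine segments" call with deterministic, duration-based packing (AssistantManager.ts). A self-contained sketch under the same assumptions — Whisper-style segments with start/end times in seconds, the commit's hard-coded 4000 grouping threshold passed in as `maxDuration`, and the 30-second trailing-merge rule kept inline:

```typescript
interface RawSegment {
    index: string;
    text: string;
    start: number; // seconds
    end: number; // seconds
}

interface Group {
    text: string;
    indexes: string[];
    start: number | null;
    end: number | null;
}

// Greedily pack consecutive segments into a group until the accumulated
// duration would exceed maxDuration, then start a new group.
function combineSegments(segments: RawSegment[], maxDuration: number): Group[] {
    const groups: Group[] = [];
    let current: Group = { text: '', indexes: [], start: null, end: null };
    let duration = 0;

    for (const seg of segments) {
        const segDuration = seg.end - seg.start;
        if (duration + segDuration <= maxDuration) {
            current.text += (current.text ? ' ' : '') + seg.text;
            current.indexes.push(seg.index);
            if (current.start === null) current.start = seg.start;
            current.end = seg.end;
            duration += segDuration;
        } else {
            groups.push(current);
            current = { text: seg.text, indexes: [seg.index], start: seg.start, end: seg.end };
            duration = segDuration;
        }
    }
    if (current.text) groups.push(current);

    // Fold a very short trailing group into its predecessor, as the hunk does.
    if (groups.length > 1) {
        const last = groups[groups.length - 1];
        if (last.start !== null && last.end !== null && last.end - last.start < 30) {
            const prev = groups[groups.length - 2];
            prev.text += (prev.text ? ' ' : '') + last.text;
            prev.indexes = prev.indexes.concat(last.indexes);
            prev.end = last.end;
            groups.pop();
        }
    }
    return groups;
}
```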