author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500
---|---|---
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500
commit | 57e3c9b9977228a561e8972a469a67f17f4bcd9c (patch)
tree | 1a4f23921e121ca891b3fa6a49a30a92ea76d233 /src
parent | ad1e0cf62187e0f8bbb19b4720b7681585361de9 (diff)
trying new image generation plus new implementation of video and audio
Diffstat (limited to 'src')
-rw-r--r-- | src/client/documents/Documents.ts | 4
-rw-r--r-- | src/client/util/LinkManager.ts | 4
-rw-r--r-- | src/client/views/nodes/chatbot/agentsystem/Agent.ts | 10
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 137
-rw-r--r-- | src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts | 12
-rw-r--r-- | src/client/views/nodes/chatbot/tools/ImageCreationTool.ts | 74
-rw-r--r-- | src/client/views/nodes/chatbot/types/types.ts | 3
-rw-r--r-- | src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts | 256
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 165
9 files changed, 462 insertions(+), 203 deletions(-)
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index e539e3c65..52cd36401 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -826,8 +826,8 @@ export namespace Docs {
             ...options,
         });
     }
-    export function DiagramDocument(options: DocumentOptions = { title: '' }) {
-        return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), undefined, options);
+    export function DiagramDocument(data?: string, options: DocumentOptions = { title: '' }) {
+        return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), data, options);
     }
     export function AudioDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) {
diff --git a/src/client/util/LinkManager.ts b/src/client/util/LinkManager.ts
index e11482572..d04d41968 100644
--- a/src/client/util/LinkManager.ts
+++ b/src/client/util/LinkManager.ts
@@ -257,10 +257,10 @@ export function UPDATE_SERVER_CACHE() {
     cacheDocumentIds = newCacheUpdate;
     // print out cached docs
-    Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
+    //Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
     const isFiltered = filtered.filter(doc => !Doc.IsSystem(doc));
     const strings = isFiltered.map(doc => StrCast(doc.title) + ' ' + (Doc.IsDataProto(doc) ? '(data)' : '(embedding)'));
-    Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
+    //Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
     rp.post(ClientUtils.prepend('/setCacheDocumentIds'), {
         body: {
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 3c8b30125..1eb5e3963 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -20,6 +20,7 @@ import { Parameter, ParametersType, TypeMap } from '../types/tool_types';
 import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';
 import { DocumentOptions } from '../../../../documents/Documents';
 import { CreateAnyDocumentTool } from '../tools/CreateAnyDocTool';
+import { ImageCreationTool } from '../tools/ImageCreationTool';
 
 dotenv.config();
 
@@ -73,12 +74,13 @@ export class Agent {
             calculate: new CalculateTool(),
             rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
-            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
-            searchTool: new SearchTool(addLinkedUrlDoc),
+            //websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+            //searchTool: new SearchTool(addLinkedUrlDoc),
             createCSV: new CreateCSVTool(createCSVInDash),
             noTool: new NoTool(),
-            createTextDoc: new CreateTextDocTool(addLinkedDoc),
-            //createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
+            imageCreationTool: new ImageCreationTool(addLinkedDoc),
+            //createTextDoc: new CreateTextDocTool(addLinkedDoc),
+            createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
         };
     }
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index b22f2455e..baa4ad521 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -34,6 +34,11 @@ import './ChatBox.scss';
 import MessageComponentBox from './MessageComponent';
 import { ProgressBar } from './ProgressBar';
 import { RichTextField } from '../../../../../fields/RichTextField';
+import { VideoBox } from '../../VideoBox';
+import { AudioBox } from '../../AudioBox';
+import { DiagramBox } from '../../DiagramBox';
+import { ImageField } from '../../../../../fields/URLField';
+import { DashUploadUtils } from '../../../../../server/DashUploadUtils';
 
 dotenv.config();
 
@@ -402,13 +407,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     createDocInDash = async (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => {
-        let doc;
+        let doc: Doc;
         switch (doc_type.toLowerCase()) {
             case 'text':
                 doc = Docs.Create.TextDocument(data || '', options);
                 break;
             case 'image':
+                console.log('imageURL: ' + data);
+                //DashUploadUtils.UploadImage(data!);
                 doc = Docs.Create.ImageDocument(data || '', options);
                 break;
             case 'pdf':
@@ -417,6 +424,13 @@
             case 'video':
                 doc = Docs.Create.VideoDocument(data || '', options);
                 break;
+            case 'mermaid_diagram':
+                doc = Docs.Create.DiagramDocument(data, options);
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as DiagramBox)?.renderMermaid?.(data!);
+                });
+                break;
             case 'audio':
                 doc = Docs.Create.AudioDocument(data || '', options);
                 break;
@@ -426,12 +440,10 @@
             case 'equation':
                 doc = Docs.Create.EquationDocument(data || '', options);
                 break;
-            case 'functionplot':
             case 'function_plot':
                 doc = Docs.Create.FunctionPlotDocument([], options);
                 break;
             case 'dataviz':
-            case 'data_viz':
                 const { fileUrl, id } = await Networking.PostToServer('/createCSV', {
                     filename: (options.title as string).replace(/\s+/g, '') + '.csv',
                     data: data,
@@ -467,12 +479,13 @@
         if (foundChunk) {
             // Handle media chunks specifically
-            if (foundChunk.chunkType === CHUNK_TYPE.MEDIA) {
-                const directMatchSegment = this.getDirectMatchingSegment(doc, citation.direct_text || '');
-                if (directMatchSegment) {
+            if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
+                const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+
+                if (directMatchSegmentStart) {
                     // Navigate to the segment's start time in the media player
-                    await this.goToMediaTimestamp(doc, directMatchSegment.start_time);
+                    await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
                 } else {
                     console.error('No direct matching segment found for the citation.');
                 }
@@ -485,29 +498,53 @@
         }
     };
 
-    /**
-     * Finds the first segment with a direct match to the citation text.
-     * A match occurs if the segment's text is a subset of the citation's direct text or vice versa.
-     * @param doc The document containing media metadata.
-     * @param citationText The citation text to find a matching segment for.
-     * @returns The segment with the direct match or null if no match is found.
-     */
-    getDirectMatchingSegment = (doc: Doc, citationText: string): { start_time: number; end_time: number; text: string } | null => {
-        const mediaMetadata = JSON.parse(StrCast(doc.segments)); // Assuming segments are stored in metadata
+    getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
+        const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
+            index: index.toString(),
+            text: segment.text,
+            start: segment.start,
+            end: segment.end,
+        }));
 
-        if (!Array.isArray(mediaMetadata) || mediaMetadata.length === 0) {
-            return null;
+        if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
+            return 0;
         }
 
-        for (const segment of mediaMetadata) {
-            const segmentText = segment.text || '';
-            // Check if the segment's text is a subset of the citation text or vice versa
-            if (citationText.includes(segmentText) || segmentText.includes(citationText)) {
-                return segment; // Return the first matching segment
+        // Create itemsToSearch array based on indexesOfSegments
+        const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
+            const index = parseInt(indexStr, 10);
+            const segment = originalSegments[index];
+            return { text: segment.text, start: segment.start };
+        });
+
+        console.log('Constructed itemsToSearch:', itemsToSearch);
+
+        // Helper function to calculate word overlap score
+        const calculateWordOverlap = (text1: string, text2: string): number => {
+            const words1 = new Set(text1.toLowerCase().split(/\W+/));
+            const words2 = new Set(text2.toLowerCase().split(/\W+/));
+            const intersection = new Set([...words1].filter(word => words2.has(word)));
+            return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+        };
+
+        // Search for the best matching segment
+        let bestMatchStart = 0;
+        let bestScore = 0;
+
+        console.log(`Searching for best match for query: "${citationText}"`);
+        itemsToSearch.forEach(item => {
+            const score = calculateWordOverlap(citationText, item.text);
+            console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
+            if (score > bestScore) {
+                bestScore = score;
+                bestMatchStart = item.start;
             }
-        }
+        });
 
-        return null; // No match found
+        console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+
+        // Return the start time of the best match
+        return bestMatchStart;
     };
 
     /**
@@ -515,15 +552,20 @@
      * @param doc The document containing the media file.
      * @param timestamp The timestamp to navigate to.
      */
-    goToMediaTimestamp = async (doc: Doc, timestamp: number) => {
+    goToMediaTimestamp = async (doc: Doc, timestamp: number, type: 'video' | 'audio') => {
         try {
             // Show the media document in the viewer
-            await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
-
-            // Simulate navigation to the timestamp
-            const firstView = Array.from(doc[DocViews])[0] as DocumentView;
-            (firstView.ComponentView as any)?.gotoTimestamp?.(timestamp);
-
+            if (type == 'video') {
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as VideoBox)?.Seek?.(timestamp);
+                });
+            } else {
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+                    (firstView.ComponentView as AudioBox)?.playFrom?.(timestamp);
+                });
+            }
             console.log(`Navigated to timestamp: ${timestamp}s in document ${doc.id}`);
         } catch (error) {
             console.error('Error navigating to media timestamp:', error);
@@ -538,6 +580,32 @@
      */
     handleOtherChunkTypes = (foundChunk: SimplifiedChunk, citation: Citation, doc: Doc) => {
         switch (foundChunk.chunkType) {
+            case CHUNK_TYPE.IMAGE:
+            case CHUNK_TYPE.TABLE:
+                {
+                    const values = foundChunk.location?.replace(/[[\]]/g, '').split(',');
+
+                    if (values?.length !== 4) {
+                        console.error('Location string must contain exactly 4 numbers');
+                        return;
+                    }
+                    if (foundChunk.startPage === undefined || foundChunk.endPage === undefined) {
+                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                        return;
+                    }
+                    const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc);
+                    const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+                    const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc);
+                    const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+
+                    const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
+
+                    const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+                    const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
+
+                    DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
+                }
+                break;
             case CHUNK_TYPE.TEXT:
                 this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
                 setTimeout(() => (this.citationPopup.visible = false), 3000);
@@ -686,7 +754,10 @@
             .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
             .map(d => DocCast(d?.annotationOn, d))
             .filter(d => d)
-            .filter(d => d.ai_doc_id)
+            .filter(d => {
+                console.log(d.ai_doc_id);
+                return d.ai_doc_id;
+            })
             .map(d => StrCast(d.ai_doc_id));
     }
diff --git a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
index a4871f7fd..4c059177b 100644
--- a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
@@ -7,8 +7,8 @@ import { DocumentOptions, Docs } from '../../../../documents/Documents';
 /**
  * List of supported document types that can be created via text LLM.
  */
-type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'functionPlot' | 'dataviz' | 'noteTaking' | 'rtf' | 'message';
-const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'functionPlot', 'dataviz', 'noteTaking', 'rtf', 'message'];
+type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'function_plot' | 'dataviz' | 'note_taking' | 'rtf' | 'message' | 'mermaid_diagram';
+const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'function_plot', 'dataviz', 'note_taking', 'rtf', 'message', 'mermaid_diagram'];
 
 /**
  * Description of document options and data field for each type.
@@ -26,7 +26,7 @@ const documentTypesInfo = {
         options: ['title', 'backgroundColor', 'fontColor', 'layout'],
         dataDescription: 'The equation content as a string.',
     },
-    functionPlot: {
+    function_plot: {
         options: ['title', 'backgroundColor', 'layout', 'function_definition'],
         dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.',
     },
@@ -34,7 +34,7 @@
         options: ['title', 'backgroundColor', 'layout', 'chartType'],
         dataDescription: 'A string of comma-separated values representing the CSV data.',
     },
-    noteTaking: {
+    note_taking: {
         options: ['title', 'backgroundColor', 'layout'],
         dataDescription: 'The initial content or structure for note-taking.',
     },
@@ -46,6 +46,10 @@
         options: ['title', 'backgroundColor', 'layout'],
         dataDescription: 'The message content of the document.',
     },
+    mermaid_diagram: {
+        options: ['title', 'backgroundColor', 'layout'],
+        dataDescription: 'The Mermaid diagram content.',
+    },
 };
 
 const createAnyDocumentToolParams = [
diff --git a/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
new file mode 100644
index 000000000..cf9e8cfc8
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
@@ -0,0 +1,74 @@
+import { v4 as uuidv4 } from 'uuid';
+import { Networking } from '../../../../Network';
+import { BaseTool } from './BaseTool';
+import { Observation } from '../types/types';
+import { ParametersType, ToolInfo } from '../types/tool_types';
+import { DocumentOptions } from '../../../../documents/Documents';
+
+const imageCreationToolParams = [
+    {
+        name: 'image_prompt',
+        type: 'string',
+        description: 'The prompt for the image to be created. This should be a string that describes the image to be created in extreme detail for an AI image generator.',
+        required: true,
+    },
+] as const;
+
+type ImageCreationToolParamsType = typeof imageCreationToolParams;
+
+const imageCreationToolInfo: ToolInfo<ImageCreationToolParamsType> = {
+    name: 'imageCreationTool',
+    citationRules: 'No citation needed. Cannot cite image generation for a response.',
+    parameterRules: imageCreationToolParams,
+    description: 'Create an image of any style, content, or design, based on a prompt. The prompt should be a detailed description of the image to be created.',
+};
+
+export class ImageCreationTool extends BaseTool<ImageCreationToolParamsType> {
+    private _addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void;
+
+    constructor(addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void) {
+        super(imageCreationToolInfo);
+        this._addLinkedDoc = addLinkedDoc;
+    }
+
+    async execute(args: ParametersType<ImageCreationToolParamsType>): Promise<Observation[]> {
+        const image_prompt = args.image_prompt;
+
+        console.log(`Generating image for prompt: ${image_prompt}`);
+        // Ask the server to generate the image; it saves the file and returns
+        // both the raw base64 data and the client-accessible path.
+        try {
+            const { base64_data, image_path } = await Networking.PostToServer('/generateImage', {
+                image_prompt,
+            });
+            const id = uuidv4();
+
+            this._addLinkedDoc('image', image_path, {}, id);
+            return [
+                {
+                    type: 'image_url',
+                    image_url: { url: `data:image/jpeg;base64,${base64_data}` },
+                },
+            ];
+        } catch (error) {
+            console.log(error);
+            return [
+                {
+                    type: 'text',
+                    text: `An error occurred while generating image.`,
+                },
+            ];
+        }
+    }
+}
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index c15ae4c6e..54fd7c979 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -1,3 +1,4 @@
+import { indexes } from 'd3';
 import { AnyLayer } from 'react-map-gl';
 
 export enum ASSISTANT_ROLE {
@@ -95,6 +96,7 @@ export interface RAGChunk {
         page_height?: number | undefined;
         start_time?: number | undefined;
         end_time?: number | undefined;
+        indexes?: string[] | undefined;
     };
 }
 
@@ -107,6 +109,7 @@ export interface SimplifiedChunk {
     url?: string;
     start_time?: number;
     end_time?: number;
+    indexes?: string[];
 }
 
 export interface AI_Document {
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index af27ebe80..3ed433778 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -15,6 +15,7 @@ import { Networking } from '../../../../Network';
 import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
+import { indexes } from 'd3';
 
 dotenv.config();
 
@@ -28,7 +29,7 @@ export class Vectorstore {
     private cohere: CohereClient; // Cohere client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
     private _id: string; // Unique ID for the Vectorstore instance.
-    private _doc_ids: string[] = []; // List of document IDs handled by this instance.
+    private _doc_ids: () => string[]; // Getter for the document IDs handled by this instance.
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
@@ -48,7 +49,7 @@ export class Vectorstore {
         this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
         this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
         this._id = id;
-        this._doc_ids = doc_ids();
+        this._doc_ids = doc_ids;
         this.initializeIndex();
     }
 
@@ -85,131 +86,155 @@
      * @param progressCallback Callback to track progress.
      */
     async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
-        const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
-
-        if (!local_file_path) {
-            throw new Error('Invalid file path.');
-        }
-
-        const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
-        let result: AI_Document & { doc_id: string };
-
-        if (isAudioOrVideo) {
-            console.log('Processing media file...');
-            const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
-            const segmentedTranscript = response;
+        const ai_document_status: string = StrCast(doc.ai_document_status);
+
+        // Skip if the document is already in progress or completed.
+        if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
+            if (ai_document_status === 'PROGRESS') {
+                console.log('Already in progress.');
+                return;
+            } else if (ai_document_status === 'COMPLETED') {
+                console.log('Already completed.');
+                return;
+            }
+        } else {
+            // Start processing the document.
+            doc.ai_document_status = 'PROGRESS';
+            const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
 
-            // Generate embeddings for each chunk
-            const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+            if (!local_file_path) {
+                console.log('Invalid file path.');
+                return;
+            }
 
-            try {
-                const embeddingsResponse = await this.cohere.v2.embed({
-                    model: 'embed-english-v3.0',
-                    inputType: 'classification',
-                    embeddingTypes: ['float'], // Specify that embeddings should be floats
-                    texts, // Pass the array of chunk texts
-                });
+            const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
+            let result: AI_Document & { doc_id: string };
+            if (isAudioOrVideo) {
+                console.log('Processing media file...');
+                const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
+                const segmentedTranscript = response.condensed;
+                console.log(segmentedTranscript);
+                const summary = response.summary;
+                doc.summary = summary;
+                // Generate embeddings for each chunk
+                const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+
+                try {
+                    const embeddingsResponse = await this.cohere.v2.embed({
+                        model: 'embed-english-v3.0',
+                        inputType: 'classification',
+                        embeddingTypes: ['float'], // Specify that embeddings should be floats
+                        texts, // Pass the array of chunk texts
+                    });
+
+                    if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
+                        throw new Error('Mismatch between embeddings and the number of chunks');
+                    }
 
-                if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
-                    throw new Error('Mismatch between embeddings and the number of chunks');
+                    // Assign embeddings to each chunk
+                    segmentedTranscript.forEach((chunk: any, index: number) => {
+                        if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
+                            throw new Error('Invalid embeddings response');
+                        }
+                    });
+                    doc.original_segments = JSON.stringify(response.full);
+                    doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
+                    const doc_id = uuidv4();
+
+                    // Add transcript and embeddings to metadata
+                    result = {
+                        doc_id,
+                        purpose: '',
+                        file_name: local_file_path,
+                        num_pages: 0,
+                        summary: '',
+                        chunks: segmentedTranscript.map((chunk: any, index: number) => ({
+                            id: uuidv4(),
+                            values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
+                            metadata: {
+                                indexes: chunk.indexes,
+                                original_document: local_file_path,
+                                doc_id: doc_id,
+                                file_path: local_file_path,
+                                start_time: chunk.start,
+                                end_time: chunk.end,
+                                text: chunk.text,
+                                chunkType: 'text',
+                            },
+                        })),
+                        type: 'media',
+                    };
+                } catch (error) {
+                    console.error('Error generating embeddings:', error);
+                    throw new Error('Embedding generation failed');
+                }
 
-                // Assign embeddings to each chunk
-                segmentedTranscript.forEach((chunk: any, index: number) => {
-                    if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
-                        throw new Error('Invalid embeddings response');
-                    }
-                    //chunk.embedding = embeddingsResponse.embeddings.float[index];
-                });
-
-                // Add transcript and embeddings to metadata
-                result = {
-                    purpose: '',
-                    file_name: path.basename(local_file_path),
-                    num_pages: 0,
-                    summary: '',
-                    chunks: segmentedTranscript.map((chunk: any, index: number) => ({
-                        id: uuidv4(),
-                        values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
-                        metadata: {
-                            ...chunk,
-                            original_document: doc.id,
-                            doc_id: doc.id,
-                            file_path: local_file_path,
-                            start_time: chunk.start,
-                            end_time: chunk.end,
-                            text: chunk.text,
-                        },
-                    })),
-                    type: 'media',
-                    doc_id: StrCast(doc.id),
-                };
-            } catch (error) {
-                console.error('Error generating embeddings:', error);
-                throw new Error('Embedding generation failed');
-            }
-
-            doc.segmented_transcript = JSON.stringify(segmentedTranscript);
-        } else {
-            // Existing document processing logic remains unchanged
-            console.log('Processing regular document...');
-            const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
-
-            while (true) {
-                await new Promise(resolve => setTimeout(resolve, 2000));
-                const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
-                const resultResponseJson = JSON.parse(resultResponse);
-                if (resultResponseJson.status === 'completed') {
-                    result = resultResponseJson;
-                    break;
-                }
-                const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
-                const progressResponseJson = JSON.parse(progressResponse);
-                if (progressResponseJson) {
-                    progressCallback(progressResponseJson.progress, progressResponseJson.step);
-                }
+                doc.segmented_transcript = JSON.stringify(segmentedTranscript);
+                // Simplify chunks for storage
+                const simplifiedChunks = result.chunks.map(chunk => ({
+                    chunkId: chunk.id,
+                    start_time: chunk.metadata.start_time,
+                    end_time: chunk.metadata.end_time,
+                    indexes: chunk.metadata.indexes,
+                    chunkType: CHUNK_TYPE.TEXT,
+                    text: chunk.metadata.text,
+                }));
+                doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+            } else {
+                // Existing document processing logic remains unchanged
+                console.log('Processing regular document...');
+                const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+
+                while (true) {
+                    await new Promise(resolve => setTimeout(resolve, 2000));
+                    const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
+                    const resultResponseJson = JSON.parse(resultResponse);
+                    if (resultResponseJson.status === 'completed') {
+                        result = resultResponseJson;
+                        break;
+                    }
+                    const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
+                    const progressResponseJson = JSON.parse(progressResponse);
+                    if (progressResponseJson) {
+                        progressCallback(progressResponseJson.progress, progressResponseJson.step);
+                    }
+                }
+                if (!doc.chunk_simpl) {
+                    doc.chunk_simpl = JSON.stringify({ chunks: [] });
+                }
+                doc.summary = result.summary;
+                doc.ai_purpose = result.purpose;
+
+                result.chunks.forEach((chunk: RAGChunk) => {
+                    const chunkToAdd = {
+                        chunkId: chunk.id,
+                        startPage: chunk.metadata.start_page,
+                        endPage: chunk.metadata.end_page,
+                        location: chunk.metadata.location,
+                        chunkType: chunk.metadata.type as CHUNK_TYPE,
+                        text: chunk.metadata.text,
+                    };
+                    const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+                    new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
+                    doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+                });
             }
-        }
 
-        // Index the document
-        await this.indexDocument(result);
+            // Index the document
+            await this.indexDocument(result);
 
-        // Simplify chunks for storage
-        const simplifiedChunks = result.chunks.map(chunk => ({
-            chunkId: chunk.id,
-            start_time: chunk.metadata.start_time,
-            end_time: chunk.metadata.end_time,
-            chunkType: CHUNK_TYPE.TEXT,
-            text: chunk.metadata.text,
-        }));
-        doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+            // Preserve existing metadata updates
+            if (!doc.vectorstore_id) {
+                doc.vectorstore_id = JSON.stringify([this._id]);
+            } else {
+                doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
+            }
 
-        // Preserve existing metadata updates
-        if (!doc.vectorstore_id) {
-            doc.vectorstore_id = JSON.stringify([this._id]);
-        } else {
-            doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
-        }
+            doc.ai_doc_id = result.doc_id;
 
-        if (!doc.chunk_simpl) {
-            doc.chunk_simpl = JSON.stringify({ chunks: [] });
+            console.log(`Document added: ${result.file_name}`);
+            doc.ai_document_status = 'COMPLETED';
         }
-
-        result.chunks.forEach((chunk: RAGChunk) => {
-            const chunkToAdd = {
-                chunkId: chunk.id,
-                startPage: chunk.metadata.start_page,
-                endPage: chunk.metadata.end_page,
-                location: chunk.metadata.location,
-                chunkType: chunk.metadata.type as CHUNK_TYPE,
-                text: chunk.metadata.text,
-            };
-            const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
-            new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
-            doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
-        });
-
-        console.log(`Document added: ${result.file_name}`);
     }
 
     /**
@@ -294,17 +319,18 @@
         if (!Array.isArray(queryEmbedding)) {
             throw new Error('Query embedding is not an array');
         }
-
+        console.log(this._doc_ids());
         // Query the Pinecone index using the embedding and filter by document IDs.
         const queryResponse: QueryResponse = await this.index.query({
             vector: queryEmbedding,
             filter: {
-                doc_id: { $in: this._doc_ids },
+                doc_id: { $in: this._doc_ids() },
             },
             topK,
             includeValues: true,
             includeMetadata: true,
         });
+        console.log(queryResponse);
 
         // Map the results into RAGChunks and return them.
         return queryResponse.matches.map(
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 1fd88cbd6..83bb1b228 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -29,6 +29,7 @@ import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
 import ffmpeg from 'fluent-ffmpeg';
 import OpenAI from 'openai';
 import * as xmlbuilder from 'xmlbuilder';
+import { last } from 'lodash';
 
 // Enumeration of directories where different file types are stored
 export enum Directory {
@@ -285,60 +286,93 @@
                 // Step 3: Extract concise JSON
                 console.log('Extracting concise JSON...');
-                const conciseJSON = transcription.segments?.map((segment: any) => ({
+                const originalSegments = transcription.segments?.map((segment: any, index: number) => ({
+                    index: index.toString(),
                     text: segment.text,
                     start: segment.start,
                     end: segment.end,
                 }));
 
-                // Step 4: Combine segments with GPT-4
-                console.log('Combining segments with GPT-4...');
-                const schema = {
-                    name: 'combine_segments_schema',
-                    schema: {
-                        type: 'object',
-                        properties: {
-                            combined_segments: {
-                                type: 'array',
-                                items: {
-                                    type: 'object',
-                                    properties: {
-                                        text: { type: 'string' },
-                                        start: { type: 'number' },
-                                        end: { type: 'number' },
-                                    },
-                                    required: ['text', 'start', 'end'],
-                                },
-                            },
-                        },
-                        required: ['combined_segments'],
-                    },
-                };
-
-                const completion = await openai.chat.completions.create({
-                    model: 'gpt-4o-2024-08-06',
-                    messages: [
-                        {
-                            role: 'system',
-                            content: 'Combine text segments into coherent sections, each between 5 and 10 seconds, based on their content. Return the result as JSON that follows the schema.',
-                        },
-                        {
-                            role: 'user',
-                            content: JSON.stringify(conciseJSON),
-                        },
-                    ],
-                    response_format: {
-                        type: 'json_schema',
-                        json_schema: schema,
-                    },
-                });
-
-                const combinedSegments = JSON.parse(completion.choices[0].message?.content ?? '{"combined_segments": []}').combined_segments;
+                interface ConciseSegment {
+                    text: string;
+                    indexes: string[];
+                    start: number | null;
+                    end: number | null;
+                }
+
+                const combinedSegments: ConciseSegment[] = [];
+                let currentGroup: ConciseSegment = { text: '', indexes: [], start: null, end: null };
+                let currentDuration = 0;
+
+                originalSegments?.forEach(segment => {
+                    const segmentDuration = segment.end - segment.start;
+
+                    if (currentDuration + segmentDuration <= 4000) {
+                        // Add segment to the current group
+                        currentGroup.text += (currentGroup.text ? ' ' : '') + segment.text;
+                        currentGroup.indexes.push(segment.index);
+                        if (currentGroup.start === null) {
+                            currentGroup.start = segment.start;
+                        }
+                        currentGroup.end = segment.end;
+                        currentDuration += segmentDuration;
+                    } else {
+                        // Push the current group and start a new one
+                        combinedSegments.push({ ...currentGroup });
+                        currentGroup = {
+                            text: segment.text,
+                            indexes: [segment.index],
+                            start: segment.start,
+                            end: segment.end,
+                        };
+                        currentDuration = segmentDuration;
+                    }
+                });
'{"combined_segments": []}').combined_segments; + // Push the final group if it has content + if (currentGroup.text) { + combinedSegments.push({ ...currentGroup }); + } + const lastSegment = combinedSegments[combinedSegments.length - 1]; + + // Check if the last segment is too short and combine it with the second last + if (combinedSegments.length > 1 && lastSegment.end && lastSegment.start) { + const secondLastSegment = combinedSegments[combinedSegments.length - 2]; + const lastDuration = lastSegment.end - lastSegment.start; + + if (lastDuration < 30) { + // Combine the last segment with the second last + secondLastSegment.text += (secondLastSegment.text ? ' ' : '') + lastSegment.text; + secondLastSegment.indexes = secondLastSegment.indexes.concat(lastSegment.indexes); + secondLastSegment.end = lastSegment.end; + + // Remove the last segment from the array + combinedSegments.pop(); + } + } console.log('Segments combined successfully.'); + console.log('Generating summary using GPT-4...'); + const combinedText = combinedSegments.map(segment => segment.text).join(' '); + + let summary = ''; + try { + const completion = await openai.chat.completions.create({ + messages: [{ role: 'system', content: `Summarize the following text in a concise paragraph:\n\n${combinedText}` }], + model: 'gpt-4o', + }); + console.log('Summary generation complete.'); + summary = completion.choices[0].message.content ?? 'Summary could not be generated.'; + } catch (summaryError) { + console.error('Error generating summary:', summaryError); + summary = 'Summary could not be generated.'; + } + // Step 5: Return the JSON result + res.send({ full: originalSegments, condensed: combinedSegments, summary }); + // Step 5: Return the JSON result - res.send(combinedSegments); + res.send({ full: originalSegments, condensed: combinedSegments, summary: summary }); } catch (error) { console.error('Error processing media file:', error); res.status(500).send({ error: 'Failed to process media file' }); @@ -380,6 +414,51 @@ export default class AssistantManager extends ApiManager { } }; + register({ + method: Method.POST, + subscription: '/generateImage', + secureHandler: async ({ req, res }) => { + const { image_prompt } = req.body; + + if (!image_prompt) { + res.status(400).send({ error: 'No prompt provided' }); + return; + } + + try { + const image = await openai.images.generate({ model: 'dall-e-3', prompt: image_prompt, response_format: 'b64_json' }); + console.log(image); + + const base64String = image.data[0].b64_json; + if (!base64String) { + throw new Error('No base64 data received from image generation'); + } + // Generate a UUID for the file to ensure unique naming + const uuidv4 = uuid.v4(); + const fullFilename = `${uuidv4}.jpg`; // Prefix the file name with the UUID + + // Get the full server path where the file will be saved + const serverFilePath = serverPathToFile(Directory.images, fullFilename); + + const binaryData = Buffer.from(base64String, 'base64'); + + // Write the CSV data (which is a raw string) to the file + await writeFileAsync(serverFilePath, binaryData); + + // Construct the client-accessible URL for the file + const fileUrl = clientPathToFile(Directory.images, fullFilename); + + // Send the file URL and UUID back to the client + res.send({ base64_data: base64String, image_path: fileUrl }); + } catch (error) { + console.error('Error fetching the URL:', error); + res.status(500).send({ + error: 'Failed to fetch the URL', + }); + } + }, + }); + // Register a proxy fetch API route register({ method: 
            method: Method.POST,
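
Two of the techniques in this commit are easier to follow outside the diff. First, `getDirectMatchingSegmentStart` (ChatBox.tsx) picks a playback position by scoring the citation text against candidate transcript segments with a word-overlap measure — intersection size divided by the larger word set, a normalized-overlap variant of the Jaccard index. A minimal standalone sketch (illustrative only; `Segment` and the sample data below are hypothetical, not part of the commit):

```typescript
interface Segment {
    text: string;
    start: number; // start time in seconds
}

// Normalized word overlap: |A ∩ B| / max(|A|, |B|), as in calculateWordOverlap above.
function wordOverlap(a: string, b: string): number {
    const wordsA = new Set(a.toLowerCase().split(/\W+/).filter(Boolean));
    const wordsB = new Set(b.toLowerCase().split(/\W+/).filter(Boolean));
    const shared = [...wordsA].filter(w => wordsB.has(w)).length;
    return shared / Math.max(wordsA.size, wordsB.size);
}

// Return the start time of the best-scoring segment (0 if nothing matches).
function bestMatchStart(citation: string, segments: Segment[]): number {
    let bestStart = 0;
    let bestScore = 0;
    for (const seg of segments) {
        const score = wordOverlap(citation, seg.text);
        if (score > bestScore) {
            bestScore = score;
            bestStart = seg.start;
        }
    }
    return bestStart;
}

// Hypothetical usage: the second segment shares most words with the citation.
const segments: Segment[] = [
    { text: 'welcome to the lecture', start: 0 },
    { text: 'gradient descent updates the weights', start: 42 },
];
console.log(bestMatchStart('the weights are updated by gradient descent', segments)); // 42
```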
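
Second, the server side replaces the GPT-4 "combine segments" call with deterministic, duration-based packing (AssistantManager.ts). A self-contained sketch under the same assumptions — Whisper-style segments with start/end times in seconds, the commit's hard-coded 4000 grouping threshold passed in as `maxDuration`, and the 30-second trailing-merge rule kept inline:

```typescript
interface RawSegment {
    index: string;
    text: string;
    start: number; // seconds
    end: number; // seconds
}

interface Group {
    text: string;
    indexes: string[];
    start: number | null;
    end: number | null;
}

// Greedily pack consecutive segments into a group until the accumulated
// duration would exceed maxDuration, then start a new group.
function combineSegments(segments: RawSegment[], maxDuration: number): Group[] {
    const groups: Group[] = [];
    let current: Group = { text: '', indexes: [], start: null, end: null };
    let duration = 0;

    for (const seg of segments) {
        const segDuration = seg.end - seg.start;
        if (duration + segDuration <= maxDuration) {
            current.text += (current.text ? ' ' : '') + seg.text;
            current.indexes.push(seg.index);
            if (current.start === null) current.start = seg.start;
            current.end = seg.end;
            duration += segDuration;
        } else {
            groups.push(current);
            current = { text: seg.text, indexes: [seg.index], start: seg.start, end: seg.end };
            duration = segDuration;
        }
    }
    if (current.text) groups.push(current);

    // Fold a very short trailing group into its predecessor, as the hunk does.
    if (groups.length > 1) {
        const last = groups[groups.length - 1];
        if (last.start !== null && last.end !== null && last.end - last.start < 30) {
            const prev = groups[groups.length - 2];
            prev.text += (prev.text ? ' ' : '') + last.text;
            prev.indexes = prev.indexes.concat(last.indexes);
            prev.end = last.end;
            groups.pop();
        }
    }
    return groups;
}
```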