-rw-r--r--  src/client/documents/Documents.ts                              |   4
-rw-r--r--  src/client/util/LinkManager.ts                                 |   4
-rw-r--r--  src/client/views/nodes/chatbot/agentsystem/Agent.ts            |  10
-rw-r--r--  src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx   | 137
-rw-r--r--  src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts       |  12
-rw-r--r--  src/client/views/nodes/chatbot/tools/ImageCreationTool.ts      |  74
-rw-r--r--  src/client/views/nodes/chatbot/types/types.ts                  |   3
-rw-r--r--  src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts      | 256
-rw-r--r--  src/server/ApiManagers/AssistantManager.ts                     | 165
9 files changed, 462 insertions, 203 deletions
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index e539e3c65..52cd36401 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -826,8 +826,8 @@ export namespace Docs {
...options,
});
}
- export function DiagramDocument(options: DocumentOptions = { title: '' }) {
- return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), undefined, options);
+ export function DiagramDocument(data?: string, options: DocumentOptions = { title: '' }) {
+ return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), data, options);
}
export function AudioDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) {
diff --git a/src/client/util/LinkManager.ts b/src/client/util/LinkManager.ts
index e11482572..d04d41968 100644
--- a/src/client/util/LinkManager.ts
+++ b/src/client/util/LinkManager.ts
@@ -257,10 +257,10 @@ export function UPDATE_SERVER_CACHE() {
cacheDocumentIds = newCacheUpdate;
// print out cached docs
- Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
+ //Doc.MyDockedBtns.linearView_IsOpen && console.log('Set cached docs = ');
const isFiltered = filtered.filter(doc => !Doc.IsSystem(doc));
const strings = isFiltered.map(doc => StrCast(doc.title) + ' ' + (Doc.IsDataProto(doc) ? '(data)' : '(embedding)'));
- Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
+ //Doc.MyDockedBtns.linearView_IsOpen && strings.sort().forEach((str, i) => console.log(i.toString() + ' ' + str));
rp.post(ClientUtils.prepend('/setCacheDocumentIds'), {
body: {
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 3c8b30125..1eb5e3963 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -20,6 +20,7 @@ import { Parameter, ParametersType, TypeMap } from '../types/tool_types';
import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';
import { DocumentOptions } from '../../../../documents/Documents';
import { CreateAnyDocumentTool } from '../tools/CreateAnyDocTool';
+import { ImageCreationTool } from '../tools/ImageCreationTool';
dotenv.config();
@@ -73,12 +74,13 @@ export class Agent {
calculate: new CalculateTool(),
rag: new RAGTool(this.vectorstore),
dataAnalysis: new DataAnalysisTool(csvData),
- websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
- searchTool: new SearchTool(addLinkedUrlDoc),
+ //websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+ //searchTool: new SearchTool(addLinkedUrlDoc),
createCSV: new CreateCSVTool(createCSVInDash),
noTool: new NoTool(),
- createTextDoc: new CreateTextDocTool(addLinkedDoc),
- //createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
+ imageCreationTool: new ImageCreationTool(addLinkedDoc),
+ //createTextDoc: new CreateTextDocTool(addLinkedDoc),
+ createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
};
}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index b22f2455e..baa4ad521 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -34,6 +34,11 @@ import './ChatBox.scss';
import MessageComponentBox from './MessageComponent';
import { ProgressBar } from './ProgressBar';
import { RichTextField } from '../../../../../fields/RichTextField';
+import { VideoBox } from '../../VideoBox';
+import { AudioBox } from '../../AudioBox';
+import { DiagramBox } from '../../DiagramBox';
+import { ImageField } from '../../../../../fields/URLField';
+import { DashUploadUtils } from '../../../../../server/DashUploadUtils';
dotenv.config();
@@ -402,13 +407,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
@action
createDocInDash = async (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => {
- let doc;
+ let doc: Doc;
switch (doc_type.toLowerCase()) {
case 'text':
doc = Docs.Create.TextDocument(data || '', options);
break;
case 'image':
+ console.log('imageURL: ' + data);
+ //DashUploadUtils.UploadImage(data!);
doc = Docs.Create.ImageDocument(data || '', options);
break;
case 'pdf':
@@ -417,6 +424,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
case 'video':
doc = Docs.Create.VideoDocument(data || '', options);
break;
+ case 'mermaid_diagram':
+ doc = Docs.Create.DiagramDocument(data, options);
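+ // Show the new diagram document and render the Mermaid source once its view is available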
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+ (firstView.ComponentView as DiagramBox)?.renderMermaid?.(data!);
+ });
+ break;
case 'audio':
doc = Docs.Create.AudioDocument(data || '', options);
break;
@@ -426,12 +440,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
case 'equation':
doc = Docs.Create.EquationDocument(data || '', options);
break;
- case 'functionplot':
case 'function_plot':
doc = Docs.Create.FunctionPlotDocument([], options);
break;
case 'dataviz':
- case 'data_viz':
const { fileUrl, id } = await Networking.PostToServer('/createCSV', {
filename: (options.title as string).replace(/\s+/g, '') + '.csv',
data: data,
@@ -467,12 +479,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
if (foundChunk) {
// Handle media chunks specifically
- if (foundChunk.chunkType === CHUNK_TYPE.MEDIA) {
- const directMatchSegment = this.getDirectMatchingSegment(doc, citation.direct_text || '');
- if (directMatchSegment) {
+ if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
+ const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+
+ if (directMatchSegmentStart) {
// Navigate to the segment's start time in the media player
- await this.goToMediaTimestamp(doc, directMatchSegment.start_time);
+ await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
} else {
console.error('No direct matching segment found for the citation.');
}
@@ -485,29 +498,53 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
}
};
- /**
- * Finds the first segment with a direct match to the citation text.
- * A match occurs if the segment's text is a subset of the citation's direct text or vice versa.
- * @param doc The document containing media metadata.
- * @param citationText The citation text to find a matching segment for.
- * @returns The segment with the direct match or null if no match is found.
- */
- getDirectMatchingSegment = (doc: Doc, citationText: string): { start_time: number; end_time: number; text: string } | null => {
- const mediaMetadata = JSON.parse(StrCast(doc.segments)); // Assuming segments are stored in metadata
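+ /**
+ * Finds the start time of the transcript segment that best matches the citation text.
+ * Candidate segments are looked up by index from the document's original segments, then scored by word overlap (Jaccard similarity).
+ * @param doc The document containing the original transcript segments.
+ * @param citationText The citation text to match against.
+ * @param indexesOfSegments String indexes of the segments referenced by the cited chunk.
+ * @returns The start time of the best-matching segment, or 0 if no match is found.
+ */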
+ getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
+ const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
+ index: index.toString(),
+ text: segment.text,
+ start: segment.start,
+ end: segment.end,
+ }));
- if (!Array.isArray(mediaMetadata) || mediaMetadata.length === 0) {
- return null;
+ if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
+ return 0;
}
- for (const segment of mediaMetadata) {
- const segmentText = segment.text || '';
- // Check if the segment's text is a subset of the citation text or vice versa
- if (citationText.includes(segmentText) || segmentText.includes(citationText)) {
- return segment; // Return the first matching segment
+ // Create itemsToSearch array based on indexesOfSegments
+ const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
+ const index = parseInt(indexStr, 10);
+ const segment = originalSegments[index];
+ return { text: segment.text, start: segment.start };
+ });
+
+ console.log('Constructed itemsToSearch:', itemsToSearch);
+
+ // Helper function to calculate word overlap score
+ const calculateWordOverlap = (text1: string, text2: string): number => {
+ const words1 = new Set(text1.toLowerCase().split(/\W+/));
+ const words2 = new Set(text2.toLowerCase().split(/\W+/));
+ const intersection = new Set([...words1].filter(word => words2.has(word)));
+ return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+ };
+
+ // Search for the best matching segment
+ let bestMatchStart = 0;
+ let bestScore = 0;
+
+ console.log(`Searching for best match for query: "${citationText}"`);
+ itemsToSearch.forEach(item => {
+ const score = calculateWordOverlap(citationText, item.text);
+ console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
+ if (score > bestScore) {
+ bestScore = score;
+ bestMatchStart = item.start;
}
- }
+ });
- return null; // No match found
+ console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+
+ // Return the start time of the best match
+ return bestMatchStart;
};
/**
@@ -515,15 +552,20 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
* @param doc The document containing the media file.
* @param timestamp The timestamp to navigate to.
*/
- goToMediaTimestamp = async (doc: Doc, timestamp: number) => {
+ goToMediaTimestamp = async (doc: Doc, timestamp: number, type: 'video' | 'audio') => {
try {
// Show the media document in the viewer
- await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
-
- // Simulate navigation to the timestamp
- const firstView = Array.from(doc[DocViews])[0] as DocumentView;
- (firstView.ComponentView as any)?.gotoTimestamp?.(timestamp);
-
+ if (type == 'video') {
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+ (firstView.ComponentView as VideoBox)?.Seek?.(timestamp);
+ });
+ } else {
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ const firstView = Array.from(doc[DocViews])[0] as DocumentView;
+ (firstView.ComponentView as AudioBox)?.playFrom?.(timestamp);
+ });
+ }
console.log(`Navigated to timestamp: ${timestamp}s in document ${doc.id}`);
} catch (error) {
console.error('Error navigating to media timestamp:', error);
@@ -538,6 +580,32 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
handleOtherChunkTypes = (foundChunk: SimplifiedChunk, citation: Citation, doc: Doc) => {
switch (foundChunk.chunkType) {
+ case CHUNK_TYPE.IMAGE:
+ case CHUNK_TYPE.TABLE:
+ {
+ const values = foundChunk.location?.replace(/[[\]]/g, '').split(',');
+
+ if (values?.length !== 4) {
+ console.error('Location string must contain exactly 4 numbers');
+ return;
+ }
+ if (foundChunk.startPage === undefined || foundChunk.endPage === undefined) {
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ return;
+ }
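+ // Scale the chunk's normalized [x1, y1, x2, y2] location into document coordinates, offsetting y by the chunk's start page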
+ const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc);
+ const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+ const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc);
+ const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+
+ const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
+
+ const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+ const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
+
+ DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
+ }
+ break;
case CHUNK_TYPE.TEXT:
this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
setTimeout(() => (this.citationPopup.visible = false), 3000);
@@ -686,7 +754,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
.map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
.map(d => DocCast(d?.annotationOn, d))
.filter(d => d)
- .filter(d => d.ai_doc_id)
+ .filter(d => {
+ console.log(d.ai_doc_id);
+ return d.ai_doc_id;
+ })
.map(d => StrCast(d.ai_doc_id));
}
diff --git a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
index a4871f7fd..4c059177b 100644
--- a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
@@ -7,8 +7,8 @@ import { DocumentOptions, Docs } from '../../../../documents/Documents';
/**
* List of supported document types that can be created via text LLM.
*/
-type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'functionPlot' | 'dataviz' | 'noteTaking' | 'rtf' | 'message';
-const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'functionPlot', 'dataviz', 'noteTaking', 'rtf', 'message'];
+type supportedDocumentTypesType = 'text' | 'html' | 'equation' | 'function_plot' | 'dataviz' | 'note_taking' | 'rtf' | 'message' | 'mermaid_diagram';
+const supportedDocumentTypes: supportedDocumentTypesType[] = ['text', 'html', 'equation', 'function_plot', 'dataviz', 'note_taking', 'rtf', 'message', 'mermaid_diagram'];
/**
* Description of document options and data field for each type.
@@ -26,7 +26,7 @@ const documentTypesInfo = {
options: ['title', 'backgroundColor', 'fontColor', 'layout'],
dataDescription: 'The equation content as a string.',
},
- functionPlot: {
+ function_plot: {
options: ['title', 'backgroundColor', 'layout', 'function_definition'],
dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.',
},
@@ -34,7 +34,7 @@ const documentTypesInfo = {
options: ['title', 'backgroundColor', 'layout', 'chartType'],
dataDescription: 'A string of comma-separated values representing the CSV data.',
},
- noteTaking: {
+ note_taking: {
options: ['title', 'backgroundColor', 'layout'],
dataDescription: 'The initial content or structure for note-taking.',
},
@@ -46,6 +46,10 @@ const documentTypesInfo = {
options: ['title', 'backgroundColor', 'layout'],
dataDescription: 'The message content of the document.',
},
+ mermaid_diagram: {
+ options: ['title', 'backgroundColor', 'layout'],
+ dataDescription: 'The Mermaid diagram content.',
+ },
};
const createAnyDocumentToolParams = [
diff --git a/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
new file mode 100644
index 000000000..cf9e8cfc8
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
@@ -0,0 +1,74 @@
+import { v4 as uuidv4 } from 'uuid';
+import { Networking } from '../../../../Network';
+import { ClientUtils } from '../../../../../ClientUtils'; // NOTE: path assumed; adjust to wherever ClientUtils.prepend is defined in this project
+import { BaseTool } from './BaseTool';
+import { Observation } from '../types/types';
+import { ParametersType, ToolInfo } from '../types/tool_types';
+import { DocumentOptions } from '../../../../documents/Documents';
+
+const imageCreationToolParams = [
+ {
+ name: 'image_prompt',
+ type: 'string',
+ description: 'The prompt for the image to be created. This should be a string that describes the image to be created in extreme detail for an AI image generator.',
+ required: true,
+ },
+] as const;
+
+type ImageCreationToolParamsType = typeof imageCreationToolParams;
+
+const imageCreationToolInfo: ToolInfo<ImageCreationToolParamsType> = {
+ name: 'imageCreationTool',
+ citationRules: 'No citation needed. Cannot cite image generation for a response.',
+ parameterRules: imageCreationToolParams,
+ description: 'Create an image of any style, content, or design, based on a prompt. The prompt should be a detailed description of the image to be created.',
+};
+
+export class ImageCreationTool extends BaseTool<ImageCreationToolParamsType> {
+ private _addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void;
+ constructor(addLinkedDoc: (doc_type: string, data: string | undefined, options: DocumentOptions, id: string) => void) {
+ super(imageCreationToolInfo);
+ this._addLinkedDoc = addLinkedDoc;
+ }
+
+ async execute(args: ParametersType<ImageCreationToolParamsType>): Promise<Observation[]> {
+ const image_prompt = args.image_prompt;
+
+ console.log(`Generating image for prompt: ${image_prompt}`);
+ // First attempt to resolve a hosted image URL; if that fails, fall back to the base64 payload requested below
+ try {
+ try {
+ const { image_url } = await Networking.PostToServer('/generateImage', {
+ image_prompt,
+ });
+ if (image_url) {
+ const result = await Networking.PostToServer('/uploadRemoteImage', { sources: [image_url] });
+ const source = ClientUtils.prepend(result[0].accessPaths.agnostic.client);
+ return [{ type: 'image_url', image_url: { url: source } }];
+ }
+ } catch (e) {
+ console.log(e);
+ }
+
+ const { base64_data, image_path } = await Networking.PostToServer('/generateImage', {
+ image_prompt,
+ });
+ const id = uuidv4();
+
+ this._addLinkedDoc('image', image_path, {}, id);
+ return [
+ {
+ type: 'image_url',
+ image_url: { url: `data:image/jpeg;base64,${base64_data}` },
+ },
+ ];
+ } catch (error) {
+ console.log(error);
+ return [
+ {
+ type: 'text',
+ text: `An error occurred while generating image.`,
+ },
+ ];
+ }
+ }
+}
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index c15ae4c6e..54fd7c979 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -1,3 +1,4 @@
+import { indexes } from 'd3';
import { AnyLayer } from 'react-map-gl';
export enum ASSISTANT_ROLE {
@@ -95,6 +96,7 @@ export interface RAGChunk {
page_height?: number | undefined;
start_time?: number | undefined;
end_time?: number | undefined;
+ indexes?: string[] | undefined;
};
}
@@ -107,6 +109,7 @@ export interface SimplifiedChunk {
url?: string;
start_time?: number;
end_time?: number;
+ indexes?: string[];
}
export interface AI_Document {
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index af27ebe80..3ed433778 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -15,6 +15,7 @@ import { Networking } from '../../../../Network';
import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
+import { indexes } from 'd3';
dotenv.config();
@@ -28,7 +29,7 @@ export class Vectorstore {
private cohere: CohereClient; // Cohere client for generating embeddings.
private indexName: string = 'pdf-chatbot'; // Default name for the index.
private _id: string; // Unique ID for the Vectorstore instance.
- private _doc_ids: string[] = []; // List of document IDs handled by this instance.
+ private _doc_ids: () => string[]; // Getter returning the list of document IDs handled by this instance.
documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
@@ -48,7 +49,7 @@ export class Vectorstore {
this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
this._id = id;
- this._doc_ids = doc_ids();
+ this._doc_ids = doc_ids;
this.initializeIndex();
}
@@ -85,131 +86,155 @@ export class Vectorstore {
* @param progressCallback Callback to track progress.
*/
async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
- const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
-
- if (!local_file_path) {
- throw new Error('Invalid file path.');
- }
-
- const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
- let result: AI_Document & { doc_id: string };
-
- if (isAudioOrVideo) {
- console.log('Processing media file...');
- const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
- const segmentedTranscript = response;
+ const ai_document_status: string = StrCast(doc.ai_document_status);
+
+ // Skip if the document is already in progress or completed.
+ if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
+ if (ai_document_status === 'PROGRESS') {
+ console.log('Already in progress.');
+ return;
+ } else if (ai_document_status === 'COMPLETED') {
+ console.log('Already completed.');
+ return;
+ }
+ } else {
+ // Start processing the document.
+ doc.ai_document_status = 'PROGRESS';
+ const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
- // Generate embeddings for each chunk
- const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+ if (!local_file_path) {
+ console.log('Invalid file path.');
+ return;
+ }
- try {
- const embeddingsResponse = await this.cohere.v2.embed({
- model: 'embed-english-v3.0',
- inputType: 'classification',
- embeddingTypes: ['float'], // Specify that embeddings should be floats
- texts, // Pass the array of chunk texts
- });
+ const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
+ let result: AI_Document & { doc_id: string };
+ if (isAudioOrVideo) {
+ console.log('Processing media file...');
+ const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
+ const segmentedTranscript = response.condensed;
+ console.log(segmentedTranscript);
+ const summary = response.summary;
+ doc.summary = summary;
+ // Generate embeddings for each chunk
+ const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+
+ try {
+ const embeddingsResponse = await this.cohere.v2.embed({
+ model: 'embed-english-v3.0',
+ inputType: 'classification',
+ embeddingTypes: ['float'], // Specify that embeddings should be floats
+ texts, // Pass the array of chunk texts
+ });
+
+ if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
+ throw new Error('Mismatch between embeddings and the number of chunks');
+ }
- if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
- throw new Error('Mismatch between embeddings and the number of chunks');
+ // Validate that the embeddings response contains float embeddings before building the chunks
+ segmentedTranscript.forEach((chunk: any, index: number) => {
+ if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
+ throw new Error('Invalid embeddings response');
+ }
+ });
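+ // Record the full transcript and media type on the document so citations can later be resolved to timestamps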
+ doc.original_segments = JSON.stringify(response.full);
+ doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
+ const doc_id = uuidv4();
+
+ // Add transcript and embeddings to metadata
+ result = {
+ doc_id,
+ purpose: '',
+ file_name: local_file_path,
+ num_pages: 0,
+ summary: '',
+ chunks: segmentedTranscript.map((chunk: any, index: number) => ({
+ id: uuidv4(),
+ values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
+ metadata: {
+ indexes: chunk.indexes,
+ original_document: local_file_path,
+ doc_id: doc_id,
+ file_path: local_file_path,
+ start_time: chunk.start,
+ end_time: chunk.end,
+ text: chunk.text,
+ chunkType: 'text',
+ },
+ })),
+ type: 'media',
+ };
+ } catch (error) {
+ console.error('Error generating embeddings:', error);
+ throw new Error('Embedding generation failed');
}
- // Assign embeddings to each chunk
- segmentedTranscript.forEach((chunk: any, index: number) => {
- if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
- throw new Error('Invalid embeddings response');
+ doc.segmented_transcript = JSON.stringify(segmentedTranscript);
+ // Simplify chunks for storage
+ const simplifiedChunks = result.chunks.map(chunk => ({
+ chunkId: chunk.id,
+ start_time: chunk.metadata.start_time,
+ end_time: chunk.metadata.end_time,
+ indexes: chunk.metadata.indexes,
+ chunkType: CHUNK_TYPE.TEXT,
+ text: chunk.metadata.text,
+ }));
+ doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+ } else {
+ // Existing document processing logic remains unchanged
+ console.log('Processing regular document...');
+ const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+
+ while (true) {
+ await new Promise(resolve => setTimeout(resolve, 2000));
+ const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
+ const resultResponseJson = JSON.parse(resultResponse);
+ if (resultResponseJson.status === 'completed') {
+ result = resultResponseJson;
+ break;
+ }
+ const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
+ const progressResponseJson = JSON.parse(progressResponse);
+ if (progressResponseJson) {
+ progressCallback(progressResponseJson.progress, progressResponseJson.step);
}
- //chunk.embedding = embeddingsResponse.embeddings.float[index];
- });
-
- // Add transcript and embeddings to metadata
- result = {
- purpose: '',
- file_name: path.basename(local_file_path),
- num_pages: 0,
- summary: '',
- chunks: segmentedTranscript.map((chunk: any, index: number) => ({
- id: uuidv4(),
- values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
- metadata: {
- ...chunk,
- original_document: doc.id,
- doc_id: doc.id,
- file_path: local_file_path,
- start_time: chunk.start,
- end_time: chunk.end,
- text: chunk.text,
- },
- })),
- type: 'media',
- doc_id: StrCast(doc.id),
- };
- } catch (error) {
- console.error('Error generating embeddings:', error);
- throw new Error('Embedding generation failed');
- }
-
- doc.segmented_transcript = JSON.stringify(segmentedTranscript);
- } else {
- // Existing document processing logic remains unchanged
- console.log('Processing regular document...');
- const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
-
- while (true) {
- await new Promise(resolve => setTimeout(resolve, 2000));
- const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
- const resultResponseJson = JSON.parse(resultResponse);
- if (resultResponseJson.status === 'completed') {
- result = resultResponseJson;
- break;
}
- const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
- const progressResponseJson = JSON.parse(progressResponse);
- if (progressResponseJson) {
- progressCallback(progressResponseJson.progress, progressResponseJson.step);
+ if (!doc.chunk_simpl) {
+ doc.chunk_simpl = JSON.stringify({ chunks: [] });
}
+ doc.summary = result.summary;
+ doc.ai_purpose = result.purpose;
+
+ result.chunks.forEach((chunk: RAGChunk) => {
+ const chunkToAdd = {
+ chunkId: chunk.id,
+ startPage: chunk.metadata.start_page,
+ endPage: chunk.metadata.end_page,
+ location: chunk.metadata.location,
+ chunkType: chunk.metadata.type as CHUNK_TYPE,
+ text: chunk.metadata.text,
+ };
+ const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
+ new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
+ doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
+ });
}
- }
- // Index the document
- await this.indexDocument(result);
+ // Index the document
+ await this.indexDocument(result);
- // Simplify chunks for storage
- const simplifiedChunks = result.chunks.map(chunk => ({
- chunkId: chunk.id,
- start_time: chunk.metadata.start_time,
- end_time: chunk.metadata.end_time,
- chunkType: CHUNK_TYPE.TEXT,
- text: chunk.metadata.text,
- }));
- doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+ // Preserve existing metadata updates
+ if (!doc.vectorstore_id) {
+ doc.vectorstore_id = JSON.stringify([this._id]);
+ } else {
+ doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
+ }
- // Preserve existing metadata updates
- if (!doc.vectorstore_id) {
- doc.vectorstore_id = JSON.stringify([this._id]);
- } else {
- doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
- }
+ doc.ai_doc_id = result.doc_id;
- if (!doc.chunk_simpl) {
- doc.chunk_simpl = JSON.stringify({ chunks: [] });
+ console.log(`Document added: ${result.file_name}`);
+ doc.ai_document_status = 'COMPLETED';
}
-
- result.chunks.forEach((chunk: RAGChunk) => {
- const chunkToAdd = {
- chunkId: chunk.id,
- startPage: chunk.metadata.start_page,
- endPage: chunk.metadata.end_page,
- location: chunk.metadata.location,
- chunkType: chunk.metadata.type as CHUNK_TYPE,
- text: chunk.metadata.text,
- };
- const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
- new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
- doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
- });
-
- console.log(`Document added: ${result.file_name}`);
}
/**
@@ -294,17 +319,18 @@ export class Vectorstore {
if (!Array.isArray(queryEmbedding)) {
throw new Error('Query embedding is not an array');
}
-
+ console.log(this._doc_ids());
// Query the Pinecone index using the embedding and filter by document IDs.
const queryResponse: QueryResponse = await this.index.query({
vector: queryEmbedding,
filter: {
- doc_id: { $in: this._doc_ids },
+ doc_id: { $in: this._doc_ids() },
},
topK,
includeValues: true,
includeMetadata: true,
});
+ console.log(queryResponse);
// Map the results into RAGChunks and return them.
return queryResponse.matches.map(
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 1fd88cbd6..83bb1b228 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -29,6 +29,7 @@ import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
import ffmpeg from 'fluent-ffmpeg';
import OpenAI from 'openai';
import * as xmlbuilder from 'xmlbuilder';
+import { last } from 'lodash';
// Enumeration of directories where different file types are stored
export enum Directory {
@@ -285,60 +286,93 @@ export default class AssistantManager extends ApiManager {
// Step 3: Extract concise JSON
console.log('Extracting concise JSON...');
- const conciseJSON = transcription.segments?.map((segment: any) => ({
+ const originalSegments = transcription.segments?.map((segment: any, index: number) => ({
+ index: index.toString(),
text: segment.text,
start: segment.start,
end: segment.end,
}));
- // Step 4: Combine segments with GPT-4
- console.log('Combining segments with GPT-4...');
- const schema = {
- name: 'combine_segments_schema',
- schema: {
- type: 'object',
- properties: {
- combined_segments: {
- type: 'array',
- items: {
- type: 'object',
- properties: {
- text: { type: 'string' },
- start: { type: 'number' },
- end: { type: 'number' },
- },
- required: ['text', 'start', 'end'],
- },
- },
- },
- required: ['combined_segments'],
- },
- };
-
- const completion = await openai.chat.completions.create({
- model: 'gpt-4o-2024-08-06',
- messages: [
- {
- role: 'system',
- content: 'Combine text segments into coherent sections, each between 5 and 10 seconds, based on their content. Return the result as JSON that follows the schema.',
- },
- {
- role: 'user',
- content: JSON.stringify(conciseJSON),
- },
- ],
- response_format: {
- type: 'json_schema',
- json_schema: schema,
- },
+ interface ConciseSegment {
+ text: string;
+ indexes: string[];
+ start: number | null;
+ end: number | null;
+ }
+
+ const combinedSegments = [];
+ let currentGroup: ConciseSegment = { text: '', indexes: [], start: null, end: null };
+ let currentDuration = 0;
+
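+ // Greedily pack consecutive transcript segments into one group until the duration cap is reached, then start a new group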
+ originalSegments?.forEach(segment => {
+ const segmentDuration = segment.end - segment.start;
+
+ if (currentDuration + segmentDuration <= 4000) {
+ // Add segment to the current group
+ currentGroup.text += (currentGroup.text ? ' ' : '') + segment.text;
+ currentGroup.indexes.push(segment.index);
+ if (currentGroup.start === null) {
+ currentGroup.start = segment.start;
+ }
+ currentGroup.end = segment.end;
+ currentDuration += segmentDuration;
+ } else {
+ // Push the current group and start a new one
+ combinedSegments.push({ ...currentGroup });
+ currentGroup = {
+ text: segment.text,
+ indexes: [segment.index],
+ start: segment.start,
+ end: segment.end,
+ };
+ currentDuration = segmentDuration;
+ }
});
- const combinedSegments = JSON.parse(completion.choices[0].message?.content ?? '{"combined_segments": []}').combined_segments;
+ // Push the final group if it has content
+ if (currentGroup.text) {
+ combinedSegments.push({ ...currentGroup });
+ }
+ const lastSegment = combinedSegments[combinedSegments.length - 1];
+
+ // Check if the last segment is too short and combine it with the second last
+ if (combinedSegments.length > 1 && lastSegment.end && lastSegment.start) {
+ const secondLastSegment = combinedSegments[combinedSegments.length - 2];
+ const lastDuration = lastSegment.end - lastSegment.start;
+
+ if (lastDuration < 30) {
+ // Combine the last segment with the second last
+ secondLastSegment.text += (secondLastSegment.text ? ' ' : '') + lastSegment.text;
+ secondLastSegment.indexes = secondLastSegment.indexes.concat(lastSegment.indexes);
+ secondLastSegment.end = lastSegment.end;
+
+ // Remove the last segment from the array
+ combinedSegments.pop();
+ }
+ }
console.log('Segments combined successfully.');
+ console.log('Generating summary using GPT-4...');
+ const combinedText = combinedSegments.map(segment => segment.text).join(' ');
+
+ let summary = '';
+ try {
+ const completion = await openai.chat.completions.create({
+ messages: [{ role: 'system', content: `Summarize the following text in a concise paragraph:\n\n${combinedText}` }],
+ model: 'gpt-4o',
+ });
+ console.log('Summary generation complete.');
+ summary = completion.choices[0].message.content ?? 'Summary could not be generated.';
+ } catch (summaryError) {
+ console.error('Error generating summary:', summaryError);
+ summary = 'Summary could not be generated.';
+ }
// Step 5: Return the JSON result
- res.send(combinedSegments);
+ res.send({ full: originalSegments, condensed: combinedSegments, summary });
} catch (error) {
console.error('Error processing media file:', error);
res.status(500).send({ error: 'Failed to process media file' });
@@ -380,6 +414,51 @@ export default class AssistantManager extends ApiManager {
}
};
+ register({
+ method: Method.POST,
+ subscription: '/generateImage',
+ secureHandler: async ({ req, res }) => {
+ const { image_prompt } = req.body;
+
+ if (!image_prompt) {
+ res.status(400).send({ error: 'No prompt provided' });
+ return;
+ }
+
+ try {
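+ // Request a base64 payload from DALL-E 3 so the generated image can be written to local storage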
+ const image = await openai.images.generate({ model: 'dall-e-3', prompt: image_prompt, response_format: 'b64_json' });
+ console.log(image);
+
+ const base64String = image.data[0].b64_json;
+ if (!base64String) {
+ throw new Error('No base64 data received from image generation');
+ }
+ // Generate a UUID for the file to ensure unique naming
+ const uuidv4 = uuid.v4();
+ const fullFilename = `${uuidv4}.jpg`; // Use the UUID as the file name
+
+ // Get the full server path where the file will be saved
+ const serverFilePath = serverPathToFile(Directory.images, fullFilename);
+
+ const binaryData = Buffer.from(base64String, 'base64');
+
+ // Write the decoded image bytes to the file
+ await writeFileAsync(serverFilePath, binaryData);
+
+ // Construct the client-accessible URL for the file
+ const fileUrl = clientPathToFile(Directory.images, fullFilename);
+
+ // Send the base64 data and the client-accessible image path back to the client
+ res.send({ base64_data: base64String, image_path: fileUrl });
+ } catch (error) {
+ console.error('Error generating the image:', error);
+ res.status(500).send({
+ error: 'Failed to generate the image',
+ });
+ }
+ },
+ });
+
// Register a proxy fetch API route
register({
method: Method.POST,