aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 158
1 files changed, 155 insertions, 3 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 4d2068014..1fd88cbd6 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -24,6 +24,11 @@ import { Method } from '../RouteManager';
import { filesDirectory, publicDirectory } from '../SocketData';
import ApiManager, { Registration } from './ApiManager';
import { getServerPath } from '../../client/util/reportManager/reportManagerUtils';
+import { file } from 'jszip';
+import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
+import ffmpeg from 'fluent-ffmpeg';
+import OpenAI from 'openai';
+import * as xmlbuilder from 'xmlbuilder';
// Enumeration of directories where different file types are stored
export enum Directory {
@@ -88,6 +93,7 @@ export default class AssistantManager extends ApiManager {
protected initialize(register: Registration): void {
// Initialize Google Custom Search API
const customsearch = google.customsearch('v1');
+ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Register Wikipedia summary API route
register({
@@ -197,6 +203,148 @@ export default class AssistantManager extends ApiManager {
}
},
});
+ /**
+  * Extracts the audio track of a media file into a WAV file by running the `ffmpeg` executable.
+  * The output is mono, 16 kHz, signed 16-bit little-endian PCM (see the argument list below).
+  *
+  * @param videoPath path of the input file passed to ffmpeg via `-i`
+  * @param outputAudioPath path of the WAV file to write; an existing file at this path is overwritten
+  * @returns a promise that resolves once ffmpeg exits with code 0, and rejects if the process
+  *          cannot be started or exits with any other code (or is killed by a signal)
+  */
+ function convertVideoToAudio(videoPath: string, outputAudioPath: string): Promise<void> {
+     return new Promise((resolve, reject) => {
+         const ffmpegProcess = spawn(
+             'ffmpeg',
+             [
+                 '-nostdin', // Never wait for interactive input
+                 '-y', // Overwrite the output instead of prompting (a prompt would stall this promise forever)
+                 '-loglevel',
+                 'error', // Only emit errors so stderr stays small
+                 '-i',
+                 videoPath, // Input file
+                 '-vn', // No video
+                 '-acodec',
+                 'pcm_s16le', // Audio codec
+                 '-ac',
+                 '1', // Number of audio channels
+                 '-ar',
+                 '16000', // Audio sampling frequency
+                 '-f',
+                 'wav', // Output format
+                 outputAudioPath, // Output file
+             ],
+             // stdin/stdout are not needed; stderr is piped and drained below so a full pipe can never block ffmpeg
+             { stdio: ['ignore', 'ignore', 'pipe'] }
+         );
+
+         // Keep only the tail of stderr: enough to explain a failure without unbounded growth
+         let stderrTail = '';
+         ffmpegProcess.stderr?.on('data', (chunk: Buffer) => {
+             stderrTail = (stderrTail + chunk.toString()).slice(-2000);
+         });
+
+         ffmpegProcess.on('error', error => {
+             console.error('Error running ffmpeg:', error);
+             reject(error);
+         });
+
+         ffmpegProcess.on('close', (code, signal) => {
+             if (code === 0) {
+                 console.log('Audio extraction complete:', outputAudioPath);
+                 resolve();
+             } else {
+                 const signalInfo = signal ? ` (signal ${signal})` : '';
+                 const details = stderrTail.trim() ? `: ${stderrTail.trim()}` : '';
+                 reject(new Error(`ffmpeg exited with code ${code}${signalInfo}${details}`));
+             }
+         });
+     });
+ }
+
+ // Register the media transcription route.
+ // POST /processMediaFile with body { fileName }: looks the file up in the audio directory
+ // (names ending in .mp3) or the videos directory (everything else), extracts a WAV track for
+ // videos, transcribes it with Whisper, asks GPT to merge the transcript segments, and responds
+ // with an array of { text, start, end } objects.
+ // Responses: 400 for a missing/invalid file name, 404 if the file does not exist, 500 on any processing failure.
+ register({
+     method: Method.POST,
+     subscription: '/processMediaFile',
+     secureHandler: async ({ req, res }) => {
+         const { fileName } = req.body ?? {};
+
+         // Ensure the filename is provided (and really is a string, since it is used with string methods below)
+         if (typeof fileName !== 'string' || !fileName) {
+             res.status(400).send({ error: 'Filename is required' });
+             return;
+         }
+
+         // fileName comes straight from the client: accept only a bare file name so the lookup
+         // below cannot be steered outside the media directories (e.g. '../../secret').
+         if (fileName === '.' || fileName === '..' || fileName.includes('/') || fileName.includes('\\')) {
+             res.status(400).send({ error: 'Invalid filename' });
+             return;
+         }
+
+         try {
+             // Determine the file type and location
+             const isAudio = fileName.toLowerCase().endsWith('.mp3');
+             const directory = isAudio ? Directory.audio : Directory.videos;
+             const filePath = serverPathToFile(directory, fileName);
+
+             // Check if the file exists
+             if (!fs.existsSync(filePath)) {
+                 res.status(404).send({ error: 'File not found' });
+                 return;
+             }
+
+             console.log(`Processing ${isAudio ? 'audio' : 'video'} file: ${fileName}`);
+
+             // Step 1: Extract audio if it's a video
+             let audioPath = filePath;
+             if (!isAudio) {
+                 // NOTE(review): the WAV is written into the audio directory under the video's base name and is
+                 // not removed afterwards — confirm that keeping it (and possibly replacing a same-named file) is intended.
+                 const audioFileName = `${path.basename(fileName, path.extname(fileName))}.wav`;
+                 audioPath = path.join(pathToDirectory(Directory.audio), audioFileName);
+
+                 console.log('Extracting audio from video...');
+                 await convertVideoToAudio(filePath, audioPath);
+             }
+
+             // Step 2: Transcribe audio using OpenAI Whisper
+             console.log('Transcribing audio...');
+             const transcription = await openai.audio.transcriptions.create({
+                 file: fs.createReadStream(audioPath) as any,
+                 model: 'whisper-1',
+                 response_format: 'verbose_json',
+                 timestamp_granularities: ['segment'],
+             });
+
+             console.log('Audio transcription complete.');
+
+             // Step 3: Extract concise JSON (only the fields the combining step needs)
+             console.log('Extracting concise JSON...');
+             const conciseJSON = (transcription.segments ?? []).map((segment: { text: string; start: number; end: number }) => ({
+                 text: segment.text,
+                 start: segment.start,
+                 end: segment.end,
+             }));
+
+             // Nothing was transcribed: skip the GPT call, which would otherwise be sent an empty/undefined message
+             if (conciseJSON.length === 0) {
+                 console.log('No transcript segments found; returning an empty result.');
+                 res.send([]);
+                 return;
+             }
+
+             // Step 4: Combine segments with GPT-4
+             console.log('Combining segments with GPT-4...');
+             const schema = {
+                 name: 'combine_segments_schema',
+                 // Strict mode makes the model output conform to the schema; it requires every object
+                 // to list all of its properties as required and to set additionalProperties: false.
+                 strict: true,
+                 schema: {
+                     type: 'object',
+                     properties: {
+                         combined_segments: {
+                             type: 'array',
+                             items: {
+                                 type: 'object',
+                                 properties: {
+                                     text: { type: 'string' },
+                                     start: { type: 'number' },
+                                     end: { type: 'number' },
+                                 },
+                                 required: ['text', 'start', 'end'],
+                                 additionalProperties: false,
+                             },
+                         },
+                     },
+                     required: ['combined_segments'],
+                     additionalProperties: false,
+                 },
+             };
+
+             const completion = await openai.chat.completions.create({
+                 model: 'gpt-4o-2024-08-06',
+                 messages: [
+                     {
+                         role: 'system',
+                         content: 'Combine text segments into coherent sections, each between 5 and 10 seconds, based on their content. Return the result as JSON that follows the schema.',
+                     },
+                     {
+                         role: 'user',
+                         content: JSON.stringify(conciseJSON),
+                     },
+                 ],
+                 response_format: {
+                     type: 'json_schema',
+                     json_schema: schema,
+                 },
+             });
+
+             // The reply may be missing or empty (e.g. no choices, or a refusal); treat that as "no segments"
+             const content = completion.choices[0]?.message?.content;
+             const parsed = content ? JSON.parse(content) : null;
+             const combinedSegments = Array.isArray(parsed?.combined_segments) ? parsed.combined_segments : [];
+
+             console.log('Segments combined successfully.');
+
+             // Step 5: Return the JSON result
+             res.send(combinedSegments);
+         } catch (error) {
+             console.error('Error processing media file:', error);
+             res.status(500).send({ error: 'Failed to process media file' });
+         }
+     },
+ });
// Axios instance with custom headers for scraping
const axiosInstance = axios.create({
@@ -314,7 +462,7 @@ export default class AssistantManager extends ApiManager {
// Spawn the Python process and track its progress/output
// eslint-disable-next-line no-use-before-define
- spawnPythonProcess(jobId, file_name, file_data);
+ spawnPythonProcess(jobId, file_name, public_path);
// Send the job ID back to the client for tracking
res.send({ jobId });
@@ -388,6 +536,7 @@ export default class AssistantManager extends ApiManager {
if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
try {
const filePath = path.join(pathToDirectory(Directory.chunk_images), chunk.metadata.file_path); // Get the file path
+ console.log(filePath);
readFileAsync(filePath).then(imageBuffer => {
const base64Image = imageBuffer.toString('base64'); // Convert the image to base64
@@ -460,7 +609,7 @@ export default class AssistantManager extends ApiManager {
}
}
-function spawnPythonProcess(jobId: string, file_name: string, file_data: string) {
+function spawnPythonProcess(jobId: string, file_name: string, file_path: string) {
const venvPath = path.join(__dirname, '../chunker/venv');
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
@@ -470,7 +619,7 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
function runPythonScript() {
const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
- const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data, outputDirectory]);
+ const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory]);
let pythonOutput = '';
let stderrOutput = '';
@@ -593,3 +742,6 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
runPythonScript();
}
}
+/**
+ * Placeholder that unconditionally throws `Error('Function not implemented.')`; `filePath` is never read.
+ * NOTE(review): no call site is visible in this diff — this looks like an editor-generated stub;
+ * confirm whether it is needed, otherwise remove it.
+ */
+function customFfmpeg(filePath: string) {
+ throw new Error('Function not implemented.');
+}