diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-12-18 20:34:33 -0500 |
commit | 57e3c9b9977228a561e8972a469a67f17f4bcd9c (patch) | |
tree | 1a4f23921e121ca891b3fa6a49a30a92ea76d233 /src/server/ApiManagers/AssistantManager.ts | |
parent | ad1e0cf62187e0f8bbb19b4720b7681585361de9 (diff) |
trying new image generation plus new implementation of video and audio
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 165 |
1 files changed, 122 insertions, 43 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 1fd88cbd6..83bb1b228 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -29,6 +29,7 @@ import ffmpegInstaller from '@ffmpeg-installer/ffmpeg'; import ffmpeg from 'fluent-ffmpeg'; import OpenAI from 'openai'; import * as xmlbuilder from 'xmlbuilder'; +import { last } from 'lodash'; // Enumeration of directories where different file types are stored export enum Directory { @@ -285,60 +286,93 @@ export default class AssistantManager extends ApiManager { // Step 3: Extract concise JSON console.log('Extracting concise JSON...'); - const conciseJSON = transcription.segments?.map((segment: any) => ({ + const originalSegments = transcription.segments?.map((segment: any, index: number) => ({ + index: index.toString(), text: segment.text, start: segment.start, end: segment.end, })); - // Step 4: Combine segments with GPT-4 - console.log('Combining segments with GPT-4...'); - const schema = { - name: 'combine_segments_schema', - schema: { - type: 'object', - properties: { - combined_segments: { - type: 'array', - items: { - type: 'object', - properties: { - text: { type: 'string' }, - start: { type: 'number' }, - end: { type: 'number' }, - }, - required: ['text', 'start', 'end'], - }, - }, - }, - required: ['combined_segments'], - }, - }; - - const completion = await openai.chat.completions.create({ - model: 'gpt-4o-2024-08-06', - messages: [ - { - role: 'system', - content: 'Combine text segments into coherent sections, each between 5 and 10 seconds, based on their content. 
Return the result as JSON that follows the schema.', - }, - { - role: 'user', - content: JSON.stringify(conciseJSON), - }, - ], - response_format: { - type: 'json_schema', - json_schema: schema, - }, + interface ConciseSegment { + text: string; + indexes: string[]; + start: number | null; + end: number | null; + } + + const combinedSegments = []; + let currentGroup: ConciseSegment = { text: '', indexes: [], start: null, end: null }; + let currentDuration = 0; + + originalSegments?.forEach(segment => { + const segmentDuration = segment.end - segment.start; + + if (currentDuration + segmentDuration <= 4000) { + // Add segment to the current group + currentGroup.text += (currentGroup.text ? ' ' : '') + segment.text; + currentGroup.indexes.push(segment.index); + if (currentGroup.start === null) { + currentGroup.start = segment.start; + } + currentGroup.end = segment.end; + currentDuration += segmentDuration; + } else { + // Push the current group and start a new one + combinedSegments.push({ ...currentGroup }); + currentGroup = { + text: segment.text, + indexes: [segment.index], + start: segment.start, + end: segment.end, + }; + currentDuration = segmentDuration; + } }); - const combinedSegments = JSON.parse(completion.choices[0].message?.content ?? '{"combined_segments": []}').combined_segments; + // Push the final group if it has content + if (currentGroup.text) { + combinedSegments.push({ ...currentGroup }); + } + const lastSegment = combinedSegments[combinedSegments.length - 1]; + + // Check if the last segment is too short and combine it with the second last + if (combinedSegments.length > 1 && lastSegment.end && lastSegment.start) { + const secondLastSegment = combinedSegments[combinedSegments.length - 2]; + const lastDuration = lastSegment.end - lastSegment.start; + + if (lastDuration < 30) { + // Combine the last segment with the second last + secondLastSegment.text += (secondLastSegment.text ? 
' ' : '') + lastSegment.text; + secondLastSegment.indexes = secondLastSegment.indexes.concat(lastSegment.indexes); + secondLastSegment.end = lastSegment.end; + + // Remove the last segment from the array + combinedSegments.pop(); + } + } console.log('Segments combined successfully.'); + console.log('Generating summary using GPT-4...'); + const combinedText = combinedSegments.map(segment => segment.text).join(' '); + + let summary = ''; + try { + const completion = await openai.chat.completions.create({ + messages: [{ role: 'system', content: `Summarize the following text in a concise paragraph:\n\n${combinedText}` }], + model: 'gpt-4o', + }); + console.log('Summary generation complete.'); + summary = completion.choices[0].message.content ?? 'Summary could not be generated.'; + } catch (summaryError) { + console.error('Error generating summary:', summaryError); + summary = 'Summary could not be generated.'; + } + // Step 5: Return the JSON result + res.send({ full: originalSegments, condensed: combinedSegments, summary }); + // Step 5: Return the JSON result - res.send(combinedSegments); + res.send({ full: originalSegments, condensed: combinedSegments, summary: summary }); } catch (error) { console.error('Error processing media file:', error); res.status(500).send({ error: 'Failed to process media file' }); @@ -380,6 +414,51 @@ export default class AssistantManager extends ApiManager { } }; + register({ + method: Method.POST, + subscription: '/generateImage', + secureHandler: async ({ req, res }) => { + const { image_prompt } = req.body; + + if (!image_prompt) { + res.status(400).send({ error: 'No prompt provided' }); + return; + } + + try { + const image = await openai.images.generate({ model: 'dall-e-3', prompt: image_prompt, response_format: 'b64_json' }); + console.log(image); + + const base64String = image.data[0].b64_json; + if (!base64String) { + throw new Error('No base64 data received from image generation'); + } + // Generate a UUID for the file to ensure 
unique naming + const uuidv4 = uuid.v4(); + const fullFilename = `${uuidv4}.jpg`; // Prefix the file name with the UUID + + // Get the full server path where the file will be saved + const serverFilePath = serverPathToFile(Directory.images, fullFilename); + + const binaryData = Buffer.from(base64String, 'base64'); + + // Write the CSV data (which is a raw string) to the file + await writeFileAsync(serverFilePath, binaryData); + + // Construct the client-accessible URL for the file + const fileUrl = clientPathToFile(Directory.images, fullFilename); + + // Send the file URL and UUID back to the client + res.send({ base64_data: base64String, image_path: fileUrl }); + } catch (error) { + console.error('Error fetching the URL:', error); + res.status(500).send({ + error: 'Failed to fetch the URL', + }); + } + }, + }); + // Register a proxy fetch API route register({ method: Method.POST, |