Diffstat (limited to 'src/client/views/nodes/VideoBox.tsx')
 src/client/views/nodes/VideoBox.tsx | 51 +++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+), 0 deletions(-)
diff --git a/src/client/views/nodes/VideoBox.tsx b/src/client/views/nodes/VideoBox.tsx
index fa099178c..0e7afbab1 100644
--- a/src/client/views/nodes/VideoBox.tsx
+++ b/src/client/views/nodes/VideoBox.tsx
@@ -30,6 +30,7 @@ import { StyleProp } from '../StyleProp';
import { DocumentView } from './DocumentView';
import { FieldView, FieldViewProps } from './FieldView';
import { FocusViewOptions } from './FocusViewOptions';
+import { gptImageLabel } from '../../apis/gpt/GPT';
import './VideoBox.scss';
/**
@@ -109,6 +110,56 @@ export class VideoBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
return this._videoRef;
}
+
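+    /**
+     * Grabs a frame from the midpoint of the video, asks GPT to classify and tag it,
+     * and stores the returned labels as chat tags on this document.
+     */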
+    autoTag = async () => {
+        try {
+            if (!this.player) throw new Error('Video element not available.');
+
+            // 1) Extract a frame at the video's midpoint
+            const videoDuration = this.player.duration;
+            if (!isFinite(videoDuration)) throw new Error('Video metadata not loaded; duration is unavailable.');
+            const snapshotTime = videoDuration / 2;
+
+            // Seek the video element to the midpoint and wait for the seek to complete
+            await new Promise<void>(resolve => {
+                const onSeeked = () => {
+                    this.player!.removeEventListener('seeked', onSeeked);
+                    resolve();
+                };
+                this.player!.addEventListener('seeked', onSeeked);
+                this.player!.currentTime = snapshotTime;
+            });
+
+            // 2) Draw the frame onto a canvas and get a base64 representation.
+            // Note: toDataURL throws a SecurityError if the video source is cross-origin
+            // and served without CORS headers (the canvas becomes tainted).
+            const canvas = document.createElement('canvas');
+            canvas.width = this.player.videoWidth;
+            canvas.height = this.player.videoHeight;
+            const ctx = canvas.getContext('2d');
+            if (!ctx) throw new Error('Failed to create canvas context.');
+            ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height);
+            const base64Image = canvas.toDataURL('image/png');
+
+            // 3) Send the image data to GPT for classification and descriptive tags
+            const raw = await gptImageLabel(
+                base64Image,
+                `Classify this video frame as either a PERSON or LANDSCAPE.
+                 Then provide five additional descriptive tags (single words) separated by spaces.
+                 Finally, add one detailed summary phrase using underscores.`
+            );
+
+            // 4) Normalize the response and store the labels in the Document's tags
+            const label = raw.trim().toUpperCase();
+            const tokens = label.split(/\s+/);
+            this.Document.$tags_chat = new List<string>();
+            tokens.forEach(tok => (this.Document.$tags_chat as List<string>).push(tok));
+
+            // 5) Turn on tag display in the layout
+            this.Document._layout_showTags = true;
+        } catch (err) {
+            console.error('Video autoTag failed:', err);
+        }
+    };
+
componentDidMount() {
this.unmounting = false;
this._props.setContentViewBox?.(this); // this tells the DocumentView that this VideoBox is the "content" of the document. this allows the DocumentView to indirectly call getAnchor() on the VideoBox when making a link.
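
The seek-then-draw pattern in steps 1 and 2 generalizes beyond VideoBox. Below is a minimal standalone sketch of the same technique, assuming a plain HTMLVideoElement whose metadata has loaded and whose source is same-origin (or served with CORS headers); captureFrameAt is a hypothetical helper name, not part of this commit.

// Minimal sketch: capture one frame of a video as a PNG data URL.
async function captureFrameAt(video: HTMLVideoElement, time: number): Promise<string> {
    // Wait until the browser has decoded the frame at the requested time.
    await new Promise<void>(resolve => {
        const onSeeked = () => {
            video.removeEventListener('seeked', onSeeked);
            resolve();
        };
        video.addEventListener('seeked', onSeeked);
        video.currentTime = time;
    });
    const canvas = document.createElement('canvas');
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    const ctx = canvas.getContext('2d');
    if (!ctx) throw new Error('Failed to create canvas context.');
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
    return canvas.toDataURL('image/png'); // throws if the canvas is tainted by a cross-origin source
}

Listening for the 'seeked' event rather than polling currentTime ensures the canvas draw happens only after the browser has actually decoded the target frame.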
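The response handling in step 4 assumes GPT returns a single whitespace-separated string. A hedged sketch of parsing that shape into a classification plus tags, under the assumption that the first token is always the PERSON/LANDSCAPE class; parseLabels is an illustrative name only, not part of this commit.

// Illustrative parser for the expected GPT reply shape:
// "<PERSON|LANDSCAPE> <tag1> ... <tag5> <summary_phrase_with_underscores>"
function parseLabels(raw: string): { classification: string; tags: string[] } {
    const tokens = raw.trim().toUpperCase().split(/\s+/).filter(Boolean);
    const [classification = 'UNKNOWN', ...tags] = tokens;
    return { classification, tags };
}

// Example (hypothetical model output):
// parseLabels('PERSON sunny beach family smiling vacation family_playing_on_a_sunny_beach')
//   -> { classification: 'PERSON', tags: ['SUNNY', 'BEACH', 'FAMILY', 'SMILING', 'VACATION', 'FAMILY_PLAYING_ON_A_SUNNY_BEACH'] }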