Diffstat (limited to 'src')
-rw-r--r--  src/client/views/nodes/PDFBox.tsx   | 63
-rw-r--r--  src/client/views/nodes/VideoBox.tsx | 89
2 files changed, 68 insertions(+), 84 deletions(-)
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx
index a0c7d8d22..5501f0a31 100644
--- a/src/client/views/nodes/PDFBox.tsx
+++ b/src/client/views/nodes/PDFBox.tsx
@@ -81,46 +81,35 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
}
}
- autoTag = async () => {
- try {
- if (!this._pdf) {
- throw new Error('PDF not loaded');
- }
-
- // 1) Extract text from the first few pages (e.g., first 2 pages)
- const maxPages = Math.min(2, this._pdf.numPages);
- let textContent = '';
- for (let pageNum = 1; pageNum <= maxPages; pageNum++) {
- const page = await this._pdf.getPage(pageNum);
- const text = await page.getTextContent();
- const pageText = text.items.map(item => ('str' in item ? item.str : '')).join(' ');
- textContent += ` ${pageText}`;
- }
-
- if (!textContent.trim()) {
- throw new Error('No text found in PDF');
- }
-
- // 2) Ask GPT to classify and provide descriptive tags
- const raw = await gptAPICall(
- `"${textContent.trim().slice(0, 2000)}"`,
- GPTCallType.CLASSIFYTEXTFULL
- );
-
- // 3) Normalize and store the labels
- const label = raw.trim().toUpperCase();
+ autoTag = async () => {
+ if (!this.Document.$tags_chat && this._pdf) {
+ if (!this.dataDoc.text) {
+ // 1) Extract text from the first few pages (e.g., first 2 pages)
+ const maxPages = Math.min(2, this._pdf.numPages);
+ const promises: Promise<string>[] = [];
+ for (let pageNum = 1; pageNum <= maxPages; pageNum++) {
+ promises.push(
+ this._pdf
+ .getPage(pageNum)
+ .then(page => page.getTextContent())
+ .then(content => content.items.map(item => ('str' in item ? item.str : '')).join(' '))
+ );
+ }
+ this.dataDoc.text = (await Promise.all(promises)).join(' ');
+ }
- const tokens = label.split(/\s+/);
- this.Document.$tags_chat = new List<string>();
- tokens.forEach(tok => (this.Document.$tags_chat as List<string>).push(tok));
+ const text = StrCast(this.dataDoc.text).trim().slice(0, 2000);
+ if (text) {
+ // 2) Ask GPT to classify and provide descriptive tags, then normalize the results
+ const label = await gptAPICall(`"${text}"`, GPTCallType.CLASSIFYTEXTFULL).then(raw => raw.trim().toUpperCase());
- // 4) Show tags in layout
- this.Document._layout_showTags = true;
+ this.Document.$tags_chat = new List<string>(label.split(/\s+/));
- } catch (err) {
- console.error('PDF autoTag failed:', err);
- }
-};
+ // 3) Show tags in layout
+ this.Document._layout_showTags = true;
+ }
+ }
+ };
replaceCanvases = (oldDiv: HTMLElement, newDiv: HTMLElement) => {
if (oldDiv.childNodes) {
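For reference, a minimal standalone sketch of the extract-and-tag flow introduced above, outside the Dash component. It assumes pdfjs-dist for PDF parsing (worker setup omitted) and a caller-supplied classify() callback standing in for gptAPICall with GPTCallType.CLASSIFYTEXTFULL; the helper names and import path are illustrative, not part of the codebase.

import * as pdfjsLib from 'pdfjs-dist';

// Extract the text of the first `maxPages` pages in parallel, mirroring the
// Promise.all approach used in the new autoTag.
async function extractLeadingText(pdf: pdfjsLib.PDFDocumentProxy, maxPages = 2): Promise<string> {
    const pages = Math.min(maxPages, pdf.numPages);
    const pageTexts = await Promise.all(
        Array.from({ length: pages }, (_, i) =>
            pdf
                .getPage(i + 1) // pdf.js pages are 1-indexed
                .then(page => page.getTextContent())
                .then(content => content.items.map(item => ('str' in item ? item.str : '')).join(' '))
        )
    );
    return pageTexts.join(' ');
}

// Classify the leading text and return normalized, upper-cased tag tokens.
// `classify` is a hypothetical stand-in for the GPT call.
async function tagPdf(url: string, classify: (prompt: string) => Promise<string>): Promise<string[]> {
    const pdf = await pdfjsLib.getDocument(url).promise;
    const text = (await extractLeadingText(pdf)).trim().slice(0, 2000); // cap the prompt size
    if (!text) return [];
    const label = (await classify(`"${text}"`)).trim().toUpperCase();
    return label.split(/\s+/);
}

Fetching the pages through Promise.all keeps extraction latency close to that of a single page rather than growing linearly with maxPages, which is the main point of the rewrite over the old sequential loop.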
diff --git a/src/client/views/nodes/VideoBox.tsx b/src/client/views/nodes/VideoBox.tsx
index 4d85b4942..f994bdbb5 100644
--- a/src/client/views/nodes/VideoBox.tsx
+++ b/src/client/views/nodes/VideoBox.tsx
@@ -110,56 +110,51 @@ export class VideoBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
return this._videoRef;
}
+ autoTag = async () => {
+ if (this.Document.$tags_chat) return;
+ try {
+ if (!this.player) throw new Error('Video element not available.');
+
+ // 1) Extract a frame at the video's midpoint
+ const videoDuration = this.player.duration;
+ const snapshotTime = videoDuration / 2;
+
+ // Seek the video element to the midpoint
+ await new Promise<void>(resolve => {
+ const onSeeked = () => {
+ this.player!.removeEventListener('seeked', onSeeked);
+ resolve();
+ };
+ this.player!.addEventListener('seeked', onSeeked);
+ this.player!.currentTime = snapshotTime;
+ });
- autoTag = async () => {
- try {
- if (!this.player) throw new Error('Video element not available.');
-
- // 1) Extract a frame at the video's midpoint
- const videoDuration = this.player.duration;
- const snapshotTime = videoDuration / 2;
-
- // Seek the video element to the midpoint
- await new Promise<void>((resolve, reject) => {
- const onSeeked = () => {
- this.player!.removeEventListener('seeked', onSeeked);
- resolve();
- };
- this.player!.addEventListener('seeked', onSeeked);
- this.player!.currentTime = snapshotTime;
- });
-
- // 2) Draw the frame onto a canvas and get a base64 representation
- const canvas = document.createElement('canvas');
- canvas.width = this.player.videoWidth;
- canvas.height = this.player.videoHeight;
- const ctx = canvas.getContext('2d');
- if (!ctx) throw new Error('Failed to create canvas context.');
- ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height);
- const base64Image = canvas.toDataURL('image/png');
-
- // 3) Send the image data to GPT for classification and descriptive tags
- const raw = await gptImageLabel(
- base64Image,
- `Classify this video frame as either a PERSON or LANDSCAPE.
+ // 2) Draw the frame onto a canvas and get a base64 representation
+ const canvas = document.createElement('canvas');
+ canvas.width = this.player.videoWidth;
+ canvas.height = this.player.videoHeight;
+ const ctx = canvas.getContext('2d');
+ if (!ctx) throw new Error('Failed to create canvas context.');
+ ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height);
+ const base64Image = canvas.toDataURL('image/png');
+
+ // 3) Send the image data to GPT for classification and descriptive tags
+ const label = await gptImageLabel(
+ base64Image,
+ `Classify this video frame as either a PERSON or LANDSCAPE.
Then provide five additional descriptive tags (single words) separated by spaces.
Finally, add one detailed summary phrase using underscores.`
- );
-
- // 4) Normalize and store labels in the Document's tags
- const label = raw.trim().toUpperCase();
- const tokens = label.split(/\s+/);
- this.Document.$tags_chat = new List<string>();
- tokens.forEach(tok => (this.Document.$tags_chat as List<string>).push(tok));
- const aspect = this.player!.videoWidth / (this.player!.videoHeight || 1);
- (this.Document.$tags_chat as List<string>).push(`ASPECT_${aspect}`);
- // 5) Turn on tag display in layout
- this.Document._layout_showTags = true;
-
- } catch (err) {
- console.error('Video autoTag failed:', err);
- }
-};
+ ).then(raw => raw.trim().toUpperCase());
+
+ // 4) Normalize and store labels in the Document's tags
+ const aspect = this.player!.videoWidth / (this.player!.videoHeight || 1);
+ this.Document.$tags_chat = new List<string>([...label.split(/\s+/), `ASPECT_${aspect}`]);
+ // 5) Turn on tag display in layout
+ this.Document._layout_showTags = true;
+ } catch (err) {
+ console.error('Video autoTag failed:', err);
+ }
+ };
componentDidMount() {
this.unmounting = false;
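Similarly, a minimal sketch of the frame-capture-and-label flow from the new VideoBox.autoTag, assuming an already-loaded HTMLVideoElement and a caller-supplied labelImage() callback standing in for gptImageLabel; both helpers are illustrative names only.

// Seek to the midpoint, wait for the 'seeked' event, then draw the current
// frame onto an offscreen canvas and return it as a base64 PNG data URL.
// Note: this mutates video.currentTime, just as the component code does.
async function captureMidpointFrame(video: HTMLVideoElement): Promise<string> {
    await new Promise<void>(resolve => {
        const onSeeked = () => {
            video.removeEventListener('seeked', onSeeked);
            resolve();
        };
        video.addEventListener('seeked', onSeeked);
        video.currentTime = video.duration / 2;
    });

    const canvas = document.createElement('canvas');
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    const ctx = canvas.getContext('2d');
    if (!ctx) throw new Error('Failed to create canvas context.');
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
    return canvas.toDataURL('image/png');
}

// Label the captured frame and return normalized tag tokens plus an aspect tag,
// matching the list stored in $tags_chat above. `labelImage` is hypothetical.
async function tagVideo(video: HTMLVideoElement, labelImage: (base64: string) => Promise<string>): Promise<string[]> {
    const label = (await labelImage(await captureMidpointFrame(video))).trim().toUpperCase();
    const aspect = video.videoWidth / (video.videoHeight || 1);
    return [...label.split(/\s+/), `ASPECT_${aspect}`];
}

One constraint worth noting: the video must come from a same-origin (or CORS-enabled) source, otherwise the canvas is tainted and toDataURL throws, which the component's try/catch would surface as a "Video autoTag failed" log.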