diff options
-rw-r--r-- | src/client/views/nodes/PDFBox.tsx | 63 | ||||
-rw-r--r-- | src/client/views/nodes/VideoBox.tsx | 89 |
2 files changed, 68 insertions, 84 deletions
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx index a0c7d8d22..5501f0a31 100644 --- a/src/client/views/nodes/PDFBox.tsx +++ b/src/client/views/nodes/PDFBox.tsx @@ -81,46 +81,35 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { } } - autoTag = async () => { - try { - if (!this._pdf) { - throw new Error('PDF not loaded'); - } - - // 1) Extract text from the first few pages (e.g., first 2 pages) - const maxPages = Math.min(2, this._pdf.numPages); - let textContent = ''; - for (let pageNum = 1; pageNum <= maxPages; pageNum++) { - const page = await this._pdf.getPage(pageNum); - const text = await page.getTextContent(); - const pageText = text.items.map(item => ('str' in item ? item.str : '')).join(' '); - textContent += ` ${pageText}`; - } - - if (!textContent.trim()) { - throw new Error('No text found in PDF'); - } - - // 2) Ask GPT to classify and provide descriptive tags - const raw = await gptAPICall( - `"${textContent.trim().slice(0, 2000)}"`, - GPTCallType.CLASSIFYTEXTFULL - ); - - // 3) Normalize and store the labels - const label = raw.trim().toUpperCase(); + autoTag = async () => { + if (!this.Document.$tags_chat && this._pdf) { + if (!this.dataDoc.text) { + // 1) Extract text from the first few pages (e.g., first 2 pages) + const maxPages = Math.min(2, this._pdf.numPages); + const promises: Promise<string>[] = []; + for (let pageNum = 1; pageNum <= maxPages; pageNum++) { + promises.push( + this._pdf + .getPage(pageNum) + .then(page => page.getTextContent()) + .then(content => content.items.map(item => ('str' in item ? item.str : '')).join(' ')) + ); + } + this.dataDoc.text = (await Promise.all(promises)).join(' '); + } - const tokens = label.split(/\s+/); - this.Document.$tags_chat = new List<string>(); - tokens.forEach(tok => (this.Document.$tags_chat as List<string>).push(tok)); + const text = StrCast(this.dataDoc.text).trim().slice(0, 2000); + if (text) { + // 2) Ask GPT to classify and provide descriptive tags, then normalize the results + const label = await gptAPICall(`"${text}"`, GPTCallType.CLASSIFYTEXTFULL).then(raw => raw.trim().toUpperCase()); - // 4) Show tags in layout - this.Document._layout_showTags = true; + this.Document.$tags_chat = new List<string>(label.split(/\s+/)); - } catch (err) { - console.error('PDF autoTag failed:', err); - } -}; + // 4) Show tags in layout + this.Document._layout_showTags = true; + } + } + }; replaceCanvases = (oldDiv: HTMLElement, newDiv: HTMLElement) => { if (oldDiv.childNodes) { diff --git a/src/client/views/nodes/VideoBox.tsx b/src/client/views/nodes/VideoBox.tsx index 4d85b4942..f994bdbb5 100644 --- a/src/client/views/nodes/VideoBox.tsx +++ b/src/client/views/nodes/VideoBox.tsx @@ -110,56 +110,51 @@ export class VideoBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { return this._videoRef; } + autoTag = async () => { + if (this.Document.$tags_chat) return; + try { + if (!this.player) throw new Error('Video element not available.'); + + // 1) Extract a frame at the video's midpoint + const videoDuration = this.player.duration; + const snapshotTime = videoDuration / 2; + + // Seek the video element to the midpoint + await new Promise<void>(resolve => { + const onSeeked = () => { + this.player!.removeEventListener('seeked', onSeeked); + resolve(); + }; + this.player!.addEventListener('seeked', onSeeked); + this.player!.currentTime = snapshotTime; + }); - autoTag = async () => { - try { - if (!this.player) throw new Error('Video element not available.'); - - // 1) Extract a frame at the video's midpoint - const videoDuration = this.player.duration; - const snapshotTime = videoDuration / 2; - - // Seek the video element to the midpoint - await new Promise<void>((resolve, reject) => { - const onSeeked = () => { - this.player!.removeEventListener('seeked', onSeeked); - resolve(); - }; - this.player!.addEventListener('seeked', onSeeked); - this.player!.currentTime = snapshotTime; - }); - - // 2) Draw the frame onto a canvas and get a base64 representation - const canvas = document.createElement('canvas'); - canvas.width = this.player.videoWidth; - canvas.height = this.player.videoHeight; - const ctx = canvas.getContext('2d'); - if (!ctx) throw new Error('Failed to create canvas context.'); - ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height); - const base64Image = canvas.toDataURL('image/png'); - - // 3) Send the image data to GPT for classification and descriptive tags - const raw = await gptImageLabel( - base64Image, - `Classify this video frame as either a PERSON or LANDSCAPE. + // 2) Draw the frame onto a canvas and get a base64 representation + const canvas = document.createElement('canvas'); + canvas.width = this.player.videoWidth; + canvas.height = this.player.videoHeight; + const ctx = canvas.getContext('2d'); + if (!ctx) throw new Error('Failed to create canvas context.'); + ctx.drawImage(this.player, 0, 0, canvas.width, canvas.height); + const base64Image = canvas.toDataURL('image/png'); + + // 3) Send the image data to GPT for classification and descriptive tags + const label = await gptImageLabel( + base64Image, + `Classify this video frame as either a PERSON or LANDSCAPE. Then provide five additional descriptive tags (single words) separated by spaces. Finally, add one detailed summary phrase using underscores.` - ); - - // 4) Normalize and store labels in the Document's tags - const label = raw.trim().toUpperCase(); - const tokens = label.split(/\s+/); - this.Document.$tags_chat = new List<string>(); - tokens.forEach(tok => (this.Document.$tags_chat as List<string>).push(tok)); - const aspect = this.player!.videoWidth / (this.player!.videoHeight || 1); - (this.Document.$tags_chat as List<string>).push(`ASPECT_${aspect}`); - // 5) Turn on tag display in layout - this.Document._layout_showTags = true; - - } catch (err) { - console.error('Video autoTag failed:', err); - } -}; + ).then(raw => raw.trim().toUpperCase()); + + // 4) Normalize and store labels in the Document's tags + const aspect = this.player!.videoWidth / (this.player!.videoHeight || 1); + this.Document.$tags_chat = new List<string>([...label.split(/\s+/), `ASPECT_${aspect}`]); + // 5) Turn on tag display in layout + this.Document._layout_showTags = true; + } catch (err) { + console.error('Video autoTag failed:', err); + } + }; componentDidMount() { this.unmounting = false; |