diff options
Diffstat (limited to 'src/client/views/nodes/PDFBox.tsx')
-rw-r--r-- | src/client/views/nodes/PDFBox.tsx | 44 |
1 files changed, 44 insertions, 0 deletions
/**
 * Generates tags for this PDF from its leading text and stores them on the Document.
 *
 * Reads the text of at most the first two pages of `this._pdf`, sends the first
 * 2000 characters (wrapped in double quotes) to `gptAPICall` with
 * `GPTCallType.CLASSIFYTEXTFULL`, upper-cases the reply, splits it on whitespace
 * and stores the resulting tokens in `Document.$tags_chat`. On success
 * `Document._layout_showTags` is set to true.
 *
 * Every failure (no PDF loaded, no extractable text, an empty reply, or a
 * rejected call) is logged with `console.error` and is not rethrown. When the
 * reply contains no usable token the Document is left unmodified.
 *
 * @returns a promise that settles once tagging has completed or failed.
 */
autoTag = async (): Promise<void> => {
    const maxPagesToScan = 2; // only the leading pages are sampled for classification
    const maxPromptChars = 2000; // upper bound on the amount of text sent in the prompt

    try {
        if (!this._pdf) {
            throw new Error('PDF not loaded');
        }

        // 1) Extract text from the first few pages
        const pageCount = Math.min(maxPagesToScan, this._pdf.numPages);
        let textContent = '';
        for (let pageNum = 1; pageNum <= pageCount; pageNum++) {
            const page = await this._pdf.getPage(pageNum);
            const text = await page.getTextContent();
            // Items that carry no `str` field contribute an empty string.
            const pageText = text.items.map(item => ('str' in item ? item.str : '')).join(' ');
            textContent += ` ${pageText}`;
        }

        const promptText = textContent.trim();
        if (!promptText) {
            throw new Error('No text found in PDF');
        }

        // 2) Ask GPT to classify and provide descriptive tags
        const raw = await gptAPICall(`"${promptText.slice(0, maxPromptChars)}"`, GPTCallType.CLASSIFYTEXTFULL);

        // 3) Normalize the labels. Splitting an empty string yields [''], so blank
        //    tokens are filtered out to avoid storing an empty tag.
        const tokens = raw
            .trim()
            .toUpperCase()
            .split(/\s+/)
            .filter(tok => tok.length > 0);
        if (tokens.length === 0) {
            // Bail out before touching the Document so existing tags are not wiped.
            throw new Error('No tags returned for PDF');
        }

        // Keep a local reference to the list instead of re-reading and casting the field per push.
        const tags = new List<string>();
        this.Document.$tags_chat = tags;
        tokens.forEach(tok => tags.push(tok));

        // 4) Show tags in layout
        this.Document._layout_showTags = true;
    } catch (err) {
        console.error('PDF autoTag failed:', err);
    }
};