diff options
author | bobzel <zzzman@gmail.com> | 2025-06-19 15:30:54 -0400 |
---|---|---|
committer | bobzel <zzzman@gmail.com> | 2025-06-19 15:30:54 -0400 |
commit | abdfe5b109f42cd5da4aceef1bd667c8e1f6cbbd (patch) | |
tree | 26c0d1b4d4034a98003f97ecdc771066a0e04804 /src/client/views/nodes/PDFBox.tsx | |
parent | 562d6591a5e4feb7d44135445fd16056e0f8943f (diff) | |
parent | c0daf8b6aff92b71662d9791aeaa4bb63076dd02 (diff) |
Merge branch 'lanyi-branch'
Diffstat (limited to 'src/client/views/nodes/PDFBox.tsx')
-rw-r--r-- | src/client/views/nodes/PDFBox.tsx | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx index 45fa5cc12..5501f0a31 100644 --- a/src/client/views/nodes/PDFBox.tsx +++ b/src/client/views/nodes/PDFBox.tsx @@ -33,6 +33,9 @@ import { ImageBox } from './ImageBox'; import { OpenWhere } from './OpenWhere'; import './PDFBox.scss'; import { CreateImage } from './WebBoxRenderer'; +import { gptAPICall } from '../../apis/gpt/GPT'; +import { List } from '../../../fields/List'; +import { GPTCallType } from '../../apis/gpt/GPT'; @observer export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @@ -78,6 +81,36 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { } } + autoTag = async () => { + if (!this.Document.$tags_chat && this._pdf) { + if (!this.dataDoc.text) { + // 1) Extract text from the first few pages (e.g., first 2 pages) + const maxPages = Math.min(2, this._pdf.numPages); + const promises: Promise<string>[] = []; + for (let pageNum = 1; pageNum <= maxPages; pageNum++) { + promises.push( + this._pdf + .getPage(pageNum) + .then(page => page.getTextContent()) + .then(content => content.items.map(item => ('str' in item ? item.str : '')).join(' ')) + ); + } + this.dataDoc.text = (await Promise.all(promises)).join(' '); + } + + const text = StrCast(this.dataDoc.text).trim().slice(0, 2000); + if (text) { + // 2) Ask GPT to classify and provide descriptive tags, then normalize the results + const label = await gptAPICall(`"${text}"`, GPTCallType.CLASSIFYTEXTFULL).then(raw => raw.trim().toUpperCase()); + + this.Document.$tags_chat = new List<string>(label.split(/\s+/)); + + // 4) Show tags in layout + this.Document._layout_showTags = true; + } + } + }; + replaceCanvases = (oldDiv: HTMLElement, newDiv: HTMLElement) => { if (oldDiv.childNodes) { for (let i = 0; i < oldDiv.childNodes.length; i++) { |