aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsharkiecodes <lanyi_stroud@brown.edu>2025-04-29 23:01:11 -0400
committersharkiecodes <lanyi_stroud@brown.edu>2025-04-29 23:01:11 -0400
commit609ec6d37b5abf94f3ab84784544ebe47804cdf5 (patch)
tree1c9741a6569fa7fef3d0d0387504fc89555cc46e
parent2a36216359054532084be24ab9034cd08c1c8798 (diff)
Autotag
-rw-r--r--src/client/apis/gpt/GPT.ts35
-rw-r--r--src/client/views/collections/collectionFreeForm/ImageLabelBox.tsx2
-rw-r--r--src/client/views/nodes/ImageBox.tsx48
-rw-r--r--src/client/views/nodes/formattedText/FormattedTextBox.tsx18
-rw-r--r--src/client/views/nodes/scrapbook/ScrapbookBox.tsx50
-rw-r--r--src/client/views/search/FaceRecognitionHandler.tsx4
6 files changed, 137 insertions, 20 deletions
diff --git a/src/client/apis/gpt/GPT.ts b/src/client/apis/gpt/GPT.ts
index 9cb47995c..1956fef0c 100644
--- a/src/client/apis/gpt/GPT.ts
+++ b/src/client/apis/gpt/GPT.ts
@@ -11,6 +11,10 @@ export enum GPTDocCommand {
export const DescriptionSeperator = '======';
export const DocSeperator = '------';
+export enum TextClassifications {
+ Title = 'title', //a few words
+ Caption = 'caption', //few sentences
+ LengthyDescription = 'lengthy description' }
enum GPTCallType {
SUMMARY = 'summary',
@@ -36,6 +40,7 @@ enum GPTCallType {
SUBSETDOCS = 'subset_docs', // select a subset of documents based on their descriptions
DOCINFO = 'doc_info', // provide information about a document
SORTDOCS = 'sort_docs',
+ CLASSIFYTEXT = 'classify_text', // classify text into one of the three categories: title, caption, lengthy description
}
type GPTCallOpts = {
@@ -48,6 +53,23 @@ type GPTCallOpts = {
const callTypeMap: { [type in GPTCallType]: GPTCallOpts } = {
// newest model: gpt-4
summary: { model: 'gpt-4-turbo', maxTokens: 256, temp: 0.5, prompt: 'Summarize the text given in simpler terms.' },
+
+
+ sort_docs: {
+ model: 'gpt-4o',
+ maxTokens: 2048,
+ temp: 0.25,
+ prompt:
+ `The user is going to give you a list of descriptions.
+ Each one is separated by '${DescriptionSeperator}' on either side.
+ Descriptions will vary in length, so make sure to only separate when you see '${DescriptionSeperator}'.
+ Sort them by the user's specifications.
+ Make sure each description is only in the list once. Each item should be separated by '${DescriptionSeperator}'.
+ Immediately afterward, surrounded by '${DocSeperator}' on BOTH SIDES, provide some insight into your reasoning for the way you sorted (and mention nothing about the formatting details given in this description).
+ It is VERY important that you format it exactly as described, ensuring the proper number of '${DescriptionSeperator[0]}' and '${DocSeperator[0]}' (${DescriptionSeperator.length} of each) and NO commas`,
+ },
+
+
edit: { model: 'gpt-4-turbo', maxTokens: 256, temp: 0.5, prompt: 'Reword the text.' },
stack: {
model: 'gpt-4o',
@@ -69,17 +91,14 @@ const callTypeMap: { [type in GPTCallType]: GPTCallOpts } = {
temp: 0.5,
prompt: "You are a helpful resarch assistant. Analyze the user's data to find meaningful patterns and/or correlation. Please only return a JSON with a correlation column 1 propert, a correlation column 2 property, and an analysis property. ",
},
- sort_docs: {
+ //new
+ classify_text: {
model: 'gpt-4o',
maxTokens: 2048,
temp: 0.25,
- prompt: `The user is going to give you a list of descriptions.
- Each one is separated by '${DescriptionSeperator}' on either side.
- Descriptions will vary in length, so make sure to only separate when you see '${DescriptionSeperator}'.
- Sort them by the user's specifications.
- Make sure each description is only in the list once. Each item should be separated by '${DescriptionSeperator}'.
- Immediately afterward, surrounded by '${DocSeperator}' on BOTH SIDES, provide some insight into your reasoning for the way you sorted (and mention nothing about the formatting details given in this description).
- It is VERY important that you format it exactly as described, ensuring the proper number of '${DescriptionSeperator[0]}' and '${DocSeperator[0]}' (${DescriptionSeperator.length} of each) and NO commas`,
+ prompt: `Based on the content of the the text, classify it into the
+ most appropriate category: '${TextClassifications.Title}', '${TextClassifications.Caption}', or '${TextClassifications.LengthyDescription}'. Output exclusively the classification in your response.
+ `
},
describe: { model: 'gpt-4-vision-preview', maxTokens: 2048, temp: 0, prompt: 'Describe these images in 3-5 words' },
flashcard: {
diff --git a/src/client/views/collections/collectionFreeForm/ImageLabelBox.tsx b/src/client/views/collections/collectionFreeForm/ImageLabelBox.tsx
index bf674c025..038b1c6f9 100644
--- a/src/client/views/collections/collectionFreeForm/ImageLabelBox.tsx
+++ b/src/client/views/collections/collectionFreeForm/ImageLabelBox.tsx
@@ -160,7 +160,7 @@ export class ImageLabelBox extends ViewBoxBaseComponent<FieldViewProps>() {
classifyImagesInBox = async (selectedImages? : Doc[], prompt? : string) => {
this.startLoading();
-
+ alert('Classifying images...');
selectedImages ??= this._selectedImages;
// Converts the images into a Base64 format, afterwhich the information is sent to GPT to label them.
diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx
index 6e6ca3f73..0b6814c01 100644
--- a/src/client/views/nodes/ImageBox.tsx
+++ b/src/client/views/nodes/ImageBox.tsx
@@ -7,8 +7,9 @@ import { observer } from 'mobx-react';
import { extname } from 'path';
import * as React from 'react';
import { AiOutlineSend } from 'react-icons/ai';
+import { ImageLabelBoxData } from '../collections/collectionFreeForm/ImageLabelBox';
import ReactLoading from 'react-loading';
-import { ClientUtils, DashColor, returnEmptyString, returnFalse, returnOne, returnZero, setupMoveUpEvents, UpdateIcon } from '../../../ClientUtils';
+import { ClientUtils, imageUrlToBase64, DashColor, returnEmptyString, returnFalse, returnOne, returnZero, setupMoveUpEvents, UpdateIcon } from '../../../ClientUtils';
import { Doc, DocListCast, Opt } from '../../../fields/Doc';
import { DocData } from '../../../fields/DocSymbols';
import { Id } from '../../../fields/FieldSymbols';
@@ -16,7 +17,7 @@ import { InkTool } from '../../../fields/InkField';
import { List } from '../../../fields/List';
import { ObjectField } from '../../../fields/ObjectField';
import { ComputedField } from '../../../fields/ScriptField';
-import { Cast, DocCast, ImageCast, NumCast, RTFCast, StrCast } from '../../../fields/Types';
+import { Cast, DocCast, ImageCast, NumCast, RTFCast, StrCast, ImageCastWithSuffix } from '../../../fields/Types';
import { ImageField } from '../../../fields/URLField';
import { TraceMobx } from '../../../fields/util';
import { emptyFunction } from '../../../Utils';
@@ -46,6 +47,7 @@ import { FieldView, FieldViewProps } from './FieldView';
import { FocusViewOptions } from './FocusViewOptions';
import './ImageBox.scss';
import { OpenWhere } from './OpenWhere';
+import { gptImageLabel } from '../../apis/gpt/GPT';
import { ImageLabelBox } from '../collections/collectionFreeForm/ImageLabelBox';
const DefaultPath = '/assets/unknown-file-icon-hi.png';
@@ -122,11 +124,47 @@ export class ImageBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
};
autoTag = async () => {
- ImageLabelBox.Instance.classifyImagesInBox([this.Document], "Classify this image as a PERSON or LANDSCAPE. You may only respond with one of these two options");
-
- //Doc.getDescription(this.Document).then(desc => this.desc = desc)
+ try {
+ // 1) grab the full-size URL
+ const layoutKey = Doc.LayoutDataKey(this.Document);
+ const url = ImageCastWithSuffix(this.Document[layoutKey], '_o') ?? '';
+ if (!url) throw new Error('No image URL found');
+
+ // 2) convert to base64
+ const base64 = await imageUrlToBase64(url);
+ if (!base64) throw new Error('Failed to load image data');
+
+ // 3) ask GPT for exactly one label: PERSON or LANDSCAPE
+ const raw = await gptImageLabel(
+ base64,
+ 'Classify this image as PERSON or LANDSCAPE. You may only respond with one of these two options.'
+ );
+
+ // 4) normalize and prefix
+ const label = raw
+ .trim()
+ .toUpperCase()
+
+ // 5) stash it on the Doc
+ // overwrite any old tags so re-runs still work
+ this.Document.$tags_chat = new List<string>();
+ (this.Document.$tags_chat as List<string>).push(label);
+
+ // 6) flip on “show tags” in the layout
+ // (same flag that ImageLabelBox.toggleDisplayInformation uses)
+ //note to self: What if i used my own field (ex: Document.$auto_description or something
+ //Would i still have to toggle it on for it to show in the metadata?
+ this.Document._layout_showTags = true;
+
+ } catch (err) {
+ console.error('autoTag failed:', err);
+ } finally {
}
+ };
+
+ //Doc.getDescription(this.Document).then(desc => this.desc = desc)
+
diff --git a/src/client/views/nodes/formattedText/FormattedTextBox.tsx b/src/client/views/nodes/formattedText/FormattedTextBox.tsx
index 98e461a52..f4cbbcc9e 100644
--- a/src/client/views/nodes/formattedText/FormattedTextBox.tsx
+++ b/src/client/views/nodes/formattedText/FormattedTextBox.tsx
@@ -64,6 +64,7 @@ import { removeMarkWithAttrs } from './prosemirrorPatches';
import { RichTextMenu, RichTextMenuPlugin } from './RichTextMenu';
import { RichTextRules } from './RichTextRules';
import { schema } from './schema_rts';
+import { tickStep } from 'd3';
// import * as applyDevTools from 'prosemirror-dev-tools';
export interface FormattedTextBoxProps extends FieldViewProps {
@@ -304,6 +305,14 @@ export class FormattedTextBox extends ViewBoxAnnotatableComponent<FormattedTextB
}
};
+ autoTag = async () => {
+ this.Document.$tags_chat = new List<string>();
+ gptAPICall(RTFCast(this.Document[Doc.LayoutDataKey(this.Document)])?.Text ?? StrCast(this.Document[Doc.LayoutDataKey(this.Document)]),
+ GPTCallType.CLASSIFYTEXT).then(desc => (this.Document.$tags_chat as List<string>).push(desc));
+ this.Document._layout_showTags = true;
+ //or... then(desc => this.Document.$tags_chat = desc);
+ }
+
leafText = (node: Node) => {
if (node.type === this.EditorView?.state.schema.nodes.dashField) {
const refDoc = !node.attrs.docId ? this.rootDoc : (DocServer.GetCachedRefField(node.attrs.docId as string) as Doc);
@@ -1236,6 +1245,7 @@ export class FormattedTextBox extends ViewBoxAnnotatableComponent<FormattedTextB
},
{ fireImmediately: true }
);
+
this._disposers.search = reaction(
() => Doc.IsSearchMatch(this.Document),
@@ -1269,6 +1279,14 @@ export class FormattedTextBox extends ViewBoxAnnotatableComponent<FormattedTextB
{ fireImmediately: true }
);
+ this._disposers.tagger = reaction(
+ () => ({ title: this.Document.title, sel: this.props.isSelected() }),
+ action(() => {
+ this.autoTag();
+ }),
+ { fireImmediately: true }
+ );
+
if (!this._props.dontRegisterView) {
this._disposers.record = reaction(
() => this.recordingDictation,
diff --git a/src/client/views/nodes/scrapbook/ScrapbookBox.tsx b/src/client/views/nodes/scrapbook/ScrapbookBox.tsx
index 55d0df585..39729a1c5 100644
--- a/src/client/views/nodes/scrapbook/ScrapbookBox.tsx
+++ b/src/client/views/nodes/scrapbook/ScrapbookBox.tsx
@@ -1,6 +1,6 @@
import { action, makeObservable, observable } from 'mobx';
import * as React from 'react';
-import { Doc, DocListCast } from '../../../../fields/Doc';
+import { Doc, DocListCast, StrListCast } from '../../../../fields/Doc';
import { List } from '../../../../fields/List';
import { emptyFunction } from '../../../../Utils';
import { Docs } from '../../../documents/Documents';
@@ -100,20 +100,62 @@ export class ScrapbookBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
};
filterAddDocument = (docIn: Doc | Doc[]) => {
- const docs = toList(docIn);
+ const docs = toList(docIn); //The docs being added to the scrapbook
+
+ // 1) Grab all template slots:
+ const slots = DocListCast(this.dataDoc[this.fieldKey]);
+
+ // 2) recursive unwrap:
+ const unwrap = (items: Doc[]): Doc[] =>
+ items.flatMap(d =>
+ d.$type === DocumentType.COL
+ ? unwrap(DocListCast(d[Doc.LayoutDataKey(d)]))
+ : [d]
+ );
+
+ // 3) produce a flat list of every doc, unwrapping any number of nested COLs
+ const allDocs: Doc[] = unwrap(slots);
+
+
if (docs?.length === 1) {
- const placeholder = DocListCast(this.dataDoc[this.fieldKey]).find(d =>
+ const placeholder = allDocs.filter(d =>
+
(d.accepts_docType === docs[0].$type || // match fields based on type, or by analyzing content .. simple example of matching text in placeholder to dropped doc's type
RTFCast(d[Doc.LayoutDataKey(d)])?.Text.includes(StrCast(docs[0].$type)))
); // prettier-ignore
+ //DocListCast(this.Document.items).map(doc => DocListCast(doc[Doc.LayoutDataKey(doc)])
+
if (placeholder) {
+ /**Look at the autotags and see what matches*RTFCast(d[Doc.LayoutDataKey(d)])?.Text*/
// ugh. we have to tell the underlying view not to add the Doc so that we can add it where we want it.
// However, returning 'false' triggers an undo. so this settimeout is needed to make the assignment happen after the undo.
setTimeout(
undoable(() => {
+
+ const slotTagsList: Set<string>[] = placeholder.map(doc =>
+ new Set<string>(StrListCast(doc.$tags_chat))
+ );
+ // turn docs[0].$tags_chat into a Set
+ const targetTags = new Set(StrListCast(docs[0].$tags_chat));
+
//StrListCast(placeholder.overrideFields).map(field => (docs[0][field] = placeholder[field])); // // shouldn't need to do this for layout fields since the placeholder already overrides its protos
- placeholder.proto = docs[0];
+
+ // find the first placeholder that shares *any* tag
+ const match = placeholder.find(ph =>
+ StrListCast(ph.$tags_chat).some(tag => targetTags.has(tag))
+ );
+ if (match) {
+ match.proto = docs[0];
+ }
+
+ /*const chosenPlaceholder = placeholder.find(d =>
+ pl = new Set<string>(StrListCast(d.$tags_chat)
+
+ d.$tags_chat && d.$tags_chat[0].equals(docs[0].$tags_chat)); //why [0]
+ if (chosenPlaceholder){
+ chosenPlaceholder.proto = docs[0];}*/
+ //excess if statement??
}, 'Scrapbook add')
);
return false;
diff --git a/src/client/views/search/FaceRecognitionHandler.tsx b/src/client/views/search/FaceRecognitionHandler.tsx
index 60744588f..256e68afd 100644
--- a/src/client/views/search/FaceRecognitionHandler.tsx
+++ b/src/client/views/search/FaceRecognitionHandler.tsx
@@ -210,8 +210,8 @@ export class FaceRecognitionHandler {
} else if (imgDoc.type === DocumentType.LOADING && !imgDoc.loadingError) {
setTimeout(() => this.classifyFacesInImage(imgDocView), 1000);
} else {
- reaction(() => ({sel:imgDocView.isSelected()}), ({sel}) => !sel && imgDoc.type == 'text'
- && imgDocView.ComponentView?.autoTag?.(), {fireImmediately: true}
+ reaction(() => ({sel:imgDocView.isSelected()}), ({sel}) => !sel &&
+ imgDocView.ComponentView?.autoTag?.(), {fireImmediately: true}
)
const imgUrl = ImageCast(imgDoc[Doc.LayoutDataKey(imgDoc)]);
if (imgUrl && !DocListCast(Doc.MyFaceCollection?.examinedFaceDocs).includes(imgDoc[DocData])) {