aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Utils.ts18
-rw-r--r--src/client/apis/gpt/GPT.ts62
-rw-r--r--src/client/views/MainView.tsx2
-rw-r--r--src/client/views/collections/collectionFreeForm/ImageLabelHandler.scss44
-rw-r--r--src/client/views/collections/collectionFreeForm/ImageLabelHandler.tsx120
-rw-r--r--src/client/views/collections/collectionFreeForm/MarqueeOptionsMenu.tsx3
-rw-r--r--src/client/views/collections/collectionFreeForm/MarqueeView.tsx129
7 files changed, 369 insertions, 9 deletions
diff --git a/src/Utils.ts b/src/Utils.ts
index 291d7c799..7bf6330d0 100644
--- a/src/Utils.ts
+++ b/src/Utils.ts
@@ -913,3 +913,21 @@ export function dateRangeStrToDates(dateStr: string) {
return [new Date(fromYear, fromMonth, fromDay), new Date(toYear, toMonth, toDay)];
}
+
+export async function convertImageToBase64(url: string): Promise<string> {
+ try {
+ const response = await fetch(url); // Fetch the image
+ if (!response.ok) throw new Error('Network response was not ok');
+ const blob = await response.blob(); // Convert response to Blob
+
+ return new Promise((resolve, reject) => {
+ const reader = new FileReader();
+ reader.readAsDataURL(blob); // Read blob as DataURL (Base64)
+ reader.onloadend = () => resolve(reader.result as string); // Resolve promise with Base64 string
+ reader.onerror = error => reject(error); // Reject promise on error
+ });
+ } catch (error) {
+ console.error('Error:', error);
+ throw error; // Rethrow the error after logging it
+ }
+}
diff --git a/src/client/apis/gpt/GPT.ts b/src/client/apis/gpt/GPT.ts
index 30194f9f8..cf84f4942 100644
--- a/src/client/apis/gpt/GPT.ts
+++ b/src/client/apis/gpt/GPT.ts
@@ -83,4 +83,64 @@ const gptImageCall = async (prompt: string, n?: number) => {
}
};
-export { gptAPICall, gptImageCall, GPTCallType };
+const gptGetEmbedding = async (src: string): Promise<number[]> => {
+ try {
+ const configuration: ClientOptions = {
+ apiKey: process.env.OPENAI_KEY,
+ dangerouslyAllowBrowser: true,
+ };
+ const openai = new OpenAI(configuration);
+ const embeddingResponse = await openai.embeddings.create({
+ model: 'text-embedding-3-large',
+ input: [src],
+ encoding_format: 'float',
+ dimensions: 256,
+ });
+
+ // Assume the embeddingResponse structure is correct; adjust based on actual API response
+ const embedding = embeddingResponse.data[0].embedding;
+ return embedding;
+ } catch (err) {
+ console.log(err);
+ return [];
+ }
+};
+
+const gptImageLabel = async (src: string): Promise<string> => {
+ try {
+ const configuration: ClientOptions = {
+ apiKey: process.env.OPENAI_KEY,
+ dangerouslyAllowBrowser: true,
+ };
+
+ const openai = new OpenAI(configuration);
+ const response = await openai.chat.completions.create({
+ model: 'gpt-4-vision-preview',
+ messages: [
+ {
+ role: 'user',
+ content: [
+ { type: 'text', text: 'Give three labels to describe this image.' },
+ {
+ type: 'image_url',
+ image_url: {
+ url: `${src}`,
+ detail: 'low',
+ },
+ },
+ ],
+ },
+ ],
+ });
+ if (response.choices[0].message.content) {
+ return response.choices[0].message.content;
+ } else {
+ return 'Missing labels';
+ }
+ } catch (err) {
+ console.log(err);
+ return 'Error connecting with API';
+ }
+};
+
+export { gptAPICall, gptImageCall, GPTCallType, gptImageLabel, gptGetEmbedding };
diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx
index 58b8d255a..e66982b3e 100644
--- a/src/client/views/MainView.tsx
+++ b/src/client/views/MainView.tsx
@@ -71,6 +71,7 @@ import { PresBox } from './nodes/trails';
import { AnchorMenu } from './pdf/AnchorMenu';
import { GPTPopup } from './pdf/GPTPopup/GPTPopup';
import { TopBar } from './topbar/TopBar';
+import { ImageLabelHandler } from './collections/collectionFreeForm/ImageLabelHandler';
const { LEFT_MENU_WIDTH, TOPBAR_HEIGHT } = require('./global/globalCssVariables.module.scss'); // prettier-ignore
const _global = (window /* browser */ || global) /* node */ as any;
@@ -1029,6 +1030,7 @@ export class MainView extends ObservableReactComponent<{}> {
<PreviewCursor />
<TaskCompletionBox />
<ContextMenu />
+ <ImageLabelHandler />
<AnchorMenu />
<MapAnchorMenu />
<DirectionsAnchorMenu />
diff --git a/src/client/views/collections/collectionFreeForm/ImageLabelHandler.scss b/src/client/views/collections/collectionFreeForm/ImageLabelHandler.scss
new file mode 100644
index 000000000..e7413bf8e
--- /dev/null
+++ b/src/client/views/collections/collectionFreeForm/ImageLabelHandler.scss
@@ -0,0 +1,44 @@
+#label-handler {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+
+ > div:first-child {
+ display: flex; // Puts the input and button on the same row
+ align-items: center; // Vertically centers items in the flex container
+
+ input {
+ color: black;
+ }
+
+ .IconButton {
+ margin-left: 8px; // Adds space between the input and the icon button
+ width: 19px;
+ }
+ }
+
+ > div:not(:first-of-type) {
+ display: flex;
+ flex-direction: column;
+ align-items: center; // Centers the content vertically in the flex container
+ width: 100%;
+
+ > div {
+ display: flex;
+ justify-content: space-between; // Puts the content and delete button on opposite ends
+ align-items: center;
+ width: 100%;
+ margin-top: 8px; // Adds space between label rows
+
+ p {
+ text-align: center; // Centers the text of the paragraph
+ flex-grow: 1; // Allows the paragraph to grow and occupy the available space
+ }
+
+ .IconButton {
+ // Styling for the delete button
+ margin-left: auto; // Pushes the button to the far right
+ }
+ }
+ }
+}
diff --git a/src/client/views/collections/collectionFreeForm/ImageLabelHandler.tsx b/src/client/views/collections/collectionFreeForm/ImageLabelHandler.tsx
new file mode 100644
index 000000000..46bc3d946
--- /dev/null
+++ b/src/client/views/collections/collectionFreeForm/ImageLabelHandler.tsx
@@ -0,0 +1,120 @@
+import { FontAwesomeIcon } from '@fortawesome/react-fontawesome';
+import { IconButton } from 'browndash-components';
+import { action, makeObservable, observable } from 'mobx';
+import { observer } from 'mobx-react';
+import React from 'react';
+import { SettingsManager } from '../../../util/SettingsManager';
+import { ObservableReactComponent } from '../../ObservableReactComponent';
+import { MarqueeOptionsMenu } from './MarqueeOptionsMenu';
+import './ImageLabelHandler.scss';
+
+@observer
+export class ImageLabelHandler extends ObservableReactComponent<{}> {
+ static Instance: ImageLabelHandler;
+
+ @observable _display: boolean = false;
+ @observable _pageX: number = 0;
+ @observable _pageY: number = 0;
+ @observable _yRelativeToTop: boolean = true;
+ @observable _currentLabel: string = '';
+ @observable _labelGroups: string[] = [];
+
+ constructor(props: any) {
+ super(props);
+ makeObservable(this);
+ ImageLabelHandler.Instance = this;
+ console.log('Instantiated label handler!');
+ }
+
+ @action
+ displayLabelHandler = (x: number, y: number) => {
+ this._pageX = x;
+ this._pageY = y;
+ this._display = true;
+ this._labelGroups = [];
+ };
+
+ @action
+ hideLabelhandler = () => {
+ this._display = false;
+ this._labelGroups = [];
+ };
+
+ @action
+ addLabel = (label: string) => {
+ label = label.toUpperCase().trim();
+ if (label.length > 0) {
+ if (!this._labelGroups.includes(label)) {
+ this._labelGroups = [...this._labelGroups, label];
+ }
+ }
+ };
+
+ @action
+ removeLabel = (label: string) => {
+ label = label.toUpperCase();
+ this._labelGroups = this._labelGroups.filter(group => group !== label);
+ };
+
+ @action
+ groupImages = () => {
+ MarqueeOptionsMenu.Instance.groupImages();
+ this._display = false;
+ };
+
+ render() {
+ if (this._display) {
+ return (
+ <div
+ id="label-handler"
+ className="contextMenu-cont"
+ style={{
+ display: this._display ? '' : 'none',
+ left: this._pageX,
+ ...(this._yRelativeToTop ? { top: Math.max(0, this._pageY) } : { bottom: this._pageY }),
+ background: SettingsManager.userBackgroundColor,
+ color: SettingsManager.userColor,
+ }}>
+ <div>
+ <IconButton tooltip={'Cancel'} onPointerDown={this.hideLabelhandler} icon={<FontAwesomeIcon icon="eye-slash" />} color={MarqueeOptionsMenu.Instance.userColor} style={{ width: '19px' }} />
+ <input aria-label="label-input" id="new-label" type="text" style={{ color: 'black' }} />
+ <IconButton
+ tooltip={'Add Label'}
+ onPointerDown={() => {
+ const input = document.getElementById('new-label') as HTMLInputElement;
+ const newLabel = input.value;
+ this.addLabel(newLabel);
+ this._currentLabel = '';
+ input.value = '';
+ }}
+ icon={<FontAwesomeIcon icon="plus" />}
+ color={MarqueeOptionsMenu.Instance.userColor}
+ style={{ width: '19px' }}
+ />
+ <IconButton tooltip={'Group Images'} onPointerDown={this.groupImages} icon={<FontAwesomeIcon icon="object-group" />} color={MarqueeOptionsMenu.Instance.userColor} style={{ width: '19px' }} />
+ </div>
+ <div>
+ {this._labelGroups.map(group => {
+ return (
+ <div>
+ <p>{group}</p>
+ <IconButton
+ tooltip={'Remove Label'}
+ onPointerDown={() => {
+ this.removeLabel(group);
+ }}
+ icon={'x'}
+ color={MarqueeOptionsMenu.Instance.userColor}
+ style={{ width: '19px' }}
+ />
+ </div>
+ );
+ })}
+ </div>
+ </div>
+ );
+ } else {
+ return <></>;
+ }
+ }
+}
diff --git a/src/client/views/collections/collectionFreeForm/MarqueeOptionsMenu.tsx b/src/client/views/collections/collectionFreeForm/MarqueeOptionsMenu.tsx
index 79cc534dc..414858aee 100644
--- a/src/client/views/collections/collectionFreeForm/MarqueeOptionsMenu.tsx
+++ b/src/client/views/collections/collectionFreeForm/MarqueeOptionsMenu.tsx
@@ -17,6 +17,8 @@ export class MarqueeOptionsMenu extends AntimodeMenu<AntimodeMenuProps> {
public showMarquee: () => void = unimplementedFunction;
public hideMarquee: () => void = unimplementedFunction;
public pinWithView: (e: KeyboardEvent | React.PointerEvent | undefined) => void = unimplementedFunction;
+ public classifyImages: (e: React.MouseEvent | undefined) => void = unimplementedFunction;
+ public groupImages: () => void = unimplementedFunction;
public isShown = () => this._opacity > 0;
constructor(props: any) {
super(props);
@@ -37,6 +39,7 @@ export class MarqueeOptionsMenu extends AntimodeMenu<AntimodeMenuProps> {
<IconButton tooltip={'Summarize Documents'} onPointerDown={this.summarize} icon={<FontAwesomeIcon icon="compress-arrows-alt" />} color={this.userColor} />
<IconButton tooltip={'Delete Documents'} onPointerDown={this.delete} icon={<FontAwesomeIcon icon="trash-alt" />} color={this.userColor} />
<IconButton tooltip={'Pin selected region'} onPointerDown={this.pinWithView} icon={<FontAwesomeIcon icon="map-pin" />} color={this.userColor} />
+ <IconButton tooltip={'Classify Images'} onPointerDown={this.classifyImages} icon={<FontAwesomeIcon icon="object-group" />} color={this.userColor} />
</>
);
return this.getElement(buttons);
diff --git a/src/client/views/collections/collectionFreeForm/MarqueeView.tsx b/src/client/views/collections/collectionFreeForm/MarqueeView.tsx
index 6eca91e9d..887fa17a8 100644
--- a/src/client/views/collections/collectionFreeForm/MarqueeView.tsx
+++ b/src/client/views/collections/collectionFreeForm/MarqueeView.tsx
@@ -1,15 +1,15 @@
import { action, computed, makeObservable, observable } from 'mobx';
import { observer } from 'mobx-react';
import * as React from 'react';
-import { Utils, intersectRect, lightOrDark, returnFalse } from '../../../../Utils';
-import { Doc, Opt } from '../../../../fields/Doc';
+import { Utils, intersectRect, lightOrDark, returnFalse, convertImageToBase64 } from '../../../../Utils';
+import { Doc, FieldResult, NumListCast, Opt } from '../../../../fields/Doc';
import { AclAdmin, AclAugment, AclEdit, DocData } from '../../../../fields/DocSymbols';
import { Id } from '../../../../fields/FieldSymbols';
import { InkData, InkField, InkTool } from '../../../../fields/InkField';
import { List } from '../../../../fields/List';
import { RichTextField } from '../../../../fields/RichTextField';
import { Cast, FieldValue, NumCast, StrCast } from '../../../../fields/Types';
-import { ImageField } from '../../../../fields/URLField';
+import { ImageField, URLField } from '../../../../fields/URLField';
import { GetEffectiveAcl } from '../../../../fields/util';
import { CognitiveServices } from '../../../cognitive_services/CognitiveServices';
import { DocumentType } from '../../../documents/DocumentTypes';
@@ -28,6 +28,10 @@ import { FormattedTextBox } from '../../nodes/formattedText/FormattedTextBox';
import { SubCollectionViewProps } from '../CollectionSubView';
import { MarqueeOptionsMenu } from './MarqueeOptionsMenu';
import './MarqueeView.scss';
+import { ObjectField } from '../../../../fields/ObjectField';
+import { gptGetEmbedding, gptImageLabel } from '../../../apis/gpt/GPT';
+import { ImageLabelHandler } from './ImageLabelHandler';
+import { listSpec } from '../../../../fields/Schema';
interface MarqueeViewProps {
getContainerTransform: () => Transform;
getTransform: () => Transform;
@@ -64,11 +68,13 @@ export class MarqueeView extends ObservableReactComponent<SubCollectionViewProps
}
private _commandExecuted = false;
+ private _selectedDocs: Doc[] = [];
@observable _lastX: number = 0;
@observable _lastY: number = 0;
@observable _downX: number = 0;
@observable _downY: number = 0;
@observable _visible: boolean = false; // selection rentangle for marquee selection/free hand lasso is visible
+ @observable _labelsVisibile: boolean = false;
@observable _lassoPts: [number, number][] = [];
@observable _lassoFreehand: boolean = false;
@@ -270,6 +276,8 @@ export class MarqueeView extends ObservableReactComponent<SubCollectionViewProps
MarqueeOptionsMenu.Instance.hideMarquee = this.hideMarquee;
MarqueeOptionsMenu.Instance.jumpTo(e.clientX, e.clientY);
MarqueeOptionsMenu.Instance.pinWithView = this.pinWithView;
+ MarqueeOptionsMenu.Instance.classifyImages = this.classifyImages;
+ MarqueeOptionsMenu.Instance.groupImages = this.groupImages;
document.addEventListener('pointerdown', hideMarquee, true);
document.addEventListener('wheel', hideMarquee, true);
} else {
@@ -416,6 +424,100 @@ export class MarqueeView extends ObservableReactComponent<SubCollectionViewProps
this.hideMarquee();
});
+ /**
+ * Classifies images and assigns the labels as document fields.
+ * TODO: Turn into lists of labels instead of individual fields.
+ */
+ @undoBatch
+ classifyImages = action(async (e: React.MouseEvent | undefined) => {
+ const selected = this.marqueeSelect(false, DocumentType.IMG);
+ this._selectedDocs = selected;
+
+ const imagePromises = selected.map(doc => {
+ let href = (doc['data'] as URLField).url.href;
+ let hrefParts = href.split('.');
+ let hrefComplete = `${hrefParts[0]}_o.${hrefParts[1]}`;
+ return convertImageToBase64(hrefComplete).then(hrefBase64 => {
+ return gptImageLabel(hrefBase64).then(response => {
+ console.log(response);
+ const labels = response.split('\n');
+ console.log(labels);
+ doc.image_labels = new List<string>(Array.from(labels!));
+ return Promise.all(labels!.map(label => gptGetEmbedding(label))).then(embeddings => {
+ return { doc, embeddings };
+ });
+ });
+ });
+ });
+
+ let docsAndEmbeddings = await Promise.all(imagePromises);
+
+ for (const docAndEmbedding of docsAndEmbeddings) {
+ if (Array.isArray(docAndEmbedding.embeddings)) {
+ let doc = docAndEmbedding.doc;
+ for (let i = 0; i < 3; i++) {
+ doc[`label_embedding_${i + 1}`] = new List<number>(docAndEmbedding.embeddings[i]);
+ }
+ }
+ }
+
+ if (e) {
+ ImageLabelHandler.Instance.displayLabelHandler(e.pageX, e.pageY);
+ }
+ });
+
+ /**
+ * Groups images to most similar labels.
+ */
+ @undoBatch
+ groupImages = action(async () => {
+ const labelGroups: string[] = ImageLabelHandler.Instance._labelGroups;
+ const labelToCollection: Map<string, Doc> = new Map();
+ const labelToEmbedding: Map<string, number[]> = new Map();
+ var similarity = require('compute-cosine-similarity');
+
+ // Create new collections associated with each label and get the embeddings for the labels.
+ for (const label of labelGroups) {
+ const newCollection = this.getCollection([], undefined, false);
+ newCollection._freeform_panX = this.Bounds.left + this.Bounds.width / 2;
+ newCollection._freeform_panY = this.Bounds.top + this.Bounds.height / 2;
+ labelToCollection.set(label, newCollection);
+ this._props.addDocument?.(newCollection);
+ const labelEmbedding = await gptGetEmbedding(label);
+ if (Array.isArray(labelEmbedding)) {
+ labelToEmbedding.set(label, labelEmbedding);
+ }
+ }
+
+ // For each image, loop through the labels, and calculate similarity. Associate it with the
+ // most similar one.
+ this._selectedDocs.forEach(doc => {
+ let mostSimilarLabel: string | undefined;
+ let maxSimilarity: number = 0;
+ const embeddingAsList1 = NumListCast(doc.label_embedding_1);
+ const embeddingAsList2 = NumListCast(doc.label_embedding_2);
+ const embeddingAsList3 = NumListCast(doc.label_embedding_3);
+
+ labelGroups.forEach(label => {
+ let curSimilarity1 = similarity(labelToEmbedding.get(label)!, Array.from(embeddingAsList1));
+ let curSimilarity2 = similarity(labelToEmbedding.get(label)!, Array.from(embeddingAsList2));
+ let curSimilarity3 = similarity(labelToEmbedding.get(label)!, Array.from(embeddingAsList3));
+ let maxCurSimilarity = Math.max(curSimilarity1, curSimilarity2, curSimilarity3);
+ if (maxCurSimilarity >= 0.3 && maxCurSimilarity > maxSimilarity) {
+ mostSimilarLabel = label;
+ maxSimilarity = maxCurSimilarity;
+ }
+
+ console.log('Doc with labels ' + doc.image_labels + 'has similarity score ' + maxCurSimilarity + ' to ' + mostSimilarLabel);
+ });
+
+ if (mostSimilarLabel) {
+ Doc.AddDocToList(labelToCollection.get(mostSimilarLabel)!, undefined, doc);
+ this._props.removeDocument?.(doc);
+ }
+ });
+ });
+
@undoBatch
syntaxHighlight = action((e: KeyboardEvent | React.PointerEvent | undefined) => {
const selected = this.marqueeSelect(false);
@@ -574,7 +676,10 @@ export class MarqueeView extends ObservableReactComponent<SubCollectionViewProps
return false;
}
- marqueeSelect(selectBackgrounds: boolean = false) {
+ /**
+ * When this is called, returns the list of documents that have been selected by the marquee box.
+ */
+ marqueeSelect(selectBackgrounds: boolean = false, docType: DocumentType | undefined = undefined) {
const selection: Doc[] = [];
const selectFunc = (doc: Doc) => {
const layoutDoc = Doc.Layout(doc);
@@ -584,11 +689,19 @@ export class MarqueeView extends ObservableReactComponent<SubCollectionViewProps
} else {
(this.touchesLine(bounds) || this.boundingShape(bounds)) && selection.push(doc);
}
+ console.log(doc['type']);
};
- this._props
- .activeDocuments()
- .filter(doc => !doc.z && !doc._lockedPosition)
- .map(selectFunc);
+ if (docType) {
+ this._props
+ .activeDocuments()
+ .filter(doc => !doc.z && !doc._lockedPosition && doc['type'] === docType)
+ .map(selectFunc);
+ } else {
+ this._props
+ .activeDocuments()
+ .filter(doc => !doc.z && !doc._lockedPosition)
+ .map(selectFunc);
+ }
if (!selection.length && selectBackgrounds)
this._props
.activeDocuments()