4 files changed, 152 insertions, 81 deletions
diff --git a/src/client/apis/gpt/GPT.ts b/src/client/apis/gpt/GPT.ts
index 0a4dc2423..1bec2fb11 100644
--- a/src/client/apis/gpt/GPT.ts
+++ b/src/client/apis/gpt/GPT.ts
@@ -32,7 +32,7 @@ const callTypeMap: { [type: string]: GPTCallOpts } = {
         model: 'gpt-4o',
         maxTokens: 2048,
         temp: 0.7,
-        prompt: 'Create a stack of flashcards out of this text with each question and answer labeled as question and answer. For some questions, ask "what is this image of" and write a keyword that represents the image and label it "keyword". Otherwise, write none. Do not label each flashcard and do not include asterisks.',
+        prompt: 'Create a stack of flashcards out of this text with each question and answer labeled as question and answer. For some questions, ask "what is this image of" but tailored to stacks theme and the image and write a keyword that represents the image and label it "keyword". Otherwise, write none. Do not label each flashcard and do not include asterisks.',
     },
     completion: { model: 'gpt-4-turbo', maxTokens: 256, temp: 0.5, prompt: "You are a helpful assistant. Answer the user's prompt." },
     mermaid: {
@@ -63,8 +63,8 @@ const callTypeMap: { [type: string]: GPTCallOpts } = {
     },
     pronunciation: {
         model: 'gpt-4-turbo',
-        maxTokens: 4096,
-        temp: 0.3,
+        maxTokens: 1024,
+        temp: 0.3, //0.3
         prompt: '',
     },
 };
diff --git a/src/client/views/nodes/ComparisonBox.tsx b/src/client/views/nodes/ComparisonBox.tsx
index b28ef24ec..0c42f662b 100644
--- a/src/client/views/nodes/ComparisonBox.tsx
+++ b/src/client/views/nodes/ComparisonBox.tsx
@@ -250,9 +250,10 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
         // console.log('PHONETIC TRANSCRIPTION: ' + DocCast(this._audio)[DocData]);
         // this.Document.audio = this._audio;
         console.log('Phonetic transcription: ' + DocCast(this.Document.audio).phoneticTranscription);
-        // const phonTrans = DocCast(this.Document.audio).phoneticTranscription;
-        const phonTrans = 's';
+        const phonTrans = DocCast(this.Document.audio).phoneticTranscription;
+        // const phonTrans = 's';
         if (phonTrans) {
+            // console.log(phonTrans.toString());
             this._inputValue = StrCast(phonTrans);
             console.log('INPUT:' + this._inputValue);
             this.askGPTPhonemes(this._inputValue);
@@ -262,67 +263,50 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
     };
 
     askGPTPhonemes = async (phonemes: string) => {
+        const sentence = StrCast(RTFCast(DocCast(this.dataDoc[this.fieldKey + '_1']).text)?.Text);
         const phon = 'w ʌ ɪ z j ɔː ɹ n e ɪ m ';
         const phon2 = 'h ʌ ɛ r j ʌ t ʌ d eɪ';
         const phon3 = 'ʃ eɪ oʊ s i ʃ oʊ z b aɪ ð ə s iː ʃ oʊ';
-        const phon4 = 'k a m o e s t a s h ɔi';
-        const phon5 = 'e l s e n a l';
-        const question10 =
-            'Break into syllables by putting some of the phonemes together if they create a syllable or alone if it does not in "ʃ eɪ oʊ s i ʃ oʊ z b a ɪ ð ə s iː ʃ oʊ". Then align the syllables with the words of "she sells sea shells by the sea shore". Multiple syllables can be part of one word. Then consider all possible phonetic transcriptions of "she sells seashells by the sea shore" that is standard in speech without showing the user. Align the phoneme syllables with each word based on what is similar; if it seems like a part of the word is missing, check the phonemes beforehand and after to see if they should be part of that word and reevaluate to see if those phonemes should be a part of that word. Note if a word or sound missing, including missing vowels and consonants. Compare the phonemes in each word with those phonetic transcriptions without displaying anything to the user. phonemes. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in American speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference. If there is a sound missing, note that. If nothing is wrong, say "good job" for the word. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə". If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and the splitting of words since that is your issue, not the users.';
-        const question11 =
-            "Consider all possible phonetic transcriptions of the sentence 'cómo' as commonly spoken. Compare these with provided phonetic transcriptions: " +
-            phon3 +
-            ". Align phonemes with each word to approximate its pronunciation without revealing details. Ensure correct syllable alignment and identify any missing sounds or mismatched vowels/consonants. Provide feedback per word on pronunciation accuracy. Note specific mismatches or missing sounds, and clarify pronunciation guidelines (e.g., 'i' sounds like 'ee'). Just so you know, 'i' sounds like 'ee' as in 'bee', not 'ih' as an 'lick'. Interpret 'ɹ' as the same as 'r'. Interpret 'ʌ' as the same as 'ə'. If 'ɚ', 'ɔː', and 'ɔ' are options for pronunciation, do not choose 'ɚ'. Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence.'";
-        const questionEng =
-            'Consider all possible phonetic transcriptions of the intended sentence “vamos por un café" that is standard in speech without showing the user. Compare the phonemes in each word with those phonetic transcriptions without displaying anything to the user: "' +
+        const phon4 = 'kamo estas hɔi';
+        const phon5 = 'la s e n a l';
+        console.log('REG' + this.recognition.lang);
+        const promptEng =
+            'Consider all possible phonetic transcriptions of the intended sentence "' +
+            sentence +
+            '" that is standard in American speech without showing the user. Compare each word in the following phonemes with those phonetic transcriptions without displaying anything to the user: "' +
             phonemes +
-            '". Steps to do this: Align the list phonemes with each word in the intended sentence by combining the phonemes to get a pronunciation that resembles the word in order. Individual phonemes can represent a word - they do not have to be combined. If it seems like a part of the word is missing, check the phonemes beforehand and after to see if they should instead be part of that word and reevaluate to see if those phonemes should be a part of that word; consider syllables of the words to do this alignment but do not process the letters in the wrong order. Do not use the same instance of a letter twice for the words - figure out where it belongs by evaluating which combinations with the letters before and after match better. Note if a word or sound missing, including missing vowels and consonants. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in American speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference and explain how it is supposed to sound. If there is a sound missing, note that. If nothing is wrong, say "good job" for the word. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə". If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence.';
-        const question =
-            'Consider all possible phonetic transcriptions of the intended sentence “la señal" that is standard in Spanish speech without showing the user. Compare the phonemes in each word with those phonetic transcriptions without displaying anything to the user: "' +
-            phon5 +
-            '". Steps to do this: Align the list phonemes with each word in the intended sentence by combining the phonemes to get a pronunciation that resembles the word in order. Individual phonemes can represent a word - they do not have to be combined. If it seems like a part of the word is missing, check the phonemes beforehand and after to see if they should instead be part of that word and reevaluate to see if those phonemes should be a part of that word; consider syllables of the words to do this alignment but do not process the letters in the wrong order. Only combine phonemes that are next to each other and do not take phonemes out of order. For example, if "al" is at the end, it should not be matched to "la" in the beginning. Do not use the same instance of a letter twice for the words - figure out where it belongs by evaluating which combinations with the letters before and after match better. Note if a word or sound missing, including missing vowels and consonants. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in Spanish speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference and explain how it is supposed to sound. If there is a sound missing, note that. If nothing is wrong, say "good job" for the word. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence. Do not make "θ" and "f" interchangable. Do not make "n" and "ɲ" interchangable. Do not make "e" and "i" interchangable.';
-        const questionL =
-            'Consider all possible phonetic transcriptions of "she sells seashells by the sea shore" that is standard in speech without showing the user. Align the phonemes with each word based on what is similar in order; if it seems like a part of the word is missing, check the phonemes beforehand and after to see if they should be part of that word and reevaluate to see if those phonemes should be a part of that word; consider syllables of the words to do this alignment but do not process the letters in the wrong order. Note if a word or sound missing, including missing vowels and consonants. Compare the phonemes in each word with those phonetic transcriptions without displaying anything to the user: "' +
-            'ʃ eɪ oʊ s i ʃ oʊ z b aɪ ð ə s iː ʃ oʊ' +
-            '". If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in American speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference. If there is a sound missing, note that. If nothing is wrong, say "good job" for the word. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə" - do not distinguish between the schwa and "ʌ". If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this.';
-        const question5 =
-            'Consider all possible phonetic transcriptions of "how are you today" that is standard in speech without showing the user. Compare these phonemes with those phonetic transcriptions without displaying anything to the user: "' +
+            '". Steps to do this: Align the words with each word in the intended sentence by combining the phonemes to get a pronunciation that resembles the word in order. Do not describe phonetic corrections with the phonetic alphabet - describe it by providing other examples of how it should sound. Note if a word or sound missing, including missing vowels and consonants. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in American speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference and explain how it is supposed to sound. Only note the difference if they are not allophones of the same phoneme and if they are far away on the vowel chart. The goal is to be understood, not sound like a native speaker. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə". If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence. Provide a response like this: "Lets work on improving the pronunciation of "coffee." You said "ceeffee," which is close, but we need to adjust the vowel sound. In American English, "coffee" is pronounced /ˈkɔːfi/, with a long "aw" sound. Try saying "kah-fee." Your intonation is good, but try putting a bit more stress on "like" in the sentence "I would like a coffee with milk." This will make your speech sound more natural. Keep practicing, and lets try saying the whole sentence again!"';
+        const promptSpa =
+            'Consider all possible phonetic transcriptions of the intended sentence "' +
+            sentence + // text of the `<fieldKey>_1` document, i.e. the sentence being evaluated (same source promptEng/promptAll use)
+            '" that is standard in Spanish speech without showing the user. Compare each word in the following phonemes with those phonetic transcriptions without displaying anything to the user: "' +
+            phonemes + // phonetic transcription handed in by the caller, not a canned sample
+            '". Steps to do this: Align the words with each word in the intended sentence by combining the phonemes to get a pronunciation that resembles the word in order. Do not describe phonetic corrections with the phonetic alphabet - describe it by providing other examples of how it should sound. Note if a word or sound missing, including missing vowels and consonants. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in Spanish speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference and explain how it is supposed to sound.  Only note the difference if they are not allophones of the same phoneme and if they are far away on the vowel chart; say good job if it would be understood by a native Spanish speaker. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə". Do not make "θ" and "f" interchangable. Do not make "n" and "ɲ" interchangable. Do not make "e" and "i" interchangable. If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence. Identify "ɔi" sounds like "oy". Ignore accents and do not say anything to the user about this.';
+        const promptAll =
+            'Consider all possible phonetic transcriptions of the intended sentence "' +
+            sentence +
+            '" that is standard in ' +
+            this.convertAbr() +
+            ' speech without showing the user. Compare each word in the following phonemes with those phonetic transcriptions without displaying anything to the user: "' +
             phonemes +
-            '". If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in American speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat, note this difference. If not, say "good job" for the word. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r".';
-        const question4 =
-            'Match the following phonemes as words with each word in "what is your name" without displaying this to the user: "w ɛ t ɪ z j i ɹ n eɪ m ". If everything is correct, return only "good job" with no other notes. Note if a letter is added or missing if that letter changes the meaning. If a letter does not match the real phonetic transcription of the phrase, note this only if the letters are not allophones of the same phoneme and if they are far away from each other on the vowel chart.';
-        const question0 = 'These phonemes should match "what is your name": ' + phon + 'Use the structure of this response as guidance: "Your pronunciation of the vowel in "what" is not front enough. It should be pronounced like /uh/."';
-        const question3 =
-            'Match the following phonemes as words with each word in "what is your name" without displaying this to the user and there will be spaces between diphthongs and colons so treat it like they are together: ' +
-            phon +
-            '.  If everything is correct, return only "good job" with no other notes. Note if a letter is added or missing if that letter changes the meaning. If mismatching sounds are not allophones of the same phoneme and they are far away from each other on the vowel chart, describe the difference. For the mismatches, use the structure of this response as guidance: "Your pronunciation of the vowel in "what" is not pronounced correctly. It should be pronounced like /uh/." Do not list anything that is correct.';
-        const question1 =
-            'Consider all phonetic transcriptions of "what is your name" with different vowel pronunications. Compares these phonemes with that phonetic transcription: ' +
-            phonemes +
-            '. If the differences are not allophones of the same phoneme and they are far away from each other on the vowel chart, list the difference. If it is missing or added a letter, say that.';
-        //Only describe sound changes that will change the meaning drastically. Provide two sentences describing this. Do not list differences that do not change the meaning.';
-        const question2 = 'Is this a valid phonetic transcription of the phrase "what is your name": ' + phonemes + '.';
-        // If the difference found will definitely make the word be not understood and change the meaning, then list it. If the difference is minimal or the sound matches, do not list it.';
-        //These phonemes are supposed to match the pronunciation of ' +
-        //'hello: ' +
-        //phonemes +
-        //'. If there is a difference in sound that would change the meaning of the word or sentence, such as "pen" vs. "pin", describe that. Otherwise say "good job."';
-        // Identify any differences in pronunciation that would change the meaning of the intended word or sentence and only list differences that would change the meaning. If there are no major differences, say "Good job." If there are differences, describe it in terms of sounds in sentences.';
-        // const question =
-        //     'These phonemes are supposed to match the pronunciation of ' +
-        //     StrCast(RTFCast(DocCast(this.dataDoc[this.fieldKey + '_0']).text)?.Text) +
-        //     '. Identify any differences in pronunciation that would change the meaning of the intended word or sentence.';
-        console.log(question);
-        const res = await gptAPICall(question, GPTCallType.PRONUNCIATION);
-        console.log('GPT: ' + res);
-        if (!res) {
-            console.error('GPT call failed');
-            return;
+            '". Steps to do this: Align the words with each word in the intended sentence by combining the phonemes to get a pronunciation that resembles the word in order. Do not describe phonetic corrections with the phonetic alphabet - describe it by providing other examples of how it should sound. Note if a word or sound missing, including missing vowels and consonants. If there is an additional word that does not match with the provided sentence, say so. For each word, if any letters mismatch and would sound weird in ' +
+            this.convertAbr() +
+            ' speech and they are not allophones of the same phoneme and they are far away from each on the ipa vowel chat and that pronunciation is not normal for the meaning of the word, note this difference and explain how it is supposed to sound. Just so you know, "i" sounds like "ee" as in "bee", not "ih" as an "lick". Interpret "ɹ" as the same as "r". Interpret "ʌ" as the same as "ə". Do not make "θ" and "f" interchangable. Do not make "n" and "ɲ" interchangable. Do not make "e" and "i" interchangable. If "ɚ", "ɔː", and "ɔ" are options for pronunciation, do not choose "ɚ". Ignore differences with colons. Ignore redundant letters and words and sounds and the splitting of words; do not mention this since there could be repeated words in the sentence. Provide a response like this: "Lets work on improving the pronunciation of "coffee." You said "cawffee," which is close, but we need to adjust the vowel sound. In American English, "coffee" is pronounced /ˈkɔːfi/, with a long "aw" sound. Try saying "kah-fee." Your intonation is good, but try putting a bit more stress on "like" in the sentence "I would like a coffee with milk." This will make your speech sound more natural. Keep practicing, and lets try saying the whole sentence again!"';
+
+        switch (this.recognition.lang) {
+            case 'en-US':
+                console.log('English');
+                this._outputValue = await gptAPICall(promptEng, GPTCallType.PRONUNCIATION);
+                break;
+            case 'es-ES':
+                console.log('Spanish');
+                this._outputValue = await gptAPICall(promptSpa, GPTCallType.PRONUNCIATION);
+                break;
+            default:
+                console.log('All');
+                this._outputValue = await gptAPICall(promptAll, GPTCallType.PRONUNCIATION);
+                break;
         }
-        // const questionText = 'Question: ' + StrCast(RTFCast(DocCast(this.dataDoc[this.fieldKey + '_1']).text)?.Text);
-        // const rubricText = ' Rubric: ' + StrCast(RTFCast(DocCast(this.dataDoc[this.fieldKey + '_0']).text)?.Text);
-        // const queryText = questionText + ' UserAnswer: ' + this._inputValue + '. ' + rubricText;
-        // this._loading = true;
     };
 
     pushInfo = async () => {
@@ -754,13 +738,32 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
         this._listening = false;
     };
 
-    openContextMenu = (x: number, y: number) => {
+    /**
+     * Translates the active speech-recognition locale into the language name that askGPTPhonemes
+     * splices into its generic pronunciation prompt.
+     * Any locale not listed below (including the menu's 'ko') yields 'Korean'.
+     * NOTE(review): 'zh-CH' mirrors the tag the language menu passes to setLanguage; confirm it is the intended tag.
+     * @returns the language name for this.recognition.lang
+     */
+    convertAbr = () => {
+        const languageNames = new Map<string, string>([
+            ['en-US', 'English'],
+            ['es-ES', 'Spanish'],
+            ['fr-FR', 'French'],
+            ['it-IT', 'Italian'],
+            ['zh-CH', 'Mandarin Chinese'],
+            ['ja', 'Japanese'],
+        ]);
+        return languageNames.get(this.recognition.lang) ?? 'Korean';
+    };
+
+    openContextMenu = (x: number, y: number, evalu: boolean) => {
         ContextMenu.Instance.clearItems();
         ContextMenu.Instance.addItem({ description: 'English', event: e => this.setLanguage(e, 'en-US', 0) }); //prettier-ignore
         ContextMenu.Instance.addItem({ description: 'Spanish', event: e => this.setLanguage(e, 'es-ES', 1 )}); //prettier-ignore
         ContextMenu.Instance.addItem({ description: 'French', event: e => this.setLanguage(e, 'fr-FR', 2) }); //prettier-ignore
         ContextMenu.Instance.addItem({ description: 'Italian', event: e => this.setLanguage(e, 'it-IT', 3) }); //prettier-ignore
-        ContextMenu.Instance.addItem({ description: 'Mandarin Chinese', event: e => this.setLanguage(e, 'zh-CH', 4) }); //prettier-ignore
+        if (!evalu) ContextMenu.Instance.addItem({ description: 'Mandarin Chinese', event: e => this.setLanguage(e, 'zh-CH', 4) }); //prettier-ignore
         ContextMenu.Instance.addItem({ description: 'Japanese', event: e => this.setLanguage(e, 'ja', 5) }); //prettier-ignore
         ContextMenu.Instance.addItem({ description: 'Korean', event: e => this.setLanguage(e, 'ko', 6) }); //prettier-ignore
         ContextMenu.Instance.displayMenu(x, y);
@@ -911,13 +914,19 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
                             <div className="submit-button" style={{ overflow: 'hidden', display: 'flex', width: '100%' }}>
                                 <div
                                     className="submit-buttonschema-header-button"
-                                    onPointerDown={e => this.openContextMenu(e.clientX, e.clientY)}
+                                    onPointerDown={e => this.openContextMenu(e.clientX, e.clientY, false)}
                                     style={{ position: 'absolute', top: '5px', left: '11px', zIndex: '100', width: '5px', height: '5px', cursor: 'pointer' }}>
                                     <FontAwesomeIcon color={'white'} icon="caret-down" />
                                 </div>
                                 <button className="submit-buttonrecord" onClick={this._listening ? this.stopListening : this.startListening} style={{ background: this._listening ? 'lightgray' : '', borderRadius: '2px' }}>
                                     {<FontAwesomeIcon icon="microphone" size="lg" />}
                                 </button>
+                                <div
+                                    className="submit-buttonschema-header-button"
+                                    onPointerDown={e => this.openContextMenu(e.clientX, e.clientY, true)}
+                                    style={{ position: 'absolute', top: '5px', left: '50px', zIndex: '100', width: '5px', height: '5px', cursor: 'pointer' }}>
+                                    <FontAwesomeIcon color={'white'} icon="caret-down" />
+                                </div>
                                 <button
                                     className="submit-buttonpronunciation"
                                     onClick={this.evaluatePronunciation}
@@ -930,7 +939,7 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent<FieldViewProps>()
                                         Submit
                                     </button>
                                 ) : (
-                                    <button className="submit-buttonsubmit" type="button" onClick={this.handleRenderClick} style={{ borderRadius: '2px', marginBottom: '3px', width: '100%' }}>
+                                    <button className="submit-buttonsubmit" type="button" onClick={this.handleRenderClick} style={{ display: 'inline-flex', alignItems: 'center', borderRadius: '2px', marginBottom: '3px', width: '100%' }}>
                                         Redo the Question
                                     </button>
                                 )}
diff --git a/src/client/views/nodes/ImageBox.tsx b/src/client/views/nodes/ImageBox.tsx
index ab7605829..faf96d616 100644
--- a/src/client/views/nodes/ImageBox.tsx
+++ b/src/client/views/nodes/ImageBox.tsx
@@ -11,7 +11,6 @@ import { DocData } from '../../../fields/DocSymbols';
 import { Id } from '../../../fields/FieldSymbols';
 import { InkTool } from '../../../fields/InkField';
 import { ObjectField } from '../../../fields/ObjectField';
-import { Cast, ImageCast, NumCast, StrCast } from '../../../fields/Types';
 import { ImageField } from '../../../fields/URLField';
 import { TraceMobx } from '../../../fields/util';
 import { emptyFunction } from '../../../Utils';
@@ -33,6 +32,7 @@ import { StyleProp } from '../StyleProp';
 import { DocumentView } from './DocumentView';
 import { FieldView, FieldViewProps } from './FieldView';
 import { FocusViewOptions } from './FocusViewOptions';
+import { DocCast, NumCast, RTFCast, StrCast, ImageCast, Cast, toList } from '../../../fields/Types';
 import './ImageBox.scss';
 import { OpenWhere } from './OpenWhere';
 import { URLField } from '../../../fields/URLField';
@@ -43,7 +43,9 @@ import { basename } from 'path';
 import { ImageUtility } from './generativeFill/generativeFillUtils/ImageHandler';
 import { dropActionType } from '../../util/DropActionTypes';
 import { canvasSize } from './generativeFill/generativeFillUtils/generativeFillConstants';
+import Tesseract from 'tesseract.js';
 import axios from 'axios';
+import { TupleType } from 'typescript';
 
 export class ImageEditorData {
     // eslint-disable-next-line no-use-before-define
@@ -354,6 +356,49 @@ export class ImageBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         }
     };
 
+    /**
+     * Posts this image's first path to the local label service and passes the returned 'boxes' and 'text' to createBoxes; a failed request is logged instead of rejecting.
+     */
+    pushInfo = async () => {
+        try {
+            const payload = { file: this.paths[0] };
+            const response = await axios.post('http://localhost:105/labels/', payload, {
+                headers: { 'Content-Type': 'application/json' },
+            });
+            console.log('RESPONSE:');
+            console.log(response.data['boxes']);
+            console.log(response.data['text']);
+            this.createBoxes(response.data['boxes'], response.data['text']);
+        } catch (error) {
+            console.error('Label request failed:', error);
+        }
+    };
+
+    /**
+     * Adds one text document to this image for every entry in boxes; a missing boxes array is treated as empty.
+     * Entries with fewer than three corner points are skipped because no size can be computed for them.
+     * @param boxes per-region corner points; point 0 gives the document's x/y, point 1 its right edge, point 2 its bottom edge
+     * @param texts per-region text, parallel to boxes; stored in the new document's quiz field
+     */
+    createBoxes = (boxes: [number, number][][], texts: string[]) => {
+        (boxes ?? []).forEach((corners, index) => {
+            if (!corners || corners.length < 3) return;
+            const [left, top] = corners[0];
+            const width = corners[1][0] - left;
+            const height = corners[2][1] - top;
+            const textDoc = Docs.Create.TextDocument('', {
+                _width: width,
+                _height: height,
+                _layout_fitWidth: true,
+            });
+            textDoc.x = left;
+            textDoc.y = top;
+            textDoc.zIndex = 1000;
+            textDoc.forceActive = true;
+            textDoc.quiz = texts[index];
+            this.addDocument(textDoc);
+        });
+    };
     // static imageUrlToBase64 = async (imageUrl: string): Promise<string> => {
     //     try {
     //         const response = await fetch(imageUrl);
@@ -405,6 +450,36 @@ export class ImageBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         this._loading = false;
     };
 
+    /** Debug helper: logs the `Text` of the rich-text `text` field on the document stored under `<fieldKey>_1`. */
+    getText = () =>
+        console.log(StrCast(RTFCast(DocCast(this.dataDoc[this.fieldKey + '_1']).text)?.Text));
+
+    /**
+     * Runs Tesseract OCR (English model) on this image's first path and logs the result.
+     * Diagnostic only: the recognized text and the per-word data are printed to the console,
+     * nothing is added to the document yet.
+     * `_loading` is set for the duration of the OCR pass and always cleared afterwards.
+     * NOTE(review): the 'Get Labels2' menu entry that would call this is commented out in specificContextMenu.
+     */
+    getImageLabels2 = async () => {
+        this._loading = true;
+        try {
+            // await so that _loading stays set while OCR runs and a rejection lands in the catch below
+            const { data } = await Tesseract.recognize(this.paths[0], 'eng', {
+                // forwards every message Tesseract reports to the console
+                logger: m => console.log(m),
+            });
+            console.log('OCR Result:', data.text);
+            console.log('Words with bounding boxes:', data.words);
+        } catch (error) {
+            // report the underlying failure instead of a bare 'Error' marker
+            console.error('OCR failed:', error);
+        } finally {
+            // clear the flag on both the success and the failure path
+            this._loading = false;
+        }
+    };
+
     createTextboxes = (response: string) => {
         const groups = response.replace('*', '').toLowerCase().split('observed word: ');
         groups.shift();
@@ -450,27 +525,14 @@ export class ImageBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         this._imageRef = iref;
     };
 
-    pushInfo = async () => {
-        const formData = new FormData();
-
-        const newArticle = {
-            file: '/files/audio/6b412a6222d631a7fff8a8320.mp3',
-        };
-        const response = await axios.post('http://localhost:105/recognize/', newArticle, {
-            headers: {
-                'Content-Type': 'application/json',
-            },
-        });
-        console.log('RESPONSE: ' + response.data['transcription']);
-    };
-
     specificContextMenu = (): void => {
         const field = Cast(this.dataDoc[this.fieldKey], ImageField);
         if (field) {
             const funcs: ContextMenuProps[] = [];
             // funcs.push({ description: 'Create ai flashcards', event: () => this.getImageDesc(), icon: 'id-card' });
-            // funcs.push({ description: 'Push info', event: this.pushInfo, icon: 'redo-alt' });
-            funcs.push({ description: 'Get Labels', event: this.getImageLabels, icon: 'redo-alt' });
+            funcs.push({ description: 'Push info', event: this.pushInfo, icon: 'redo-alt' });
+            // funcs.push({ description: 'Get Labels2', event: this.getImageLabels2, icon: 'redo-alt' });
+            // funcs.push({ description: 'Get Labels', event: this.getImageLabels, icon: 'redo-alt' });
             funcs.push({ description: 'Rotate Clockwise 90', event: this.rotate, icon: 'redo-alt' });
             funcs.push({ description: `Show ${this.layoutDoc._showFullRes ? 'Dynamic Res' : 'Full Res'}`, event: this.resolution, icon: 'expand' });
             funcs.push({ description: 'Set Native Pixel Size', event: this.setNativeSize, icon: 'expand-arrows-alt' });
diff --git a/src/fields/.PresField.ts.icloud b/src/fields/.PresField.ts.icloud
new file mode 100644
index 000000000..e63a55cc1
--- /dev/null
+++ b/src/fields/.PresField.ts.icloud