aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/ChatBox/AnswerParser.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/ChatBox/AnswerParser.ts')
-rw-r--r--src/client/views/nodes/ChatBox/AnswerParser.ts84
1 files changed, 63 insertions, 21 deletions
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts
index 1162d46b0..4b6c817fd 100644
--- a/src/client/views/nodes/ChatBox/AnswerParser.ts
+++ b/src/client/views/nodes/ChatBox/AnswerParser.ts
@@ -1,12 +1,13 @@
-import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types';
+import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType } from './types';
import { v4 as uuid } from 'uuid';
export class AnswerParser {
static parse(xml: string): AssistantMessage {
const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
- const citationRegex = /<citation chunk_id="([^"]+)" type="([^"]+)">(.*?)<\/citation>/g;
+ const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g;
const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
const questionRegex = /<question>(.*?)<\/question>/g;
+ const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
const answerMatch = answerRegex.exec(xml);
const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
@@ -16,45 +17,86 @@ export class AnswerParser {
}
const rawTextContent = answerMatch[1].trim();
- const textContentWithCitations = rawTextContent.replace(citationRegex, '');
- const textContent = textContentWithCitations.replace(followUpQuestionsRegex, '').trim();
-
+ let textContent: AssistantMessage['content'] = [];
let citations: Citation[] = [];
- let match: RegExpExecArray | null;
-
- let plainTextOffset = 0;
- let citationOffset = 0;
-
- while ((match = citationRegex.exec(rawTextContent)) !== null) {
- const [fullMatch, chunk_id, type, direct_text] = match;
- const citationStartIndex = match.index;
- const citationPlainStart = citationStartIndex - citationOffset;
+ let contentIndex = 0;
+ // Parse citations
+ let citationMatch;
+ while ((citationMatch = citationRegex.exec(rawTextContent)) !== null) {
+ const [_, index, chunk_id, type, direct_text] = citationMatch;
citations.push({
direct_text: direct_text.trim(),
type: getChunkType(type),
- chunk_id: chunk_id,
- text_location: citationPlainStart,
+ chunk_id,
citation_id: uuid(),
});
+ }
- citationOffset += fullMatch.length;
+ // Parse text content (normal and grounded)
+ let lastIndex = 0;
+ let matches = [];
+
+ // Find all grounded text matches
+ let groundedTextMatch;
+ while ((groundedTextMatch = groundedTextRegex.exec(rawTextContent)) !== null) {
+ matches.push({
+ type: 'grounded',
+ index: groundedTextMatch.index,
+ length: groundedTextMatch[0].length,
+ citationIndexes: groundedTextMatch[1],
+ text: groundedTextMatch[2],
+ });
+ }
+
+ // Sort matches by their index in the original text
+ matches.sort((a, b) => a.index - b.index);
+
+ // Process normal and grounded text in order
+ for (let i = 0; i <= matches.length; i++) {
+ const currentMatch = matches[i];
+ const nextMatchIndex = currentMatch ? currentMatch.index : rawTextContent.length;
+
+ // Add normal text before the current grounded text (or end of content)
+ if (nextMatchIndex > lastIndex) {
+ const normalText = rawTextContent.slice(lastIndex, nextMatchIndex).trim();
+ if (normalText) {
+ textContent.push({
+ index: contentIndex++,
+ type: TEXT_TYPE.NORMAL,
+ text: normalText,
+ citation_ids: null,
+ });
+ }
+ }
+
+ // Add grounded text if there's a match
+ if (currentMatch) {
+ const citationIds = currentMatch.citationIndexes.split(',').map(index => citations[parseInt(index) - 1].citation_id);
+ textContent.push({
+ index: contentIndex++,
+ type: TEXT_TYPE.GROUNDED,
+ text: currentMatch.text.trim(),
+ citation_ids: citationIds,
+ });
+ lastIndex = currentMatch.index + currentMatch.length;
+ }
}
let followUpQuestions: string[] = [];
if (followUpQuestionsMatch) {
const questionsText = followUpQuestionsMatch[1];
- let questionMatch: RegExpExecArray | null;
-
+ let questionMatch;
while ((questionMatch = questionRegex.exec(questionsText)) !== null) {
followUpQuestions.push(questionMatch[1].trim());
}
}
+
const assistantResponse: AssistantMessage = {
role: ASSISTANT_ROLE.ASSISTANT,
- text_content: textContent,
+ content: textContent,
follow_up_questions: followUpQuestions,
- citations: citations,
+ citations,
};
return assistantResponse;