diff options
Diffstat (limited to 'src/client/views/nodes/ChatBox/AnswerParser.ts')
-rw-r--r-- | src/client/views/nodes/ChatBox/AnswerParser.ts | 84 |
1 files changed, 63 insertions, 21 deletions
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 1162d46b0..4b6c817fd 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -1,12 +1,13 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType } from './types'; import { v4 as uuid } from 'uuid'; export class AnswerParser { static parse(xml: string): AssistantMessage { const answerRegex = /<answer>([\s\S]*?)<\/answer>/; - const citationRegex = /<citation chunk_id="([^"]+)" type="([^"]+)">(.*?)<\/citation>/g; + const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g; const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/; const questionRegex = /<question>(.*?)<\/question>/g; + const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g; const answerMatch = answerRegex.exec(xml); const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); @@ -16,45 +17,86 @@ export class AnswerParser { } const rawTextContent = answerMatch[1].trim(); - const textContentWithCitations = rawTextContent.replace(citationRegex, ''); - const textContent = textContentWithCitations.replace(followUpQuestionsRegex, '').trim(); - + let textContent: AssistantMessage['content'] = []; let citations: Citation[] = []; - let match: RegExpExecArray | null; - - let plainTextOffset = 0; - let citationOffset = 0; - - while ((match = citationRegex.exec(rawTextContent)) !== null) { - const [fullMatch, chunk_id, type, direct_text] = match; - const citationStartIndex = match.index; - const citationPlainStart = citationStartIndex - citationOffset; + let contentIndex = 0; + // Parse citations + let citationMatch; + while ((citationMatch = citationRegex.exec(rawTextContent)) !== null) { + const [_, index, chunk_id, type, direct_text] = citationMatch; citations.push({ direct_text: direct_text.trim(), type: getChunkType(type), - chunk_id: chunk_id, - text_location: citationPlainStart, + chunk_id, citation_id: uuid(), }); + } - citationOffset += fullMatch.length; + // Parse text content (normal and grounded) + let lastIndex = 0; + let matches = []; + + // Find all grounded text matches + let groundedTextMatch; + while ((groundedTextMatch = groundedTextRegex.exec(rawTextContent)) !== null) { + matches.push({ + type: 'grounded', + index: groundedTextMatch.index, + length: groundedTextMatch[0].length, + citationIndexes: groundedTextMatch[1], + text: groundedTextMatch[2], + }); + } + + // Sort matches by their index in the original text + matches.sort((a, b) => a.index - b.index); + + // Process normal and grounded text in order + for (let i = 0; i <= matches.length; i++) { + const currentMatch = matches[i]; + const nextMatchIndex = currentMatch ? currentMatch.index : rawTextContent.length; + + // Add normal text before the current grounded text (or end of content) + if (nextMatchIndex > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, nextMatchIndex).trim(); + if (normalText) { + textContent.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text if there's a match + if (currentMatch) { + const citationIds = currentMatch.citationIndexes.split(',').map(index => citations[parseInt(index) - 1].citation_id); + textContent.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: currentMatch.text.trim(), + citation_ids: citationIds, + }); + lastIndex = currentMatch.index + currentMatch.length; + } } let followUpQuestions: string[] = []; if (followUpQuestionsMatch) { const questionsText = followUpQuestionsMatch[1]; - let questionMatch: RegExpExecArray | null; - + let questionMatch; while ((questionMatch = questionRegex.exec(questionsText)) !== null) { followUpQuestions.push(questionMatch[1].trim()); } } + const assistantResponse: AssistantMessage = { role: ASSISTANT_ROLE.ASSISTANT, - text_content: textContent, + content: textContent, follow_up_questions: followUpQuestions, - citations: citations, + citations, }; return assistantResponse; |