diff options
Diffstat (limited to 'src/client/views/nodes/ChatBox/AnswerParser.ts')
-rw-r--r-- | src/client/views/nodes/ChatBox/AnswerParser.ts | 93 |
1 files changed, 63 insertions, 30 deletions
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index dd7ec3499..9956792d8 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -4,62 +4,95 @@ import { v4 as uuid } from 'uuid'; export class AnswerParser { static parse(xml: string): AssistantMessage { const answerRegex = /<answer>([\s\S]*?)<\/answer>/; + const citationsRegex = /<citations>([\s\S]*?)<\/citations>/; const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g; const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/; const questionRegex = /<question>(.*?)<\/question>/g; const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g; const answerMatch = answerRegex.exec(xml); + const citationsMatch = citationsRegex.exec(xml); const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); if (!answerMatch) { throw new Error('Invalid XML: Missing <answer> tag.'); } - const rawTextContent = answerMatch[1].trim(); + let rawTextContent = answerMatch[1].trim(); let content: AssistantMessage['content'] = []; let citations: Citation[] = []; let contentIndex = 0; + // Remove citations and follow-up questions from rawTextContent + if (citationsMatch) { + rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); + } + if (followUpQuestionsMatch) { + rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); + } + // Parse citations let citationMatch; const citationMap = new Map<string, string>(); - while ((citationMatch = citationRegex.exec(rawTextContent)) !== null) { - const [_, index, chunk_id, type, direct_text] = citationMatch; - const citation_id = uuid(); - citationMap.set(index, citation_id); - citations.push({ - direct_text: direct_text.trim(), - type: getChunkType(type), - chunk_id, - citation_id, - }); + if (citationsMatch) { + const citationsContent = citationsMatch[1]; + while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { + const [_, index, chunk_id, type, direct_text] = citationMatch; + const citation_id = uuid(); + citationMap.set(index, citation_id); + citations.push({ + direct_text: direct_text.trim(), + type: getChunkType(type), + chunk_id, + citation_id, + }); + } } - // Parse grounded text content - const parseGroundedText = (text: string): AssistantMessage['content'] => { - const result: AssistantMessage['content'] = []; - let lastIndex = 0; - let match; + // Parse text content (normal and grounded) + let lastIndex = 0; + let match; - while ((match = groundedTextRegex.exec(text)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - - result.push({ - index: contentIndex++, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids, - }); + while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; - lastIndex = match.index + fullMatch.length; + // Add normal text before the grounded text + if (match.index > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, match.index).trim(); + if (normalText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } } - return result; - }; + // Add grounded text + const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); + content.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids, + }); + + lastIndex = match.index + fullMatch.length; + } - content = parseGroundedText(rawTextContent); + // Add any remaining normal text after the last grounded text + if (lastIndex < rawTextContent.length) { + const remainingText = rawTextContent.slice(lastIndex).trim(); + if (remainingText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } let followUpQuestions: string[] = []; if (followUpQuestionsMatch) { |