diff --git a/content/yarn-spinner.md b/content/yarn-spinner.md index 69a41eb..5dfef30 100644 --- a/content/yarn-spinner.md +++ b/content/yarn-spinner.md @@ -2,7 +2,7 @@ Yarn Spinner是一种文字冒险文本格式,可用于描述ttrpg冒险。 -不要使用`markdown`格式语法,如标题,加粗等。只使用纯文本描述冒险内容。 +不要使用`markdown`格式语法,如标题,加粗等。冒号也有特殊含义(代表台词)。只使用纯文本描述冒险内容。 ## 基本结构 diff --git a/src/yarn-spinner/compile/compiler.ts b/src/yarn-spinner/compile/compiler.ts new file mode 100644 index 0000000..a8fce24 --- /dev/null +++ b/src/yarn-spinner/compile/compiler.ts @@ -0,0 +1,182 @@ +import type { YarnDocument, Statement, Line, Option } from "../model/ast"; +import type { IRProgram, IRNode, IRNodeGroup, IRInstruction } from "./ir"; + +export interface CompileOptions { + generateOnceIds?: (ctx: { node: string; index: number }) => string; +} + +export function compile(doc: YarnDocument, opts: CompileOptions = {}): IRProgram { + const program: IRProgram = { enums: {}, nodes: {} }; + // Store enum definitions + for (const enumDef of doc.enums) { + program.enums[enumDef.name] = enumDef.cases; + } + const genOnce = opts.generateOnceIds ?? ((x) => `${x.node}#once#${x.index}`); + let globalLineCounter = 0; + + function ensureLineId(tags?: string[]): string[] | undefined { + const t = tags ? 
[...tags] : []; + if (!t.some((x) => x.startsWith("line:"))) { + t.push(`line:${(globalLineCounter++).toString(16)}`); + } + return t; + } + + // Group nodes by title to handle node groups + const nodesByTitle = new Map(); + for (const node of doc.nodes) { + if (!nodesByTitle.has(node.title)) { + nodesByTitle.set(node.title, []); + } + nodesByTitle.get(node.title)!.push(node); + } + + for (const [title, nodesWithSameTitle] of nodesByTitle) { + // If only one node with this title, treat as regular node + if (nodesWithSameTitle.length === 1) { + const node = nodesWithSameTitle[0]; + const instructions: IRInstruction[] = []; + let onceCounter = 0; + function emitBlock(stmts: Statement[]): IRInstruction[] { + const block: IRInstruction[] = []; + for (const s of stmts) { + switch (s.type) { + case "Line": + { + const line = s as Line; + block.push({ op: "line", speaker: line.speaker, text: line.text, tags: ensureLineId(line.tags), markup: line.markup }); + } + break; + case "Command": + block.push({ op: "command", content: s.content }); + break; + case "Jump": + block.push({ op: "jump", target: s.target }); + break; + case "Detour": + block.push({ op: "detour", target: s.target }); + break; + case "OptionGroup": { + // Add #lastline tag to the most recent line, if present + for (let i = block.length - 1; i >= 0; i--) { + const ins = block[i]; + if (ins.op === "line") { + const tags = new Set(ins.tags ?? 
[]); + if (![...tags].some((x) => x === "lastline" || x === "#lastline")) { + tags.add("lastline"); + } + ins.tags = Array.from(tags); + break; + } + if (ins.op !== "command") break; // stop if non-line non-command before options + } + block.push({ + op: "options", + options: s.options.map((o: Option) => ({ text: o.text, tags: ensureLineId(o.tags), css: (o as any).css, markup: o.markup, condition: o.condition, block: emitBlock(o.body) })), + }); + break; + } + case "If": + block.push({ + op: "if", + branches: s.branches.map((b) => ({ condition: b.condition, block: emitBlock(b.body) })), + }); + break; + case "Once": + block.push({ op: "once", id: genOnce({ node: node.title, index: onceCounter++ }), block: emitBlock(s.body) }); + break; + case "Enum": + // Enums are metadata, skip during compilation (already stored in program.enums) + break; + } + } + return block; + } + instructions.push(...emitBlock(node.body)); + const irNode: IRNode = { + title: node.title, + instructions, + when: node.when, + css: (node as any).css, + scene: node.headers.scene?.trim() || undefined + }; + program.nodes[node.title] = irNode; + } else { + // Multiple nodes with same title - create node group + const groupNodes: IRNode[] = []; + let onceCounter = 0; // one counter for the whole group: members share a title, so per-node counters would repeat once-ids + for (const node of nodesWithSameTitle) { + const instructions: IRInstruction[] = []; + function emitBlock(stmts: Statement[]): IRInstruction[] { + const block: IRInstruction[] = []; + for (const s of stmts) { + switch (s.type) { + case "Line": + { + const line = s as Line; + block.push({ op: "line", speaker: line.speaker, text: line.text, tags: ensureLineId(line.tags), markup: line.markup }); + } + break; + case "Command": + block.push({ op: "command", content: s.content }); + break; + case "Jump": + block.push({ op: "jump", target: s.target }); + break; + case "Detour": + block.push({ op: "detour", target: s.target }); + break; + case "OptionGroup": { + for (let i = block.length - 1; i >= 0; i--) { + const ins = block[i]; + if 
(ins.op === "line") { + const tags = new Set(ins.tags ?? []); + if (![...tags].some((x) => x === "lastline" || x === "#lastline")) { + tags.add("lastline"); + } + ins.tags = Array.from(tags); + break; + } + if (ins.op !== "command") break; + } + block.push({ + op: "options", + options: s.options.map((o: Option) => ({ text: o.text, tags: ensureLineId(o.tags), css: (o as any).css, markup: o.markup, condition: o.condition, block: emitBlock(o.body) })), + }); + break; + } + case "If": + block.push({ + op: "if", + branches: s.branches.map((b) => ({ condition: b.condition, block: emitBlock(b.body) })), + }); + break; + case "Once": + block.push({ op: "once", id: genOnce({ node: node.title, index: onceCounter++ }), block: emitBlock(s.body) }); + break; + case "Enum": + break; + } + } + return block; + } + instructions.push(...emitBlock(node.body)); + groupNodes.push({ + title: node.title, + instructions, + when: node.when, + css: (node as any).css, + scene: node.headers.scene?.trim() || undefined + }); + } + const group: IRNodeGroup = { + title, + nodes: groupNodes + }; + program.nodes[title] = group; + } + } + + return program; +} + diff --git a/src/yarn-spinner/compile/ir.ts b/src/yarn-spinner/compile/ir.ts new file mode 100644 index 0000000..38c8183 --- /dev/null +++ b/src/yarn-spinner/compile/ir.ts @@ -0,0 +1,28 @@ +import type { MarkupParseResult } from "../markup/types"; +export type IRProgram = { + enums: Record; // enum name -> cases + nodes: Record; // can be single node or group +}; + +export type IRNode = { + title: string; + instructions: IRInstruction[]; + when?: string[]; // Array of when conditions + css?: string; + scene?: string; // Scene name from node header +}; + +export type IRNodeGroup = { + title: string; + nodes: IRNode[]; // Multiple nodes with same title, different when conditions +}; + +export type IRInstruction = + | { op: "line"; speaker?: string; text: string; tags?: string[]; markup?: MarkupParseResult } + | { op: "command"; content: string 
} + | { op: "jump"; target: string } + | { op: "detour"; target: string } + | { op: "options"; options: Array<{ text: string; tags?: string[]; css?: string; markup?: MarkupParseResult; condition?: string; block: IRInstruction[] }> } + | { op: "if"; branches: Array<{ condition: string | null; block: IRInstruction[] }> } + | { op: "once"; id: string; block: IRInstruction[] }; + diff --git a/src/yarn-spinner/index.ts b/src/yarn-spinner/index.ts new file mode 100644 index 0000000..9da04a6 --- /dev/null +++ b/src/yarn-spinner/index.ts @@ -0,0 +1 @@ +export {parseYarn} from './parse/parser'; \ No newline at end of file diff --git a/src/yarn-spinner/markup/parser.ts b/src/yarn-spinner/markup/parser.ts new file mode 100644 index 0000000..93c7641 --- /dev/null +++ b/src/yarn-spinner/markup/parser.ts @@ -0,0 +1,381 @@ +import type { MarkupParseResult, MarkupSegment, MarkupValue, MarkupWrapper } from "./types"; + +const DEFAULT_HTML_TAGS = new Set(["b", "em", "small", "strong", "sub", "sup", "ins", "del", "mark", "br"]); +const SELF_CLOSING_TAGS = new Set(["br"]); + +interface StackEntry { + name: string; + type: MarkupWrapper["type"]; + properties: Record; + originalText: string; +} + +interface ParsedTag { + kind: "open" | "close" | "self"; + name: string; + properties: Record; +} + +const SELF_CLOSING_SPACE_REGEX = /\s+\/$/; +const ATTRIBUTE_REGEX = + /^([a-zA-Z_][a-zA-Z0-9_-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/; + +export function parseMarkup(input: string): MarkupParseResult { + const segments: MarkupSegment[] = []; + const stack: StackEntry[] = []; + const chars: string[] = []; + let currentSegment: MarkupSegment | null = null; + let nomarkupDepth = 0; + + const pushSegment = (segment: MarkupSegment) => { + if (segment.selfClosing || segment.end > segment.start) { + segments.push(segment); + } + }; + + const wrappersEqual = (a: MarkupWrapper[], b: MarkupWrapper[]) => { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + const 
wa = a[i]; + const wb = b[i]; + if (wa.name !== wb.name || wa.type !== wb.type) return false; + const keysA = Object.keys(wa.properties); + const keysB = Object.keys(wb.properties); + if (keysA.length !== keysB.length) return false; + for (const key of keysA) { + if (wa.properties[key] !== wb.properties[key]) return false; + } + } + return true; + }; + + const flushCurrentSegment = () => { + if (currentSegment) { + segments.push(currentSegment); + currentSegment = null; + } + }; + + const cloneWrappers = (): MarkupWrapper[] => + stack.map((entry) => ({ + name: entry.name, + type: entry.type, + properties: { ...entry.properties }, + })); + + const appendChar = (char: string) => { + const index = chars.length; + chars.push(char); + const wrappers = cloneWrappers(); + if (currentSegment && wrappersEqual(currentSegment.wrappers, wrappers)) { + currentSegment.end = index + 1; + } else { + flushCurrentSegment(); + currentSegment = { + start: index, + end: index + 1, + wrappers, + }; + } + }; + + const appendLiteral = (literal: string) => { + for (const ch of literal) { + appendChar(ch); + } + }; + + const parseTag = (contentRaw: string): ParsedTag | null => { + let content = contentRaw.trim(); + if (!content) return null; + + if (content.startsWith("/")) { + const name = content.slice(1).trim().toLowerCase(); + if (!name) return null; + return { kind: "close", name, properties: {} }; + } + + let kind: ParsedTag["kind"] = "open"; + if (content.endsWith("/")) { + content = content.replace(SELF_CLOSING_SPACE_REGEX, "").trim(); + if (content.endsWith("/")) { + content = content.slice(0, -1).trim(); + } + kind = "self"; + } + + const nameMatch = content.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)/); + if (!nameMatch) return null; + const name = nameMatch[1].toLowerCase(); + let rest = content.slice(nameMatch[0].length).trim(); + + const properties: Record = {}; + while (rest.length > 0) { + const attrMatch = rest.match(ATTRIBUTE_REGEX); + if (!attrMatch) { + break; + } + const [, 
keyRaw, doubleQuoted, singleQuoted, bare] = attrMatch; + const key = keyRaw.toLowerCase(); + let value: MarkupValue = true; + const rawValue = doubleQuoted ?? singleQuoted ?? bare; + if (rawValue !== undefined) { + value = parseAttributeValue(rawValue); + } + properties[key] = value; + rest = rest.slice(attrMatch[0].length).trim(); + } + + const finalKind: ParsedTag["kind"] = kind === "self" || SELF_CLOSING_TAGS.has(name) ? "self" : kind; + return { kind: finalKind, name, properties }; + }; + + const parseAttributeValue = (raw: string): MarkupValue => { + const trimmed = raw.trim(); + if (/^(true|false)$/i.test(trimmed)) { + return /^true$/i.test(trimmed); + } + if (/^[+-]?\d+(\.\d+)?$/.test(trimmed)) { + const num = Number(trimmed); + if (!Number.isNaN(num)) { + return num; + } + } + return trimmed; + }; + + const handleSelfClosing = (tag: ParsedTag) => { + const wrapper: MarkupWrapper = { + name: tag.name, + type: DEFAULT_HTML_TAGS.has(tag.name) ? "default" : "custom", + properties: tag.properties, + }; + const position = chars.length; + pushSegment({ + start: position, + end: position, + wrappers: [wrapper], + selfClosing: true, + }); + }; + + let i = 0; + while (i < input.length) { + const char = input[i]; + if (char === "\\" && i + 1 < input.length) { + const next = input[i + 1]; + if (next === "[" || next === "]" || next === "\\") { + appendChar(next); + i += 2; + continue; + } + } + + if (char === "[") { + const closeIndex = findClosingBracket(input, i + 1); + if (closeIndex === -1) { + appendChar(char); + i += 1; + continue; + } + const content = input.slice(i + 1, closeIndex); + const originalText = input.slice(i, closeIndex + 1); + + const parsed = parseTag(content); + if (!parsed) { + appendLiteral(originalText); + i = closeIndex + 1; + continue; + } + + if (parsed.name === "nomarkup") { + if (parsed.kind === "open") { + nomarkupDepth += 1; + } else if (parsed.kind === "close" && nomarkupDepth > 0) { + nomarkupDepth -= 1; + } + i = closeIndex + 1; + 
continue; + } + + if (nomarkupDepth > 0) { + appendLiteral(originalText); + i = closeIndex + 1; + continue; + } + + if (parsed.kind === "open") { + const entry: StackEntry = { + name: parsed.name, + type: DEFAULT_HTML_TAGS.has(parsed.name) ? "default" : "custom", + properties: parsed.properties, + originalText, + }; + stack.push(entry); + flushCurrentSegment(); + i = closeIndex + 1; + continue; + } + + if (parsed.kind === "self") { + handleSelfClosing(parsed); + i = closeIndex + 1; + continue; + } + + // closing tag + if (stack.length === 0) { + if (SELF_CLOSING_TAGS.has(parsed.name)) { + i = closeIndex + 1; + continue; + } + appendLiteral(originalText); + i = closeIndex + 1; + continue; + } + const top = stack[stack.length - 1]; + if (top.name === parsed.name) { + flushCurrentSegment(); + stack.pop(); + i = closeIndex + 1; + continue; + } + if (SELF_CLOSING_TAGS.has(parsed.name)) { + i = closeIndex + 1; + continue; + } + // mismatched closing; treat as literal + appendLiteral(originalText); + i = closeIndex + 1; + continue; + } + + appendChar(char); + i += 1; + } + + flushCurrentSegment(); + + // If any tags remain open, treat them as literal text appended at end + while (stack.length > 0) { + const entry = stack.pop()!; + appendLiteral(entry.originalText); + } + flushCurrentSegment(); + + const text = chars.join(""); + return { + text, + segments: mergeSegments(segments, text.length), + }; +} + +function mergeSegments(segments: MarkupSegment[], textLength: number): MarkupSegment[] { + const sorted = [...segments].sort((a, b) => a.start - b.start || a.end - b.end); + const merged: MarkupSegment[] = []; + let last: MarkupSegment | null = null; + + for (const seg of sorted) { + if (seg.start === seg.end && !seg.selfClosing) { + continue; + } + if (last && !seg.selfClosing && last.end === seg.start && wrappersMatch(last.wrappers, seg.wrappers)) { + last.end = seg.end; + } else { + last = { + start: seg.start, + end: seg.end, + wrappers: seg.wrappers, + selfClosing: 
seg.selfClosing, + }; + merged.push(last); + } + } + + if (merged.length === 0 && textLength > 0) { + merged.push({ + start: 0, + end: textLength, + wrappers: [], + }); + } + + return merged; +} + +function wrappersMatch(a: MarkupWrapper[], b: MarkupWrapper[]): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i].name !== b[i].name || a[i].type !== b[i].type) return false; + const keysA = Object.keys(a[i].properties); + const keysB = Object.keys(b[i].properties); + if (keysA.length !== keysB.length) return false; + for (const key of keysA) { + if (a[i].properties[key] !== b[i].properties[key]) return false; + } + } + return true; +} + +function findClosingBracket(text: string, start: number): number { + for (let i = start; i < text.length; i++) { + if (text[i] === "]") { + let backslashCount = 0; + let j = i - 1; + while (j >= 0 && text[j] === "\\") { + backslashCount++; + j--; + } + if (backslashCount % 2 === 0) { + return i; + } + } + } + return -1; +} + +export function sliceMarkup(result: MarkupParseResult, start: number, end?: number): MarkupParseResult { + const textLength = result.text.length; + const sliceStart = Math.max(0, Math.min(start, textLength)); + const sliceEnd = end === undefined ? 
textLength : Math.max(sliceStart, Math.min(end, textLength)); + const slicedSegments: MarkupSegment[] = []; + + for (const seg of result.segments) { + const segStart = Math.max(seg.start, sliceStart); + const segEnd = Math.min(seg.end, sliceEnd); + if (seg.selfClosing) { + if (seg.start >= sliceStart && seg.start <= sliceEnd) { // test the unclamped position: a tag located before the slice must not be pulled in at offset 0 + slicedSegments.push({ + start: segStart - sliceStart, + end: segStart - sliceStart, + wrappers: seg.wrappers, + selfClosing: true, + }); + } + continue; + } + if (segEnd <= segStart) continue; + slicedSegments.push({ + start: segStart - sliceStart, + end: segEnd - sliceStart, + wrappers: seg.wrappers.map((wrapper) => ({ + name: wrapper.name, + type: wrapper.type, + properties: { ...wrapper.properties }, + })), + }); + } + + if (slicedSegments.length === 0 && sliceEnd - sliceStart > 0) { + slicedSegments.push({ + start: 0, + end: sliceEnd - sliceStart, + wrappers: [], + }); + } + + return { + text: result.text.slice(sliceStart, sliceEnd), + segments: mergeSegments(slicedSegments, sliceEnd - sliceStart), + }; +} diff --git a/src/yarn-spinner/markup/types.ts b/src/yarn-spinner/markup/types.ts new file mode 100644 index 0000000..bab3cbf --- /dev/null +++ b/src/yarn-spinner/markup/types.ts @@ -0,0 +1,21 @@ +export type MarkupValue = string | number | boolean; + +export type MarkupWrapperType = "default" | "custom"; + +export interface MarkupWrapper { + name: string; + type: MarkupWrapperType; + properties: Record<string, MarkupValue>; +} + +export interface MarkupSegment { + start: number; + end: number; + wrappers: MarkupWrapper[]; + selfClosing?: boolean; +} + +export interface MarkupParseResult { + text: string; + segments: MarkupSegment[]; +} diff --git a/src/yarn-spinner/model/ast.ts b/src/yarn-spinner/model/ast.ts new file mode 100644 index 0000000..ecbd1d3 --- /dev/null +++ b/src/yarn-spinner/model/ast.ts @@ -0,0 +1,97 @@ +export type Position = { line: number; column: number }; + +export interface NodeHeaderMap { + [key: string]: string; +} + +export 
interface YarnDocument { + type: "Document"; + enums: EnumDefinition[]; + nodes: YarnNode[]; +} + +export interface EnumDefinition { + type: "Enum"; + name: string; + cases: string[]; +} + +export interface YarnNode { + type: "Node"; + title: string; + headers: NodeHeaderMap; + nodeTags?: string[]; + when?: string[]; // Array of when conditions (can be "once", "always", or expression like "$has_sword") + css?: string; // Custom CSS style for node + body: Statement[]; +} + +export type Statement = + | Line + | Command + | OptionGroup + | IfBlock + | OnceBlock + | Jump + | Detour + | EnumBlock; + +import type { MarkupParseResult } from "../markup/types.js"; + +export interface Line { + type: "Line"; + speaker?: string; + text: string; + tags?: string[]; + markup?: MarkupParseResult; +} + +export interface Command { + type: "Command"; + content: string; // inside << >> +} + +export interface Jump { + type: "Jump"; + target: string; +} + +export interface Detour { + type: "Detour"; + target: string; +} + +export interface OptionGroup { + type: "OptionGroup"; + options: Option[]; +} + +export interface Option { + type: "Option"; + text: string; + body: Statement[]; // executed if chosen + tags?: string[]; + css?: string; // Custom CSS style for option + markup?: MarkupParseResult; + condition?: string; +} + +export interface IfBlock { + type: "If"; + branches: Array<{ + condition: string | null; // null for else + body: Statement[]; + }>; +} + +export interface OnceBlock { + type: "Once"; + body: Statement[]; +} + +export interface EnumBlock { + type: "Enum"; + name: string; + cases: string[]; +} + diff --git a/src/yarn-spinner/parse/lexer.ts b/src/yarn-spinner/parse/lexer.ts new file mode 100644 index 0000000..d4a95c8 --- /dev/null +++ b/src/yarn-spinner/parse/lexer.ts @@ -0,0 +1,107 @@ +export interface Token { + type: + | "HEADER_KEY" + | "HEADER_VALUE" + | "NODE_START" // --- + | "NODE_END" // === + | "OPTION" // -> + | "COMMAND" // <<...>> (single-line) + | "TEXT" 
// any non-empty content line + | "EMPTY" + | "INDENT" + | "DEDENT" + | "EOF"; + text: string; + line: number; + column: number; +} + +// Minimal indentation-sensitive lexer to support options and their bodies. +export function lex(input: string): Token[] { + const lines = input.replace(/\r\n?/g, "\n").split("\n"); + const tokens: Token[] = []; + const indentStack: number[] = [0]; + + let inHeaders = true; + + function push(type: Token["type"], text: string, line: number, column: number) { + tokens.push({ type, text, line, column }); + } + + for (let i = 0; i < lines.length; i++) { + const raw = lines[i]; + const lineNum = i + 1; + const indent = raw.match(/^[ \t]*/)?.[0] ?? ""; + const content = raw.slice(indent.length); + + if (content.trim() === "") { + push("EMPTY", "", lineNum, 1); + continue; + } + + // Manage indentation tokens only within node bodies and on non-empty lines + if (!inHeaders) { + const prev = indentStack[indentStack.length - 1]; + if (indent.length > prev) { + indentStack.push(indent.length); + push("INDENT", "", lineNum, 1); + } else if (indent.length < prev) { + while (indentStack.length && indent.length < indentStack[indentStack.length - 1]) { + indentStack.pop(); + push("DEDENT", "", lineNum, 1); + } + } + } + + if (content === "---") { + inHeaders = false; + push("NODE_START", content, lineNum, indent.length + 1); + continue; + } + if (content === "===") { + inHeaders = true; + // flush indentation to root + while (indentStack.length > 1) { + indentStack.pop(); + push("DEDENT", "", lineNum, 1); + } + push("NODE_END", content, lineNum, indent.length + 1); + continue; + } + + // Header: key: value (only valid while inHeaders) + if (inHeaders) { + const m = content.match(/^([A-Za-z_][A-Za-z0-9_]*)\s*:\s*(.*)$/); + if (m) { + push("HEADER_KEY", m[1], lineNum, indent.length + 1); + push("HEADER_VALUE", m[2], lineNum, indent.length + 1 + (m[0].length - m[2].length)); // the value is the suffix of the match, so its offset is the length difference (indexOf found the first occurrence, 0 for an empty value) + continue; + } + } + + if (content.startsWith("->")) { + push("OPTION", 
content.slice(2).trim(), lineNum, indent.length + 1); + continue; + } + + // Commands like <<...>> (single line) + const cmd = content.match(/^<<(.+?)>>\s*$/); + if (cmd) { + push("COMMAND", cmd[1].trim(), lineNum, indent.length + 1); + continue; + } + + // Plain text line + push("TEXT", content, lineNum, indent.length + 1); + } + + // close remaining indentation at EOF + while (indentStack.length > 1) { + indentStack.pop(); + tokens.push({ type: "DEDENT", text: "", line: lines.length, column: 1 }); + } + + tokens.push({ type: "EOF", text: "", line: lines.length + 1, column: 1 }); + return tokens; +} + diff --git a/src/yarn-spinner/parse/parser.ts b/src/yarn-spinner/parse/parser.ts new file mode 100644 index 0000000..afa3a43 --- /dev/null +++ b/src/yarn-spinner/parse/parser.ts @@ -0,0 +1,497 @@ +import { lex, Token } from "./lexer"; +import { parseMarkup, sliceMarkup } from "../markup/parser"; +import type { MarkupParseResult } from "../markup/types"; +import type { + YarnDocument, + YarnNode, + Statement, + Line, + Command, + OptionGroup, + Option, + IfBlock, + OnceBlock, + Jump, + Detour, + EnumBlock, +} from "../model/ast"; + +export class ParseError extends Error {} + +export function parseYarn(text: string): YarnDocument { + const tokens = lex(text); + const p = new Parser(tokens); + return p.parseDocument(); +} + +class Parser { + private i = 0; + constructor(private readonly tokens: Token[]) {} + + private peek(offset = 0) { + return this.tokens[this.i + offset]; + } + private at(type: Token["type"]) { + return this.peek()?.type === type; + } + private take(type: Token["type"], err?: string): Token { + const t = this.peek(); + if (!t || t.type !== type) throw new ParseError(err ?? 
`Expected ${type}, got ${t?.type}`); + this.i++; + return t; + } + private takeIf(type: Token["type"]) { + if (this.at(type)) return this.take(type); + return null; + } + + parseDocument(): YarnDocument { + const enums: EnumBlock[] = []; + const nodes: YarnNode[] = []; + while (!this.at("EOF")) { + // Skip empties + while (this.at("EMPTY")) this.i++; + if (this.at("EOF")) break; + + // Check if this is an enum definition (top-level) + if (this.at("COMMAND")) { + const cmd = this.peek().text.trim(); + if (cmd.startsWith("enum ")) { + const enumCmd = this.take("COMMAND").text; // consume the enum command + const enumName = enumCmd.slice(5).trim(); + const enumDef = this.parseEnumBlock(enumName); + enums.push(enumDef); + continue; + } + } + + nodes.push(this.parseNode()); + } + return { type: "Document", enums, nodes }; + } + + private parseNode(): YarnNode { + const headers: Record = {}; + let title: string | null = null; + let nodeTags: string[] | undefined; + let whenConditions: string[] = []; + let nodeCss: string | undefined; + + // headers + while (!this.at("NODE_START")) { + const keyTok = this.take("HEADER_KEY", "Expected node header before '---'"); + const valTok = this.take("HEADER_VALUE", "Expected header value"); + if (keyTok.text === "title") title = valTok.text.trim(); + if (keyTok.text === "tags") { + const raw = valTok.text.trim(); + nodeTags = raw.split(/\s+/).filter(Boolean); + } + if (keyTok.text === "when") { + // Each when: header adds one condition (can have multiple when: headers) + const raw = valTok.text.trim(); + whenConditions.push(raw); + } + // Capture &css{ ... 
} styles in any header value + const rawVal = valTok.text.trim(); + if (rawVal.startsWith("&css{")) { + // Collect until closing '}' possibly spanning multiple lines before '---' + let cssContent = rawVal.replace(/^&css\{/, ""); + let closed = cssContent.includes("}"); + if (closed) { + cssContent = cssContent.split("}")[0]; + } else { + // Consume subsequent TEXT or HEADER_VALUE tokens until we find a '}' + while (!this.at("NODE_START") && !this.at("EOF")) { + const next = this.peek(); + if (next.type === "TEXT" || next.type === "HEADER_VALUE") { + const t = this.take(next.type).text; + if (t.includes("}")) { + cssContent += (cssContent ? "\n" : "") + t.split("}")[0]; + closed = true; + break; + } else { + cssContent += (cssContent ? "\n" : "") + t; + } + } else if (next.type === "EMPTY") { + this.i++; + } else { + break; + } + } + } + nodeCss = (cssContent || "").trim(); + } + headers[keyTok.text] = valTok.text; + // allow empty lines + while (this.at("EMPTY")) this.i++; + } + if (!title) throw new ParseError("Every node must have a title header"); + this.take("NODE_START"); + // allow optional empties after --- + while (this.at("EMPTY")) this.i++; + + const body: Statement[] = this.parseStatementsUntil("NODE_END"); + this.take("NODE_END", "Expected node end '==='"); + return { + type: "Node", + title, + headers, + nodeTags, + when: whenConditions.length > 0 ? 
whenConditions : undefined, + css: nodeCss, + body + }; + } + + private parseStatementsUntil(endType: Token["type"]): Statement[] { + const out: Statement[] = []; + while (!this.at(endType) && !this.at("EOF")) { + // skip extra empties + while (this.at("EMPTY")) this.i++; + if (this.at(endType) || this.at("EOF")) break; + + if (this.at("OPTION")) { + out.push(this.parseOptionGroup()); + continue; + } + + const stmt = this.parseStatement(); + out.push(stmt); + } + return out; + } + + private parseStatement(): Statement { + const t = this.peek(); + if (!t) throw new ParseError("Unexpected EOF"); + + if (t.type === "COMMAND") { + const cmd = this.take("COMMAND").text; + if (cmd.startsWith("jump ")) return { type: "Jump", target: cmd.slice(5).trim() } as Jump; + if (cmd.startsWith("detour ")) return { type: "Detour", target: cmd.slice(7).trim() } as Detour; + if (cmd.startsWith("if ")) return this.parseIfCommandBlock(cmd); + if (cmd === "once") return this.parseOnceBlock(); + if (cmd.startsWith("enum ")) { + const enumName = cmd.slice(5).trim(); + return this.parseEnumBlock(enumName); + } + return { type: "Command", content: cmd } as Command; + } + if (t.type === "TEXT") { + const raw = this.take("TEXT").text; + const { cleanText: textWithoutTags, tags } = this.extractTags(raw); + const markup = parseMarkup(textWithoutTags); + const speakerMatch = markup.text.match(/^([^:\s][^:]*)\s*:\s*(.*)$/); + if (speakerMatch) { + const messageText = speakerMatch[2]; + const messageOffset = markup.text.length - messageText.length; + const slicedMarkup = sliceMarkup(markup, messageOffset); + const normalizedMarkup = this.normalizeMarkup(slicedMarkup); + return { + type: "Line", + speaker: speakerMatch[1].trim(), + text: messageText, + tags, + markup: normalizedMarkup, + } as Line; + } + // If/Else blocks use inline markup {if ...} + const trimmed = markup.text.trim(); + if (trimmed.startsWith("{if ") || trimmed === "{else}" || trimmed.startsWith("{else if ") || trimmed === 
"{endif}") { + return this.parseIfFromText(markup.text); + } + return { + type: "Line", + text: markup.text, + tags, + markup: this.normalizeMarkup(markup), + } as Line; + } + throw new ParseError(`Unexpected token ${t.type}`); + } + + private parseOptionGroup(): OptionGroup { + const options: Option[] = []; + // One or more OPTION lines, with bodies under INDENT + while (this.at("OPTION")) { + const raw = this.take("OPTION").text; + const { cleanText: textWithAttrs, tags } = this.extractTags(raw); + const { text: textWithCondition, css } = this.extractCss(textWithAttrs); + const { text: optionText, condition } = this.extractOptionCondition(textWithCondition); + const markup = parseMarkup(optionText); + let body: Statement[] = []; + if (this.at("INDENT")) { + this.take("INDENT"); + body = this.parseStatementsUntil("DEDENT"); + this.take("DEDENT"); + while (this.at("EMPTY")) this.i++; + } + options.push({ + type: "Option", + text: markup.text, + body, + tags, + css, + markup: this.normalizeMarkup(markup), + condition, + }); + // Consecutive options belong to the same group; break on non-OPTION + while (this.at("EMPTY")) this.i++; + } + return { type: "OptionGroup", options }; + } + + private normalizeMarkup(result: MarkupParseResult): MarkupParseResult | undefined { + if (!result) return undefined; + if (result.segments.length === 0) { + return undefined; + } + const hasFormatting = result.segments.some( + (segment) => segment.wrappers.length > 0 || segment.selfClosing + ); + if (!hasFormatting) { + return undefined; + } + return { + text: result.text, + segments: result.segments.map((segment) => ({ + start: segment.start, + end: segment.end, + wrappers: segment.wrappers.map((wrapper) => ({ + name: wrapper.name, + type: wrapper.type, + properties: { ...wrapper.properties }, + })), + selfClosing: segment.selfClosing, + })), + }; + } + + private extractTags(input: string): { cleanText: string; tags?: string[] } { + const tags: string[] = []; + // Match tags that are 
space-separated and not part of hex colors or CSS + // Tags are like "#tag" or "#line:1a2b" (a name plus an optional ":value" part), preceded by whitespace + const re = /\s#([a-zA-Z_][a-zA-Z0-9_]*(?::[a-zA-Z0-9_-]+)?)(?!\w)/g; + let text = input; + let m: RegExpExecArray | null; + while ((m = re.exec(input))) { + tags.push(m[1]); + } + if (tags.length > 0) { + // Only remove tags that match the pattern (not hex colors in CSS) + text = input.replace(/\s#([a-zA-Z_][a-zA-Z0-9_]*(?::[a-zA-Z0-9_-]+)?)(?!\w)/g, "").trimEnd(); + return { cleanText: text, tags }; + } + return { cleanText: input }; + } + + private extractCss(input: string): { text: string; css?: string } { + const cssMatch = input.match(/\s*&css\{([^}]*)\}\s*$/); + if (cssMatch) { + const css = cssMatch[1].trim(); + const text = input.slice(0, cssMatch.index).trimEnd(); // cut at the match position; replace() would remove the first identical substring instead + return { text, css }; + } + return { text: input }; + } + + private extractOptionCondition(input: string): { text: string; condition?: string } { + const match = input.match(/\s\[\s*if\s+([^\]]+)\]\s*$/i); + if (match) { + const text = input.slice(0, match.index).trimEnd(); + return { text, condition: match[1].trim() }; + } + return { text: input }; + } + + private parseStatementsUntilStop(shouldStop: () => boolean): Statement[] { + const out: Statement[] = []; + while (!this.at("EOF")) { + // Check stop condition at root level only + if (shouldStop()) break; + while (this.at("EMPTY")) this.i++; + if (this.at("EOF") || shouldStop()) break; + // Handle indentation - if we see INDENT, parse the indented block + if (this.at("INDENT")) { + this.take("INDENT"); + // Parse statements at this indent level until DEDENT (don't check stop condition inside) + while (!this.at("DEDENT") && !this.at("EOF")) { + while (this.at("EMPTY")) this.i++; + if (this.at("DEDENT") || this.at("EOF")) break; + if (this.at("OPTION")) { + out.push(this.parseOptionGroup()); + continue; + } + out.push(this.parseStatement()); + } + if (this.at("DEDENT")) { + this.take("DEDENT"); + while (this.at("EMPTY")) this.i++; + } + continue; + } + 
if (this.at("OPTION")) { + out.push(this.parseOptionGroup()); + continue; + } + out.push(this.parseStatement()); + } + return out; + } + + private parseOnceBlock(): OnceBlock { + // Already consumed <>; expect body under INDENT then <> as COMMAND + let body: Statement[] = []; + if (this.at("INDENT")) { + this.take("INDENT"); + body = this.parseStatementsUntil("DEDENT"); + this.take("DEDENT"); + } else { + // Alternatively, body until explicit <> command on single line + body = []; + } + // consume closing command if present on own line + if (this.at("COMMAND") && this.peek().text === "endonce") { + this.take("COMMAND"); + } + return { type: "Once", body }; + } + + private parseIfFromText(firstLine: string): IfBlock { + const branches: IfBlock["branches"] = []; + // expecting state not required in current implementation + + let cursor = firstLine.trim(); + function parseCond(text: string) { + const mIf = text.match(/^\{if\s+(.+?)\}$/); + if (mIf) return mIf[1]; + const mElIf = text.match(/^\{else\s+if\s+(.+?)\}$/); + if (mElIf) return mElIf[1]; + return null; + } + + while (true) { + const cond = parseCond(cursor); + if (cursor === "{else}") { + branches.push({ condition: null, body: this.parseIfBlockBody() }); + // next must be {endif} + const endLine = this.take("TEXT", "Expected {endif}").text.trim(); + if (endLine !== "{endif}") throw new ParseError("Expected {endif}"); + break; + } else if (cond) { + branches.push({ condition: cond, body: this.parseIfBlockBody() }); + // next control line + const next = this.take("TEXT", "Expected {else}, {else if}, or {endif}").text.trim(); + if (next === "{endif}") break; + cursor = next; + continue; + } else if (cursor === "{endif}") { + break; + } else { + throw new ParseError("Invalid if/else control line"); + } + } + return { type: "If", branches }; + } + + private parseEnumBlock(enumName: string): EnumBlock { + const cases: string[] = []; + + // Parse cases until <> + while (!this.at("EOF")) { + while (this.at("EMPTY")) 
this.i++; + if (this.at("COMMAND")) { + const cmd = this.peek().text.trim(); + if (cmd === "endenum") { + this.take("COMMAND"); + break; + } + if (cmd.startsWith("case ")) { + this.take("COMMAND"); + const caseName = cmd.slice(5).trim(); + cases.push(caseName); + } else { + // Unknown command, might be inside enum block - skip or break? + break; + } + } else { + // Skip non-command lines + if (this.at("TEXT")) this.take("TEXT"); + } + } + + return { type: "Enum", name: enumName, cases }; + } + + private parseIfCommandBlock(firstCmd: string): IfBlock { + const branches: IfBlock["branches"] = []; + const firstCond = firstCmd.slice(3).trim(); + // Body until next elseif/else/endif command (check at root level, not inside indented blocks) + const firstBody = this.parseStatementsUntilStop(() => { + // Only stop at root level commands, not inside indented blocks + return this.at("COMMAND") && /^(elseif\s|else$|endif$)/.test(this.peek().text); + }); + branches.push({ condition: firstCond, body: firstBody }); + + while (!this.at("EOF")) { + if (!this.at("COMMAND")) break; + const t = this.peek(); + const txt = t.text.trim(); + if (txt.startsWith("elseif ")) { + this.take("COMMAND"); + const cond = txt.slice(7).trim(); + const body = this.parseStatementsUntilStop(() => this.at("COMMAND") && /^(elseif\s|else$|endif$)/.test(this.peek().text)); + branches.push({ condition: cond, body }); + continue; + } + if (txt === "else") { + this.take("COMMAND"); + const body = this.parseStatementsUntilStop(() => this.at("COMMAND") && /^(endif$)/.test(this.peek().text)); + branches.push({ condition: null, body }); + // require endif after else body + if (this.at("COMMAND") && this.peek().text.trim() === "endif") { + this.take("COMMAND"); + } + break; + } + if (txt === "endif") { + this.take("COMMAND"); + break; + } + break; + } + + return { type: "If", branches }; + } + + private parseIfBlockBody(): Statement[] { + // Body is indented lines until next control line or DEDENT boundary; to 
keep this simple + // we consume subsequent lines until encountering a control TEXT or EOF/OPTION/NODE_END. + const body: Statement[] = []; + while (!this.at("EOF") && !this.at("NODE_END")) { + // Stop when next TEXT is a control or when OPTION starts (new group) + if (this.at("TEXT")) { + const look = this.peek().text.trim(); + if (look === "{else}" || look === "{endif}" || look.startsWith("{else if ") || look.startsWith("{if ")) break; + } + if (this.at("OPTION")) break; + // Support indented bodies inside if-branches + if (this.at("INDENT")) { + this.take("INDENT"); + const nested = this.parseStatementsUntil("DEDENT"); + this.take("DEDENT"); + body.push(...nested); + // continue scanning after dedent + while (this.at("EMPTY")) this.i++; + continue; + } + if (this.at("EMPTY")) { + this.i++; + continue; + } + body.push(this.parseStatement()); + } + return body; + } +} +