import { parse } from "csv-parse/sync";
import {
  parseSchema,
  createValidator,
  parseValue,
  schemaToTypeString,
} from "../index.js";
import type {
  Schema,
  ReferenceSchema,
  ReverseReferenceSchema,
} from "../types.js";
import type {
  CsvLoaderOptions,
  ReferenceFieldInfo,
  CsvParseResult,
  PropertyConfig,
  ReverseReferenceDeclaration,
  TypeDeclaration,
} from "./types.js";
import { ParseError } from "../parser.js";
import {
  hasNestedReferences,
  loadReferenceTable,
  resolveReferenceId,
  parseReferenceIds,
  parseValueWithReferenceIds,
  extractNestedReferenceIds,
  collectReferenceFields,
  parseValueWithReferences,
  resolveReverseReference,
  resolveNestedReferences,
  parseReferenceValue,
} from "./reference-resolver.js";
import { generateTypeDefinition } from "./type-gen.js";
import { csvToModule } from "./module-gen.js";
import * as fs from "fs";
import * as path from "path";

/**
 * Recognise a named-type declaration written inside a comment line.
 *
 * Accepted shape (after the comment prefix): `TypeName := schema`, where
 * `TypeName` starts with an uppercase ASCII letter followed by letters or
 * digits, e.g.
 *
 *   # Trigger := 'onPlay' | 'onDraw' | 'onDiscard'
 *
 * The schema text is returned verbatim (it is not parsed here).
 *
 * @param line - Raw line; surrounding whitespace is ignored.
 * @param commentChar - Prefix that marks a comment line (default `#`).
 * @returns The declared name and its schema text, or `null` when the line is
 *          not a comment or does not have the declaration shape.
 */
function parseTypeDeclaration(
  line: string,
  commentChar: string = "#",
): { typeName: string; schemaString: string } | null {
  const stripped = line.trim();
  if (!stripped.startsWith(commentChar)) {
    return null;
  }

  // Everything after the comment prefix, e.g. "Trigger := 'a' | 'b'".
  const body = stripped.slice(commentChar.length).trim();
  const found = /^([A-Z][a-zA-Z0-9]*)\s*:=\s*(.+)$/.exec(body);
  if (found === null) {
    return null;
  }

  return { typeName: found[1], schemaString: found[2] };
}
/**
 * Look up a schema string as a declared type name.
 *
 * @param schemaString - Candidate text; surrounding whitespace is ignored.
 * @param declaredTypes - Declared type name -> raw schema text.
 * @returns The declared schema text when the whole (trimmed) string is a
 *          declared type name, otherwise `null`.
 */
function expandTypeName(
  schemaString: string,
  declaredTypes: Map<string, string>,
): string | null {
  return declaredTypes.get(schemaString.trim()) ?? null;
}

/**
 * Replace every declared type name in a schema string with its schema text,
 * repeating until the string stops changing so that declarations which
 * mention other declarations are fully inlined.
 *
 * @param schemaString - Schema text that may mention declared type names.
 * @param declaredTypes - Declared type name -> raw schema text.
 * @returns The schema text with all reachable type names inlined.
 * @throws Error when the string is still changing after the pass budget is
 *         used up, which only happens when declarations refer to each other
 *         in a cycle (e.g. `A := B`, `B := A`, or `A := [A]`).
 */
function expandSchemaString(
  schemaString: string,
  declaredTypes: Map<string, string>,
): string {
  // Each pass inlines one level of names. An acyclic set of N declarations
  // needs at most N expanding passes, one more pass to normalise the text
  // inserted last, and a final pass that observes "no change".
  const maxPasses = declaredTypes.size + 2;

  let current = schemaString;
  for (let pass = 0; pass < maxPasses; pass++) {
    const next = expandSchemaInString(current, declaredTypes);
    if (next === current) {
      return next;
    }
    current = next;
  }

  throw new Error(
    `Cyclic type declaration detected while expanding "${schemaString}"`,
  );
}

/**
 * Single pass of type name expansion over a schema string.
 *
 * Recognised shapes, checked in this order:
 *  1. the whole string is a declared type name;
 *  2. a union `a | b | ...` (split only on top-level `|`);
 *  3. a bracketed tuple `[a; b; ...]` or array `[a]`.
 * Anything else is returned untouched.
 */
function expandSchemaInString(
  schemaString: string,
  declaredTypes: Map<string, string>,
): string {
  const trimmed = schemaString.trim();

  const direct = expandTypeName(trimmed, declaredTypes);
  if (direct !== null) {
    return direct;
  }

  if (trimmed.includes("|")) {
    // Quotes and brackets are respected, so "[a | b]" is NOT split here and
    // falls through to the bracket handling below.
    const members = splitByToken(trimmed, "|");
    if (members.length > 1) {
      return members
        .map((member) => expandSchemaInString(member.trim(), declaredTypes))
        .join(" | ");
    }
  }

  if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
    const inner = trimmed.slice(1, -1);

    if (inner.includes(";")) {
      // Tuple syntax: expand each top-level element independently.
      const elements = splitByToken(inner, ";");
      const expandedElements = elements.map((element) =>
        expandSchemaInString(element.trim(), declaredTypes),
      );
      return `[${expandedElements.join("; ")}]`;
    }

    // Single-element brackets: expand the element in place.
    return `[${expandSchemaInString(inner, declaredTypes)}]`;
  }

  return schemaString;
}

/**
 * Split a string on a single-character token, ignoring occurrences that sit
 * inside a quoted string (`'...'` / `"..."`, backslash-escaped quotes are
 * honoured) or inside square brackets.
 *
 * A trailing token yields a trailing empty piece; an empty input yields an
 * empty array.
 */
function splitByToken(str: string, token: string): string[] {
  const pieces: string[] = [];
  let current = "";
  let inQuote: string | null = null;
  let depth = 0; // nesting level of '[' ... ']' outside quotes

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    if (inQuote !== null) {
      // Close the quote only on an unescaped matching quote character.
      if (char === inQuote && str[i - 1] !== "\\") {
        inQuote = null;
      }
      current += char;
      continue;
    }

    if (char === '"' || char === "'") {
      inQuote = char;
      current += char;
      continue;
    }

    if (char === "[") {
      depth++;
    } else if (char === "]" && depth > 0) {
      depth--;
    }

    if (char === token && depth === 0) {
      pieces.push(current);
      current = "";
    } else {
      current += char;
    }
  }

  if (current.length > 0 || str.endsWith(token)) {
    pieces.push(current);
  }

  return pieces;
}
/**
 * Rebuild a schema tree, visiting union members, tuple elements and array
 * elements recursively. Union, tuple and array nodes are returned as fresh
 * objects; every other node (including table references) is returned as-is.
 *
 * NOTE(review): `declaredTypes` is only threaded through the recursion here;
 * no node kind visible in this function is looked up in it.
 *
 * @param schema - Schema node to walk.
 * @param declaredTypes - Declared type name -> parsed schema.
 * @returns The rebuilt schema.
 */
function resolveTypeReferences(
  schema: Schema,
  declaredTypes: Map<string, Schema>,
): Schema {
  if (schema.type === "union") {
    const members = schema.members.map((member) =>
      resolveTypeReferences(member, declaredTypes),
    );
    return { type: "union", members };
  }

  if (schema.type === "tuple") {
    const elements = schema.elements.map(({ name, schema: inner }) => ({
      name,
      schema: resolveTypeReferences(inner, declaredTypes),
    }));
    return { type: "tuple", elements };
  }

  if (schema.type === "array") {
    const element = resolveTypeReferences(schema.element, declaredTypes);
    return { type: "array", element };
  }

  // References to other tables and all leaf schemas pass through untouched.
  return schema;
}

/**
 * Parse a type declaration's schema text and run the result through
 * `resolveTypeReferences`.
 *
 * @param schemaString - Schema text; surrounding whitespace is ignored.
 * @param declaredTypes - Declared type name -> parsed schema.
 */
function resolveTypeDeclarationSchema(
  schemaString: string,
  declaredTypes: Map<string, Schema>,
): Schema {
  return resolveTypeReferences(parseSchema(schemaString.trim()), declaredTypes);
}

/**
 * Recognise a reverse-reference declaration written inside a comment line.
 *
 * Accepted shape (after the comment prefix):
 * `fieldName := ~tableName(foreignKey)`, optionally followed by `?` to mark
 * the field as optional.
 *
 * @param line - Raw line; surrounding whitespace is ignored.
 * @param commentChar - Prefix that marks a comment line (default `#`).
 * @returns The declaration, or `null` when the line is not a comment or does
 *          not have the reverse-reference shape.
 */
function parseReverseReferenceDeclaration(
  line: string,
  commentChar: string = "#",
): ReverseReferenceDeclaration | null {
  const stripped = line.trim();
  if (!stripped.startsWith(commentChar)) {
    return null;
  }

  const body = stripped.slice(commentChar.length).trim();
  const found = /^(\w+)\s*:=\s*~(\w+)\((\w+)\)(\?)?$/.exec(body);
  if (found === null) {
    return null;
  }

  const fieldName = found[1];
  const tableName = found[2];
  const foreignKey = found[3];
  const isOptional = found[4] === "?";

  const schema: ReverseReferenceSchema = {
    type: "reverseReference",
    tableName,
    foreignKey,
    isOptional,
  };

  return { fieldName, tableName, foreignKey, isOptional, schema };
}
/**
 * Parse CSV content into structured rows validated against a schema row.
 *
 * Layout expected once comment lines are removed: the first record holds the
 * property names, the second record holds one schema string per property, and
 * every following record is a data row (zero data rows is accepted).
 *
 * Comment lines (default prefix `#`) are stripped before the text is handed
 * to csv-parse. Comment lines shaped like `# TypeName := schema` or
 * `# field := ~table(foreignKey)` are collected as type declarations and
 * reverse-reference declarations respectively.
 *
 * This is a standalone function that doesn't depend on webpack/rspack
 * LoaderContext.
 *
 * @param content - CSV content string (header record + schema record, then data)
 * @param options - Parsing options
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error when fewer than two records remain, when the header and
 *         schema counts differ, or when a cell fails to parse or validate.
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  const comment =
    options.comment === false ? undefined : (options.comment ?? "#");
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
  const resolveReferences = options.resolveReferences ?? true;

  const reverseReferences: ReverseReferenceDeclaration[] = [];
  // Raw declarations (name + schema text); parsed once all names are known.
  const typeDeclarationsRaw: { typeName: string; schemaString: string }[] = [];

  // Pre-strip comment lines before passing the text to csv-parse, to avoid
  // quote parsing errors in comment lines containing double quotes.
  let filteredContent = content;
  if (comment) {
    const keptLines: string[] = [];
    for (const line of content.split(/\r?\n/)) {
      const trimmedLine = line.trim();
      if (!trimmedLine.startsWith(comment)) {
        keptLines.push(line);
        continue;
      }
      // Declarations are recorded; every comment line is dropped either way.
      collectCommentDeclaration(
        trimmedLine,
        comment,
        typeDeclarationsRaw,
        reverseReferences,
      );
    }
    filteredContent = keptLines.join("\n");
  }

  const records = parse(filteredContent, {
    delimiter,
    quote,
    escape,
    bom,
    comment: undefined, // comments were already removed above
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }

  const headers = records[0];
  const schemas = records[1];
  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }
  const dataRows = records.slice(2);

  // Schema-row cells may also carry comment-prefixed declarations.
  if (comment) {
    for (let col = 0; col < schemas.length; col++) {
      const cell = (schemas[col] ?? "").trim();
      if (cell.startsWith(comment)) {
        collectCommentDeclaration(
          cell,
          comment,
          typeDeclarationsRaw,
          reverseReferences,
        );
      }
    }
  }

  const { declaredSchemaStrings, declaredTypes, typeDeclarations } =
    buildDeclaredTypes(typeDeclarationsRaw);

  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) => {
      const { schema, declaredTypeName } = resolveColumnSchema(
        schemas[index],
        declaredTypes,
        declaredSchemaStrings,
      );

      const config: PropertyConfig = {
        name: header,
        schema,
        validator: createValidator(schema),
        parser: (valueString: string) => parseValue(schema, valueString),
        declaredTypeName,
      };

      if (schema.type === "reference") {
        config.isReference = true;
        config.referenceTableName = schema.tableName;
        config.referenceIsArray = schema.isArray;
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseReferenceValue(
                schema,
                valueString,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) => parseReferenceIds(schema, valueString);
      } else if (hasNestedReferences(schema)) {
        config.isReference = true;
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseValueWithReferences(
                valueString,
                schema,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) =>
              parseValueWithReferenceIds(valueString, schema);
      }

      return config;
    },
  );

  // Reverse references have no CSV column; they get a config of their own.
  for (const decl of reverseReferences) {
    propertyConfigs.push({
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      // Reverse references are resolved after all rows are parsed.
      parser: (_valueString: string) => null,
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    });
  }

  // Every table mentioned by any column, including nested mentions.
  const references = new Set<string>();
  for (const config of propertyConfigs) {
    collectReferencedTables(config.schema, references);
  }

  const objects = dataRows.map((row: string[], rowIndex: number) => {
    // Values are whatever the per-column schema parsers produce.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const obj: Record<string, any> = {};

    propertyConfigs.forEach((config, colIndex) => {
      // Reverse reference columns don't have CSV cell data.
      if (config.isReverseReference) {
        return;
      }

      const rawValue = row[colIndex] ?? "";
      try {
        const parsed = config.parser(rawValue);
        // Reference fields are validated during reference resolution instead.
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`,
          );
        }
        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });

    return obj;
  });

  // Resolve reverse references after all rows are parsed.
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue === undefined) {
          obj[decl.fieldName] = decl.isOptional ? null : [];
          continue;
        }
        const resolved = resolveReverseReference(
          decl.schema,
          pkValue,
          refBaseDir,
          defaultPrimaryKey,
          options.currentFilePath,
        );
        obj[decl.fieldName] =
          decl.isOptional && resolved.length === 0 ? null : resolved;
      }
    }
  }

  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    // Reverse reference configs are part of propertyConfigs, so this single
    // loop is the only place reference field info is gathered.
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
    typeDeclarations,
  };

  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
      typeDeclarations,
    );
  }

  return result;
}

/**
 * Record a comment-prefixed declaration, if the text is one. A type
 * declaration is tried first, then a reverse-reference declaration; any other
 * comment text is ignored.
 */
function collectCommentDeclaration(
  text: string,
  comment: string,
  typeDeclarations: { typeName: string; schemaString: string }[],
  reverseReferences: ReverseReferenceDeclaration[],
): void {
  const typeDecl = parseTypeDeclaration(text, comment);
  if (typeDecl) {
    typeDeclarations.push(typeDecl);
    return;
  }

  const reverseDecl = parseReverseReferenceDeclaration(text, comment);
  if (reverseDecl) {
    reverseReferences.push(reverseDecl);
  }
}

/**
 * Turn raw type declarations into parsed schemas: inline declared names in
 * each schema text, parse it, then pass each parsed schema through
 * `resolveTypeReferences`. A later declaration with the same name replaces
 * the earlier one in the returned maps.
 */
function buildDeclaredTypes(
  raw: { typeName: string; schemaString: string }[],
): {
  declaredSchemaStrings: Map<string, string>;
  declaredTypes: Map<string, Schema>;
  typeDeclarations: TypeDeclaration[];
} {
  const declaredSchemaStrings = new Map<string, string>();
  for (const decl of raw) {
    declaredSchemaStrings.set(decl.typeName, decl.schemaString);
  }

  // Expand type-name mentions before parsing each declaration.
  const parsed = raw.map((decl) => ({
    name: decl.typeName,
    schema: parseSchema(
      expandSchemaString(decl.schemaString, declaredSchemaStrings).trim(),
    ),
  }));

  const declaredTypes = new Map<string, Schema>();
  for (const decl of parsed) {
    declaredTypes.set(decl.name, decl.schema);
  }

  const typeDeclarations: TypeDeclaration[] = parsed.map((decl) => ({
    name: decl.name,
    schema: resolveTypeReferences(decl.schema, declaredTypes),
  }));

  // Column lookups should see the resolved schemas.
  for (const decl of typeDeclarations) {
    declaredTypes.set(decl.name, decl.schema);
  }

  return { declaredSchemaStrings, declaredTypes, typeDeclarations };
}

/**
 * Work out the schema for one column.
 *
 * A schema string that is exactly a declared type name uses that declaration.
 * Otherwise the string is parsed as written; only if that fails, and type
 * declarations exist, is it retried with declared names inlined (so a column
 * schema such as `[Trigger; int]` can mention a declared type). When the
 * retry does not help, the original parse error is rethrown.
 */
function resolveColumnSchema(
  schemaString: string,
  declaredTypes: Map<string, Schema>,
  declaredSchemaStrings: Map<string, string>,
): { schema: Schema; declaredTypeName: string | undefined } {
  const declared = declaredTypes.get(schemaString);
  if (declared !== undefined) {
    return { schema: declared, declaredTypeName: schemaString };
  }

  try {
    return { schema: parseSchema(schemaString), declaredTypeName: undefined };
  } catch (error) {
    if (declaredSchemaStrings.size === 0) {
      throw error;
    }
    const expanded = expandSchemaString(
      schemaString,
      declaredSchemaStrings,
    ).trim();
    if (expanded === schemaString.trim()) {
      throw error;
    }
    try {
      return { schema: parseSchema(expanded), declaredTypeName: undefined };
    } catch {
      // Report the failure for the text the user actually wrote.
      throw error;
    }
  }
}

/**
 * Add every table named by a reference or reverse-reference node in the
 * schema tree (searching tuples, arrays and unions) to `into`.
 */
function collectReferencedTables(schema: Schema, into: Set<string>): void {
  if (schema.type === "reference" || schema.type === "reverseReference") {
    into.add(schema.tableName);
  } else if (schema.type === "tuple") {
    schema.elements.forEach((el) => collectReferencedTables(el.schema, into));
  } else if (schema.type === "array") {
    collectReferencedTables(schema.element, into);
  } else if (schema.type === "union") {
    schema.members.forEach((member) => collectReferencedTables(member, into));
  }
}