import { parse } from "csv-parse/sync";
import {
  parseSchema,
  createValidator,
  parseValue,
  schemaToTypeString,
} from "../index.js";
import type {
  Schema,
  ReferenceSchema,
  ReverseReferenceSchema,
} from "../types.js";
import type {
  CsvLoaderOptions,
  ReferenceFieldInfo,
  CsvParseResult,
  PropertyConfig,
  ReverseReferenceDeclaration,
} from "./types.js";
import {
  hasNestedReferences,
  loadReferenceTable,
  resolveReferenceId,
  parseReferenceIds,
  parseValueWithReferenceIds,
  extractNestedReferenceIds,
  collectReferenceFields,
  parseValueWithReferences,
  resolveReverseReference,
  resolveNestedReferences,
  parseReferenceValue,
} from "./reference-resolver.js";
import { generateTypeDefinition } from "./type-gen.js";
import { csvToModule } from "./module-gen.js";
import * as fs from "fs";
import * as path from "path";

/**
 * Recognise a reverse-reference declaration written inside a comment line.
 *
 * Accepted shape (surrounding whitespace is ignored, and whitespace around
 * `:=` is optional):
 *
 *   <commentChar> fieldName := ~tableName(foreignKey)
 *   <commentChar> fieldName := ~tableName(foreignKey)?
 *
 * A trailing `?` marks the declaration as optional.
 *
 * @param line - A single line of text, typically one comment line of a CSV file.
 * @param commentChar - Prefix that introduces a comment; defaults to "#".
 * @returns The parsed declaration (with a ready-made `reverseReference`
 *   schema), or `null` when the line is not a comment or does not match the
 *   declaration syntax.
 */
function parseReverseReferenceDeclaration(
  line: string,
  commentChar: string = "#",
): ReverseReferenceDeclaration | null {
  const stripped = line.trim();

  if (stripped.startsWith(commentChar)) {
    // Everything after the comment prefix is the candidate declaration.
    const body = stripped.slice(commentChar.length).trim();
    // Groups: 1 = field name, 2 = table name, 3 = foreign key, 4 = optional "?".
    const parts = /^(\w+)\s*:=\s*~(\w+)\((\w+)\)(\?)?$/.exec(body);

    if (parts !== null) {
      const fieldName = parts[1];
      const tableName = parts[2];
      const foreignKey = parts[3];
      const isOptional = parts[4] === "?";

      const schema: ReverseReferenceSchema = {
        type: "reverseReference",
        tableName,
        foreignKey,
        isOptional,
      };

      return { fieldName, tableName, foreignKey, isOptional, schema };
    }
  }

  // Not a comment line, or a comment that is not a declaration.
  return null;
}

/**
 * Parse CSV content string into structured data with schema validation.
 * This is a standalone function that doesn't depend on webpack/rspack LoaderContext.
*
 * Processing steps, in order:
 *  1. Lines whose trimmed text starts with the comment prefix are removed
 *     before csv-parse sees them; any of those lines that is a reverse
 *     reference declaration (`name := ~table(foreignKey)`) is recorded.
 *  2. The first remaining record is the header row, the second is the schema
 *     row, and every record after that is a data row.
 *  3. Each data cell is parsed according to its column schema; non-reference
 *     columns are additionally validated.
 *  4. When `resolveReferences` is enabled, reverse-reference fields are filled
 *     in once every row has been parsed.
 *
 * Option defaults applied here: delimiter ",", quote '"', escape "\\",
 * bom true, comment "#" (pass `false` to disable), trim true, emitTypes true,
 * defaultPrimaryKey "id", resolveReferences true.
 *
 * @param content - CSV content string. It must contain a header row and a
 *   schema row; data rows are optional.
 * @param options - Parsing options
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error when fewer than two records remain after comment and
 *   empty-line removal, when the header and schema rows differ in length, or
 *   when a cell cannot be parsed or fails validation.
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  // `comment: false` switches comment handling off; otherwise default to "#".
  const comment =
    options.comment === false ? undefined : (options.comment ?? "#");
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
  const resolveReferences = options.resolveReferences ?? true;

  // Pre-strip comment lines from content before passing to csv-parse,
  // to avoid quote parsing errors in comment lines containing double quotes.
  const reverseReferences: ReverseReferenceDeclaration[] = [];
  let filteredContent = content;
  if (comment) {
    const lines = content.split(/\r?\n/);
    const nonCommentLines: string[] = [];
    for (const line of lines) {
      const trimmed = line.trim();
      if (trimmed.startsWith(comment)) {
        // Comment lines are dropped; declarations among them are kept aside.
        const decl = parseReverseReferenceDeclaration(trimmed, comment);
        if (decl) {
          reverseReferences.push(decl);
        }
      } else {
        nonCommentLines.push(line);
      }
    }
    filteredContent = nonCommentLines.join("\n");
  }

  // Comment handling is disabled in csv-parse because comment lines were
  // already removed above.
  const records = parse(filteredContent, {
    delimiter,
    quote,
    escape,
    bom,
    comment: undefined,
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }

  const headers = records[0];
  const schemas = records[1];

  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }

  const dataRows = records.slice(2);

  // Also check schema row cells for comment-prefixed reverse reference
  // declarations (in case they appear as schema cells rather than separate rows).
  // NOTE(review): such a cell is still handed to parseSchema below — confirm
  // that parseSchema tolerates it.
  for (let col = 0; col < schemas.length; col++) {
    const cell = (schemas[col] ?? "").trim();
    if (comment && cell.startsWith(comment)) {
      const decl = parseReverseReferenceDeclaration(cell, comment);
      if (decl) {
        reverseReferences.push(decl);
      }
    }
  }

  // One PropertyConfig per CSV column, in header order.
  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) => {
      const schemaString = schemas[index];
      const schema = parseSchema(schemaString);

      const config: PropertyConfig = {
        name: header,
        schema,
        validator: createValidator(schema),
        parser: (valueString: string) => parseValue(schema, valueString),
      };

      if (schema.type === "reference") {
        // Direct reference column: either resolve the referenced rows or keep
        // the raw ids, depending on `resolveReferences`.
        config.isReference = true;
        config.referenceTableName = schema.tableName;
        config.referenceIsArray = schema.isArray;

        if (resolveReferences) {
          config.parser = (valueString: string) => {
            return parseReferenceValue(
              schema,
              valueString,
              refBaseDir,
              defaultPrimaryKey,
              options.currentFilePath,
            );
          };
        } else {
          config.parser = (valueString: string) => {
            return parseReferenceIds(schema, valueString);
          };
        }
      } else if (hasNestedReferences(schema)) {
        // Composite schema that contains references somewhere inside it.
        config.isReference = true;

        if (resolveReferences) {
          config.parser = (valueString: string) => {
            return parseValueWithReferences(
              valueString,
              schema,
              refBaseDir,
              defaultPrimaryKey,
              options.currentFilePath,
            );
          };
        } else {
          config.parser = (valueString: string) => {
            return parseValueWithReferenceIds(valueString, schema);
          };
        }
      }

      return config;
    },
  );

  // Add reverse reference property configs. They are appended after the real
  // columns, so column indices of the CSV-backed configs stay aligned with
  // the header row.
  for (const decl of reverseReferences) {
    const config: PropertyConfig = {
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      parser: (_valueString: string) => {
        // Reverse references are resolved after all rows are parsed
        return null;
      },
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    };
    propertyConfigs.push(config);
  }

  // Collect all referenced tables (including nested references in tuples/arrays)
  const references = new Set<string>();

  function collectReferences(schema: Schema): void {
    if (schema.type === "reference") {
      references.add(schema.tableName);
    } else if (schema.type === "reverseReference") {
      references.add(schema.tableName);
    } else if (schema.type === "tuple") {
      schema.elements.forEach((el) => collectReferences(el.schema));
    } else if (schema.type === "array") {
      collectReferences(schema.element);
    } else if (schema.type === "union") {
      schema.members.forEach((m) => collectReferences(m));
    }
  }

  propertyConfigs.forEach((config) => {
    if (config.isReference && config.referenceTableName) {
      references.add(config.referenceTableName);
    }
    collectReferences(config.schema);
  });

  const objects = dataRows.map((row: string[], rowIndex: number) => {
    const obj: Record<string, unknown> = {};

    propertyConfigs.forEach((config, colIndex) => {
      // Skip reverse reference columns — they don't have CSV cell data
      if (config.isReverseReference) {
        return;
      }

      // Short rows are allowed (relax_column_count); missing cells parse as "".
      const rawValue = row[colIndex] ?? "";

      // Reported row numbers are 1-based and count the header and schema rows
      // (hence `+ 3`). They index the records left after comment and empty
      // lines were removed, so they can differ from physical line numbers.
      try {
        const parsed = config.parser(rawValue);

        // Skip validation for reference fields (validation happens during reference resolution)
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`,
          );
        }

        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });

    return obj;
  });

  // Resolve reverse references after all rows are parsed
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue !== undefined) {
          const resolved = resolveReverseReference(
            decl.schema,
            pkValue,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          );
          // Optional declarations collapse an empty result to null.
          obj[decl.fieldName] =
            decl.isOptional && resolved.length === 0 ? null : resolved;
        } else {
          // No primary key on this row, so nothing can point back at it.
          obj[decl.fieldName] = decl.isOptional ? null : [];
        }
      }
    }
  }

  // Reference field metadata is only gathered when references are left
  // unresolved.
  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
    // NOTE(review): no separate pass is made for reverse reference fields;
    // this presumes hasNestedReferences / collectReferenceFields handle the
    // reverseReference schema type — confirm in reference-resolver.
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
  };

  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
    );
  }

  return result;
}