import { parse } from "csv-parse/sync";
import {
  parseSchema,
  createValidator,
  parseValue,
  schemaToTypeString,
} from "../index.js";
import type {
  Schema,
  ReferenceSchema,
  ReverseReferenceSchema,
} from "../types.js";
import type {
  CsvLoaderOptions,
  ReferenceFieldInfo,
  CsvParseResult,
  PropertyConfig,
  ReverseReferenceDeclaration,
  TypeDeclaration,
} from "./types.js";
import { ParseError } from "../parser.js";
import {
  hasNestedReferences,
  loadReferenceTable,
  resolveReferenceId,
  parseReferenceIds,
  parseValueWithReferenceIds,
  extractNestedReferenceIds,
  collectReferenceFields,
  parseValueWithReferences,
  resolveReverseReference,
  resolveNestedReferences,
  parseReferenceValue,
} from "./reference-resolver.js";
import { generateTypeDefinition } from "./type-gen.js";
import { csvToModule } from "./module-gen.js";
import * as fs from "fs";
import * as path from "path";
import {
  parseTypeDeclaration,
  parseReverseReferenceDeclaration,
  expandSchemaString,
  resolveTypeReferences,
} from "./type-declarations.js";

/** A type declaration as collected from the CSV, before its schema string is parsed. */
interface RawTypeDeclaration {
  typeName: string;
  schemaString: string;
}

/** Lookup tables produced by resolving the type declarations of one CSV file. */
interface DeclaredTypeTables {
  /** Declarations with their schemas fully resolved, in declaration order. */
  typeDeclarations: TypeDeclaration[];
  /** Declared type name -> resolved schema. */
  declaredTypes: Map<string, Schema>;
  /** Declared type name -> original (unexpanded) schema string. */
  declaredSchemaStrings: Map<string, string>;
}

/** Everything `buildColumnConfig` needs besides the header/schema pair itself. */
interface ColumnContext {
  declaredTypes: Map<string, Schema>;
  declaredSchemaStrings: Map<string, string>;
  resolveReferences: boolean;
  refBaseDir: CsvLoaderOptions["refBaseDir"];
  defaultPrimaryKey: string;
  /** Kept whole so parsers read `currentFilePath` at parse time, not at build time. */
  options: CsvLoaderOptions;
}

/**
 * Parse the collected raw type declarations into schemas and resolve the
 * references they make to one another.
 *
 * All names are registered before any schema string is expanded, so a
 * declaration may refer to a type that is declared later in the file.
 */
function resolveDeclaredTypes(
  rawDeclarations: readonly RawTypeDeclaration[],
): DeclaredTypeTables {
  // Schema strings by name, used to expand type-name references textually.
  const declaredSchemaStrings = new Map<string, string>();
  for (const raw of rawDeclarations) {
    declaredSchemaStrings.set(raw.typeName, raw.schemaString);
  }

  // Expand type-name references, then parse each declaration.
  const parsedDeclarations = rawDeclarations.map((raw) => ({
    name: raw.typeName,
    schema: parseSchema(
      expandSchemaString(raw.schemaString, declaredSchemaStrings).trim(),
    ),
    schemaString: raw.schemaString,
  }));

  const declaredTypes = new Map<string, Schema>();
  for (const decl of parsedDeclarations) {
    declaredTypes.set(decl.name, decl.schema);
  }

  // Resolve nested type references. Every declaration is resolved against the
  // map of *unresolved* schemas; the map is only updated once all are done.
  const typeDeclarations: TypeDeclaration[] = parsedDeclarations.map(
    (decl) => ({
      name: decl.name,
      schema: resolveTypeReferences(decl.schema, declaredTypes),
      schemaString: decl.schemaString,
    }),
  );

  // Column schema lookups should see the resolved schemas.
  for (const decl of typeDeclarations) {
    declaredTypes.set(decl.name, decl.schema);
  }

  return { typeDeclarations, declaredTypes, declaredSchemaStrings };
}

/**
 * Build the property config (schema, validator, parser, reference metadata)
 * for one CSV column from its header and schema-row cell.
 */
function buildColumnConfig(
  header: string,
  schemaString: string,
  context: ColumnContext,
): PropertyConfig {
  const {
    declaredTypes,
    declaredSchemaStrings,
    resolveReferences,
    refBaseDir,
    defaultPrimaryKey,
    options,
  } = context;

  let columnSchema: Schema;
  let declaredTypeName: string | undefined;
  let columnSchemaString: string | undefined;

  const declaredSchema = declaredTypes.get(schemaString);
  if (declaredSchema !== undefined) {
    // The cell is exactly the name of a declared type.
    columnSchema = declaredSchema;
    declaredTypeName = schemaString;
  } else {
    // Expand any custom type name references before parsing.
    const expandedSchema = expandSchemaString(
      schemaString,
      declaredSchemaStrings,
    );
    columnSchema = parseSchema(expandedSchema.trim());
    // Only preserve the original schema string if expansion actually changed it.
    if (expandedSchema !== schemaString) {
      columnSchemaString = schemaString;
    }
  }

  // A `const` binding keeps the `schema.type` narrowing below valid inside the
  // parser closures.
  const schema = columnSchema;

  const config: PropertyConfig = {
    name: header,
    schema,
    validator: createValidator(schema),
    parser: (valueString: string) => parseValue(schema, valueString),
    declaredTypeName,
    schemaString: columnSchemaString,
  };

  if (schema.type === "reference") {
    config.isReference = true;
    config.referenceTableName = schema.tableName;
    config.referenceIsArray = schema.isArray;
    config.parser = resolveReferences
      ? (valueString: string) =>
          parseReferenceValue(
            schema,
            valueString,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          )
      : (valueString: string) => parseReferenceIds(schema, valueString);
  } else if (hasNestedReferences(schema)) {
    config.isReference = true;
    config.parser = resolveReferences
      ? (valueString: string) =>
          parseValueWithReferences(
            valueString,
            schema,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          )
      : (valueString: string) =>
          parseValueWithReferenceIds(valueString, schema);
  }

  return config;
}

/**
 * Collect the names of every table referenced by the given property configs,
 * including references nested inside tuples, arrays and unions.
 */
function collectReferencedTables(
  propertyConfigs: readonly PropertyConfig[],
): Set<string> {
  const references = new Set<string>();

  const visit = (schema: Schema): void => {
    switch (schema.type) {
      case "reference":
      case "reverseReference":
        references.add(schema.tableName);
        break;
      case "tuple":
        schema.elements.forEach((element) => visit(element.schema));
        break;
      case "array":
        visit(schema.element);
        break;
      case "union":
        schema.members.forEach((member) => visit(member));
        break;
      default:
        // Other schema kinds are not traversed.
        break;
    }
  };

  for (const config of propertyConfigs) {
    if (config.isReference && config.referenceTableName) {
      references.add(config.referenceTableName);
    }
    visit(config.schema);
  }

  return references;
}

/**
 * Parse CSV content string into structured data with schema validation.
 * This is a standalone function that doesn't depend on webpack/rspack LoaderContext.
 *
 * The first record is the header row and the second is the schema row; every
 * following record is a data row. Data rows are optional. Lines starting with
 * the comment prefix are removed before parsing; those that are type
 * declarations or reverse reference declarations are collected first.
 *
 * @param content - CSV content string (must contain a header row and a schema row)
 * @param options - Parsing options
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error if there are fewer than 2 rows, if the header and schema rows
 *   differ in length, or if a data cell fails to parse or validate
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  const comment =
    options.comment === false ? undefined : (options.comment ?? "#");
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";

  const reverseReferences: ReverseReferenceDeclaration[] = [];
  // Store raw type declarations (name + schema string) first, resolve after all names are known
  const typeDeclarationsRaw: RawTypeDeclaration[] = [];

  // Record `text` if it is a type declaration or, failing that, a reverse
  // reference declaration. Anything else is ignored.
  const collectDeclaration = (text: string, prefix: string): void => {
    const typeDecl = parseTypeDeclaration(text, prefix);
    if (typeDecl) {
      typeDeclarationsRaw.push(typeDecl);
      return;
    }
    const reverseDecl = parseReverseReferenceDeclaration(text, prefix);
    if (reverseDecl) {
      reverseReferences.push(reverseDecl);
    }
  };

  // Pre-strip comment lines from content before passing to csv-parse,
  // to avoid quote parsing errors in comment lines containing double quotes.
  // NOTE(review): this works on physical lines, so a line inside a multi-line
  // quoted field that starts with the comment prefix is removed as well.
  let filteredContent = content;
  if (comment) {
    const nonCommentLines: string[] = [];
    for (const line of content.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (trimmed.startsWith(comment)) {
        // Declarations are collected; every comment line is dropped.
        collectDeclaration(trimmed, comment);
        continue;
      }
      nonCommentLines.push(line);
    }
    filteredContent = nonCommentLines.join("\n");
  }

  // Comment handling is disabled in csv-parse because it was done above.
  const records = parse(filteredContent, {
    delimiter,
    quote,
    escape,
    bom,
    comment: undefined,
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }

  const headers = records[0];
  const schemas = records[1];

  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }

  const dataRows = records.slice(2);

  // Also check schema row cells for comment-prefixed type declarations
  // and reverse reference declarations
  for (let col = 0; col < schemas.length; col++) {
    const cell = (schemas[col] ?? "").trim();
    if (comment && cell.startsWith(comment)) {
      collectDeclaration(cell, comment);
    }
  }

  const { typeDeclarations, declaredTypes, declaredSchemaStrings } =
    resolveDeclaredTypes(typeDeclarationsRaw);

  const resolveReferences = options.resolveReferences ?? true;

  const columnContext: ColumnContext = {
    declaredTypes,
    declaredSchemaStrings,
    resolveReferences,
    refBaseDir,
    defaultPrimaryKey,
    options,
  };

  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) =>
      buildColumnConfig(header, schemas[index], columnContext),
  );

  // Add reverse reference property configs
  for (const decl of reverseReferences) {
    const config: PropertyConfig = {
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      parser: (_valueString: string) => {
        // Reverse references are resolved after all rows are parsed
        return null;
      },
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    };
    propertyConfigs.push(config);
  }

  // Collect all referenced tables (including nested references in tuples/arrays)
  const references = collectReferencedTables(propertyConfigs);

  // Row numbers in error messages are `rowIndex + 3`: 1-based, after the header
  // and schema rows. They count parsed records, so they can differ from the
  // physical line number when comment or empty lines were removed.
  const objects = dataRows.map((row: string[], rowIndex: number) => {
    const obj: Record<string, unknown> = {};

    propertyConfigs.forEach((config, colIndex) => {
      // Skip reverse reference columns — they don't have CSV cell data
      if (config.isReverseReference) {
        return;
      }

      const rawValue = row[colIndex] ?? "";

      try {
        const parsed = config.parser(rawValue);

        // Skip validation for reference fields (validation happens during reference resolution)
        if (!config.isReference && !config.validator(parsed)) {
          // Caught below and re-thrown with the column position prepended.
          throw new Error(
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`,
          );
        }

        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });

    return obj;
  });

  // Resolve reverse references after all rows are parsed
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue !== undefined) {
          const resolved = resolveReverseReference(
            decl.schema,
            pkValue,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          );
          obj[decl.fieldName] =
            decl.isOptional && resolved.length === 0 ? null : resolved;
        } else {
          // No primary key on this row, so there is nothing to look up.
          obj[decl.fieldName] = decl.isOptional ? null : [];
        }
      }
    }
  }

  // Reference field metadata is only reported when references are left unresolved.
  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
    // Reverse reference fields are already included by collectReferenceFields
    // above (which handles the reverseReference schema type), so no additional
    // loop is needed here.
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
    typeDeclarations,
  };

  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
      typeDeclarations,
    );
  }

  return result;
}