import { parse } from 'csv-parse/sync';
import { parseSchema, createValidator, parseValue } from '../index.js';
import type { Schema, ReferenceSchema } from '../types.js';
import * as fs from 'fs';
import * as path from 'path';

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

export interface CsvLoaderOptions {
  delimiter?: string;
  quote?: string;
  escape?: string;
  bom?: boolean;
  comment?: string | false;
  trim?: boolean;
  /** Generate TypeScript declaration file (.d.ts) */
  emitTypes?: boolean;
  /** Output directory for generated type files (relative to output path) */
  typesOutputDir?: string;
  /** Write .d.ts files to disk (useful for dev server) */
  writeToDisk?: boolean;
  /** Base directory for resolving referenced CSV files (default: directory of current file) */
  refBaseDir?: string;
  /** Primary key field name for referenced tables (default: 'id') */
  defaultPrimaryKey?: string;
  /** Current file path (used to resolve relative references) */
  currentFilePath?: string;
  /**
   * When false, reference fields store parsed IDs instead of resolved objects.
   * Used by csvToModule to emit accessor-based code with lazy resolution.
   * Default: true (resolves references eagerly by loading referenced CSV files).
   */
  resolveReferences?: boolean;
}

export interface ReferenceFieldInfo {
  /** Column name in the CSV */
  name: string;
  /** Referenced table name */
  tableName: string;
  /** Whether it's an array reference */
  isArray: boolean;
  /** The schema of this field (for nested references) */
  schema: Schema;
}

export interface CsvParseResult {
  /** Parsed CSV data as array of objects */
  data: Record<string, unknown>[];
  /** Generated TypeScript type definition string (if emitTypes is true) */
  typeDefinition?: string;
  /** Property configurations for the CSV columns */
  propertyConfigs: PropertyConfig[];
  /** Referenced table names */
  references: Set<string>;
  /** Reference field metadata (populated when resolveReferences is false) */
  referenceFields: ReferenceFieldInfo[];
}

interface PropertyConfig {
  name: string;
  // Kept as `any` so the shape seen by existing consumers of
  // CsvParseResult.propertyConfigs is not narrowed; the value stored here is
  // whatever parseSchema() returned for the column.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  schema: any;
  validator: (value: unknown) => boolean;
  parser: (valueString: string) => unknown;
  /** Whether this property is a reference to another table */
  isReference?: boolean;
  /** Referenced table name (if isReference is true) */
  referenceTableName?: string;
  /** Whether it's an array reference */
  referenceIsArray?: boolean;
}

// ---------------------------------------------------------------------------
// Module state and small helpers
// ---------------------------------------------------------------------------

/** A parsed CSV row keyed by column name. */
type CsvRow = Record<string, unknown>;

/** The tuple member of the Schema union. */
type TupleSchema = Extract<Schema, { type: 'tuple' }>;

/** Cache for loaded referenced tables, keyed by resolved file path. */
const referenceTableCache = new Map<string, CsvRow[]>();

/**
 * Cache of primary-key lookups built from cached tables, keyed by file path
 * plus primary key name. Without it the lookup Map would be rebuilt for every
 * single reference cell that is parsed.
 */
const referenceLookupCache = new Map<string, Map<string, CsvRow>>();

/** Set of file paths currently being loaded (to detect circular references) */
const loadingFiles = new Set<string>();

/** Matches names that can be written as a bare identifier / dotted property. */
const IDENTIFIER_PATTERN = /^[A-Za-z_$][A-Za-z0-9_$]*$/;

/** Single whitespace character test, hoisted out of the parser loop. */
const WHITESPACE_PATTERN = /\s/;

/** Upper-cases the first character of `text`. */
function capitalize(text: string): string {
  return text.charAt(0).toUpperCase() + text.slice(1);
}

/** Renders `name` as an object-literal / interface key, quoting it when needed. */
function propertyKey(name: string): string {
  return IDENTIFIER_PATTERN.test(name) ? name : JSON.stringify(name);
}

/** Renders a property read of `name` on `objectExpr`, using brackets when needed. */
function propertyAccess(objectExpr: string, name: string): string {
  return IDENTIFIER_PATTERN.test(name)
    ? `${objectExpr}.${name}`
    : `${objectExpr}[${JSON.stringify(name)}]`;
}

/** Replaces characters that cannot appear inside a JS identifier with `_`. */
function toIdentifierPart(name: string): string {
  return name.replace(/[^A-Za-z0-9_$]/g, '_');
}

// ---------------------------------------------------------------------------
// Schema inspection
// ---------------------------------------------------------------------------

/**
 * Reports whether `schema` is a reference or contains one anywhere inside its
 * tuple elements, array element or union members.
 */
function hasNestedReferences(schema: Schema): boolean {
  switch (schema.type) {
    case 'reference':
      return true;
    case 'tuple':
      return schema.elements.some(el => hasNestedReferences(el.schema));
    case 'array':
      return hasNestedReferences(schema.element);
    case 'union':
      return schema.members.some(m => hasNestedReferences(m));
    default:
      return false;
  }
}

/**
 * Collects one ReferenceFieldInfo per reference schema found inside `schema`,
 * in depth-first order. Every entry carries the same column `name`.
 */
function collectReferenceFields(schema: Schema, name: string): ReferenceFieldInfo[] {
  const fields: ReferenceFieldInfo[] = [];
  switch (schema.type) {
    case 'reference':
      fields.push({ name, tableName: schema.tableName, isArray: schema.isArray, schema });
      break;
    case 'tuple':
      for (const el of schema.elements) {
        fields.push(...collectReferenceFields(el.schema, name));
      }
      break;
    case 'array':
      fields.push(...collectReferenceFields(schema.element, name));
      break;
    case 'union':
      for (const member of schema.members) {
        fields.push(...collectReferenceFields(member, name));
      }
      break;
  }
  return fields;
}

// ---------------------------------------------------------------------------
// Eager reference loading
// ---------------------------------------------------------------------------

/**
 * Reads and parses the CSV file of a referenced table and stores the rows in
 * `referenceTableCache`.
 *
 * @throws Error when the file is already being loaded (circular reference) or
 *   when reading/parsing fails (the original message is appended).
 */
function readReferenceTable(
  tableName: string,
  refFilePath: string,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string
): CsvRow[] {
  if (loadingFiles.has(refFilePath)) {
    throw new Error(
      `Circular reference detected: table "${tableName}" (${refFilePath}) is already being loaded`
    );
  }

  loadingFiles.add(refFilePath);
  try {
    const refContent = fs.readFileSync(refFilePath, 'utf-8');
    const refResult = parseCsv(refContent, {
      currentFilePath: refFilePath,
      emitTypes: false,
      // Propagate the caller's settings so references inside the referenced
      // table are resolved with the same primary key and base directory.
      refBaseDir,
      defaultPrimaryKey,
    });
    referenceTableCache.set(refFilePath, refResult.data);
    return refResult.data;
  } catch (error) {
    throw new Error(
      `Failed to load referenced table "${tableName}" from ${refFilePath}: ${error instanceof Error ? error.message : String(error)}`
    );
  } finally {
    // Always clear the in-progress marker, on success and on failure.
    loadingFiles.delete(refFilePath);
  }
}

/**
 * Loads (or fetches from cache) the table referenced by `schema` and returns
 * it together with a lookup keyed by the stringified primary key.
 *
 * The file is `<tableName>.csv` inside `refBaseDir`, else next to
 * `currentFilePath`, else in the process working directory.
 * Rows whose primary key is `undefined` are left out of the lookup.
 *
 * @throws Error on circular references or when the file cannot be loaded.
 */
function loadReferenceTable(
  schema: ReferenceSchema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined
): { lookup: Map<string, CsvRow>; refTable: CsvRow[] } {
  // `||` rather than `??`: an empty-string refBaseDir also falls back.
  const baseDir =
    refBaseDir || (currentFilePath ? path.dirname(currentFilePath) : process.cwd());
  const fileName = `${schema.tableName}.csv`;
  const refFilePath = path.isAbsolute(fileName) ? fileName : path.join(baseDir, fileName);

  const refTable =
    referenceTableCache.get(refFilePath) ??
    readReferenceTable(schema.tableName, refFilePath, refBaseDir, defaultPrimaryKey);

  const lookupKey = `${refFilePath}\u0000${defaultPrimaryKey}`;
  let lookup = referenceLookupCache.get(lookupKey);
  if (lookup === undefined) {
    lookup = new Map<string, CsvRow>();
    for (const row of refTable) {
      const pkValue = row[defaultPrimaryKey];
      if (pkValue !== undefined) {
        lookup.set(String(pkValue), row);
      }
    }
    referenceLookupCache.set(lookupKey, lookup);
  }

  return { lookup, refTable };
}

/**
 * Returns the row registered under `id`.
 *
 * @throws Error when `lookup` has no entry for `id`.
 */
function resolveReferenceId(
  id: string,
  lookup: Map<string, CsvRow>,
  tableName: string
): CsvRow {
  const obj = lookup.get(id);
  if (!obj) {
    throw new Error(`Reference to "${tableName}" with id="${id}" not found`);
  }
  return obj;
}

// ---------------------------------------------------------------------------
// Lazy mode: keep IDs instead of resolved rows
// ---------------------------------------------------------------------------

/**
 * Parses a reference cell into its ID(s) without loading the referenced table.
 * Returns a string array for array references and a single string otherwise.
 */
function parseReferenceIds(schema: ReferenceSchema, valueString: string): unknown {
  const ids = new ReferenceValueParser(valueString.trim()).parseIds(schema.isArray);
  return schema.isArray ? ids : ids[0];
}

/**
 * Parses a cell whose schema may contain references, leaving every reference
 * as its stringified ID(s).
 */
function parseValueWithReferenceIds(valueString: string, schema: Schema): unknown {
  if (!hasNestedReferences(schema)) {
    return parseValue(schema, valueString);
  }

  switch (schema.type) {
    case 'reference':
      return parseReferenceIds(schema, valueString);
    case 'tuple': {
      const parsed = parseValue(schema, valueString) as unknown[];
      return schema.elements.map((el, i) =>
        hasNestedReferences(el.schema)
          ? extractNestedReferenceIds(parsed[i], el.schema)
          : parsed[i]
      );
    }
    case 'array': {
      const parsed = parseValue(schema, valueString) as unknown[];
      return parsed.map(item =>
        hasNestedReferences(schema.element)
          ? extractNestedReferenceIds(item, schema.element)
          : item
      );
    }
    case 'union': {
      // Best effort: the first reference-bearing member that parses wins.
      for (const member of schema.members) {
        if (hasNestedReferences(member)) {
          try {
            const parsed = parseValue(member, valueString);
            return extractNestedReferenceIds(parsed, member);
          } catch {
            // This member does not fit the value; try the next one.
          }
        }
      }
      return parseValue(schema, valueString);
    }
    default:
      return parseValue(schema, valueString);
  }
}

/**
 * Walks an already parsed value alongside its schema and stringifies every
 * value sitting at a reference position. Non-array values found where a
 * tuple/array is expected are returned untouched.
 */
function extractNestedReferenceIds(value: unknown, schema: Schema): unknown {
  switch (schema.type) {
    case 'reference': {
      if (schema.isArray) {
        const ids = Array.isArray(value) ? value : [value];
        return ids.map(id => String(id));
      }
      return String(value);
    }
    case 'tuple': {
      if (!Array.isArray(value)) return value;
      return schema.elements.map((el, i) =>
        hasNestedReferences(el.schema)
          ? extractNestedReferenceIds(value[i], el.schema)
          : value[i]
      );
    }
    case 'array': {
      if (!Array.isArray(value)) return value;
      return value.map(item =>
        hasNestedReferences(schema.element)
          ? extractNestedReferenceIds(item, schema.element)
          : item
      );
    }
    case 'union': {
      for (const member of schema.members) {
        if (hasNestedReferences(member)) {
          try {
            return extractNestedReferenceIds(value, member);
          } catch {
            // Try the next reference-bearing member.
          }
        }
      }
      return value;
    }
    default:
      return value;
  }
}

// ---------------------------------------------------------------------------
// Eager mode: replace IDs with referenced rows
// ---------------------------------------------------------------------------

/**
 * Parse and resolve a reference value.
 * Loads the referenced table and replaces IDs with actual objects.
 *
 * @throws Error when the table cannot be loaded or an ID is not found.
 */
function parseReferenceValue(
  schema: ReferenceSchema,
  valueString: string,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined
): unknown {
  const { lookup } = loadReferenceTable(schema, refBaseDir, defaultPrimaryKey, currentFilePath);
  const ids = new ReferenceValueParser(valueString.trim()).parseIds(schema.isArray);

  if (schema.isArray) {
    return ids.map(id => resolveReferenceId(id, lookup, schema.tableName));
  }
  return resolveReferenceId(ids[0], lookup, schema.tableName);
}

/**
 * Parses a cell whose schema may contain references and replaces every
 * reference with the row(s) it points to.
 *
 * For unions, reference-bearing members are tried first. When all of them
 * fail with a resolution error ("not found", circular reference, failed
 * load), the non-reference members are tried; finally the value is parsed
 * against the whole union.
 */
function parseValueWithReferences(
  valueString: string,
  schema: Schema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined
): unknown {
  if (!hasNestedReferences(schema)) {
    return parseValue(schema, valueString);
  }

  switch (schema.type) {
    case 'reference':
      return parseReferenceValue(schema, valueString, refBaseDir, defaultPrimaryKey, currentFilePath);
    case 'tuple': {
      const parsed = parseValue(schema, valueString) as unknown[];
      return schema.elements.map((el, i) =>
        resolveNestedReferences(parsed[i], el.schema, refBaseDir, defaultPrimaryKey, currentFilePath)
      );
    }
    case 'array': {
      const parsed = parseValue(schema, valueString) as unknown[];
      return parsed.map(item =>
        resolveNestedReferences(item, schema.element, refBaseDir, defaultPrimaryKey, currentFilePath)
      );
    }
    case 'union': {
      const errors: Error[] = [];
      for (const member of schema.members) {
        if (hasNestedReferences(member)) {
          try {
            const parsed = parseValue(member, valueString);
            return resolveNestedReferences(parsed, member, refBaseDir, defaultPrimaryKey, currentFilePath);
          } catch (e) {
            errors.push(e instanceof Error ? e : new Error(String(e)));
          }
        }
      }

      const onlyResolutionErrors =
        errors.length > 0 &&
        errors.every(e => /not found|Circular reference|Failed to load/.test(e.message));
      if (onlyResolutionErrors) {
        for (const member of schema.members) {
          if (!hasNestedReferences(member)) {
            try {
              return parseValue(member, valueString);
            } catch {
              // This plain member does not fit either; keep looking.
            }
          }
        }
      }
      return parseValue(schema, valueString);
    }
    default:
      return parseValue(schema, valueString);
  }
}

/**
 * Walks an already parsed value alongside its schema and replaces every value
 * at a reference position with the referenced row(s).
 *
 * @throws Error when a referenced table cannot be loaded or an ID is missing.
 */
function resolveNestedReferences(
  value: unknown,
  schema: Schema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined
): unknown {
  switch (schema.type) {
    case 'reference': {
      const { lookup } = loadReferenceTable(schema, refBaseDir, defaultPrimaryKey, currentFilePath);
      if (schema.isArray) {
        const ids = Array.isArray(value) ? value : [value];
        return ids.map(id => resolveReferenceId(String(id), lookup, schema.tableName));
      }
      return resolveReferenceId(String(value), lookup, schema.tableName);
    }
    case 'tuple': {
      if (!Array.isArray(value)) return value;
      return schema.elements.map((el, i) =>
        resolveNestedReferences(value[i], el.schema, refBaseDir, defaultPrimaryKey, currentFilePath)
      );
    }
    case 'array': {
      if (!Array.isArray(value)) return value;
      return value.map(item =>
        resolveNestedReferences(item, schema.element, refBaseDir, defaultPrimaryKey, currentFilePath)
      );
    }
    case 'union': {
      // NOTE(review): unlike the top-level union handling in
      // parseValueWithReferences, a nested union rethrows the first
      // resolution error even when it also has non-reference members.
      // Confirm whether falling back to the raw value is wanted here.
      const errors: Error[] = [];
      for (const member of schema.members) {
        if (hasNestedReferences(member)) {
          try {
            return resolveNestedReferences(value, member, refBaseDir, defaultPrimaryKey, currentFilePath);
          } catch (e) {
            errors.push(e instanceof Error ? e : new Error(String(e)));
          }
        }
      }
      if (errors.length > 0) {
        throw errors[0];
      }
      return value;
    }
    default:
      return value;
  }
}

/**
 * Parser for reference values (extracts IDs from value string)
 */
class ReferenceValueParser {
  private readonly input: string;
  private pos = 0;

  constructor(input: string) {
    this.input = input;
  }

  /** Current character, or '' at end of input. */
  private peek(): string {
    return this.input[this.pos] ?? '';
  }

  /** Returns the current character and advances; '' at end of input. */
  private consume(): string {
    return this.input[this.pos++] ?? '';
  }

  private skipWhitespace(): void {
    while (this.pos < this.input.length && WHITESPACE_PATTERN.test(this.peek())) {
      this.pos++;
    }
  }

  /** Advances past `str` if the input continues with it. */
  private consumeStr(str: string): boolean {
    if (this.input.slice(this.pos, this.pos + str.length) === str) {
      this.pos += str.length;
      return true;
    }
    return false;
  }

  /**
   * Extracts the IDs from the input.
   *
   * - Array mode accepts `[id1; id2; id3]`; the brackets are optional and
   *   empty entries are dropped.
   * - Single mode reads up to the first `;`, `]` or `,` and always returns
   *   exactly one (possibly empty) trimmed ID.
   */
  parseIds(isArray: boolean): string[] {
    this.skipWhitespace();

    if (!isArray) {
      // Parse single ID
      let id = '';
      while (this.pos < this.input.length) {
        const char = this.peek();
        if (char === ';' || char === ']' || char === ',') {
          break;
        }
        id += this.consume();
      }
      return [id.trim()];
    }

    // Parse array format: [id1; id2; id3]
    if (this.peek() === '[') {
      this.consume();
    }
    this.skipWhitespace();
    if (this.peek() === ']') {
      this.consume();
      return [];
    }

    const ids: string[] = [];
    while (true) {
      this.skipWhitespace();
      let id = '';
      while (this.pos < this.input.length && this.peek() !== ';' && this.peek() !== ']') {
        id += this.consume();
      }
      const trimmedId = id.trim();
      if (trimmedId) {
        ids.push(trimmedId);
      }
      this.skipWhitespace();
      if (!this.consumeStr(';')) {
        break;
      }
    }

    this.skipWhitespace();
    if (this.peek() === ']') {
      this.consume();
    }
    return ids;
  }
}

// ---------------------------------------------------------------------------
// Type definition generation
// ---------------------------------------------------------------------------

/**
 * Renders a tuple schema as `readonly [a, b]` / `readonly [x: a, y: b]`.
 *
 * Labelled elements are emitted without a `readonly` modifier: TypeScript
 * does not accept `readonly` on an individual tuple member, and the outer
 * `readonly [...]` already makes every element read-only.
 */
function tupleToTypeString(schema: TupleSchema, resourceNames?: Map<string, string>): string {
  const members = schema.elements.map(el => {
    const typeStr = schemaToTypeString(el.schema, resourceNames);
    return el.name ? `${el.name}: ${typeStr}` : typeStr;
  });
  return `readonly [${members.join(', ')}]`;
}

/**
 * Convert a schema to TypeScript type string
 *
 * @param schema - Schema to render.
 * @param resourceNames - Optional table name → type name mapping used for
 *   references; without an entry the capitalised table name is used.
 */
function schemaToTypeString(schema: Schema, resourceNames?: Map<string, string>): string {
  switch (schema.type) {
    case 'string':
      return 'string';
    case 'number':
    case 'int':
    case 'float':
      return 'number';
    case 'boolean':
      return 'boolean';
    case 'stringLiteral':
      // JSON.stringify escapes quotes and backslashes inside the literal.
      return JSON.stringify(schema.value);
    case 'union':
      return schema.members.map(m => schemaToTypeString(m, resourceNames)).join(' | ');
    case 'reference': {
      // Use the resource name mapping if provided, otherwise capitalize table name
      const typeName = resourceNames?.get(schema.tableName) || capitalize(schema.tableName);
      return schema.isArray ? `readonly ${typeName}[]` : typeName;
    }
    case 'array': {
      if (schema.element.type === 'tuple') {
        // NOTE(review): an array whose element is a tuple is rendered as the
        // tuple type itself (no `[]` suffix). Kept as-is; confirm this matches
        // how parseSchema represents tuple syntax.
        return tupleToTypeString(schema.element, resourceNames);
      }
      const elementType = schemaToTypeString(schema.element, resourceNames);
      // Parenthesise unions (precedence) and element types that already start
      // with `readonly` (`readonly readonly T[][]` is not valid TypeScript).
      const needsParens =
        schema.element.type === 'union' || elementType.startsWith('readonly ');
      return needsParens ? `readonly (${elementType})[]` : `readonly ${elementType}[]`;
    }
    case 'tuple':
      return tupleToTypeString(schema, resourceNames);
    default:
      return 'unknown';
  }
}

/**
 * Generate TypeScript interface for the CSV data
 *
 * @param resourceName - Base name for the generated types; '' yields `Table`.
 * @param propertyConfigs - Column configurations, one property per column.
 * @param references - Names of the tables referenced by the columns.
 * @param currentFilePath - Path of the CSV being described; its base name is
 *   used to recognise self-references, which are not imported.
 * @param hasRefs - When true the default export is declared as a `getData()`
 *   accessor function instead of a constant.
 * @returns The contents of the `.d.ts` file.
 */
function generateTypeDefinition(
  resourceName: string,
  propertyConfigs: PropertyConfig[],
  references: Set<string>,
  currentFilePath?: string,
  hasRefs?: boolean
): string {
  const typeName = resourceName ? `${resourceName}Table` : 'Table';
  const currentTableName = currentFilePath
    ? path.basename(currentFilePath, path.extname(currentFilePath))
    : undefined;
  const singularType = resourceName ? capitalize(resourceName) : `${typeName}[number]`;

  // Generate import statements for referenced tables
  const imports: string[] = [];
  const resourceNames = new Map<string, string>();
  for (const tableName of references) {
    if (tableName === currentTableName) {
      // Self-reference: point at this table's own row type, no import.
      resourceNames.set(tableName, singularType);
      continue;
    }
    // Convert table name to type name by capitalizing
    const typeBase = capitalize(tableName);
    resourceNames.set(tableName, typeBase);

    // Generate import path based on current file path
    const importPath = currentFilePath ? `./${tableName}.csv` : `../${tableName}.csv`;
    imports.push(`import type { ${typeBase} } from '${importPath}';`);
  }

  const importSection = imports.length > 0 ? imports.join('\n') + '\n\n' : '';

  const properties = propertyConfigs
    .map(
      config =>
        `  readonly ${propertyKey(config.name)}: ${schemaToTypeString(config.schema, resourceNames)};`
    )
    .join('\n');

  const exportAlias = resourceName
    ? `\nexport type ${singularType} = ${typeName}[number];`
    : '';

  const defaultExport = hasRefs
    ? `declare function getData(): ${typeName};\nexport default getData;\n`
    : `declare const data: ${typeName};\nexport default data;\n`;

  return `${importSection}type ${typeName} = readonly {
${properties}
}[];
${exportAlias}

${defaultExport}`;
}

// ---------------------------------------------------------------------------
// CSV parsing
// ---------------------------------------------------------------------------

/**
 * Parse CSV content string into structured data with schema validation.
 * This is a standalone function that doesn't depend on webpack/rspack LoaderContext.
 *
 * The first record holds the column headers and the second record holds one
 * schema string per column; every following record is a data row. Missing
 * trailing cells are parsed as the empty string.
 *
 * @param content - CSV content string (must contain at least the header row
 *   and the schema row; data rows are optional)
 * @param options - Parsing options
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error when fewer than two records are present, when header and
 *   schema counts differ, or when a cell fails to parse or validate.
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {}
): CsvParseResult {
  const delimiter = options.delimiter ?? ',';
  const quote = options.quote ?? '"';
  const escape = options.escape ?? '\\';
  const bom = options.bom ?? true;
  const comment = options.comment === false ? undefined : (options.comment ?? '#');
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? 'id';
  const resolveReferences = options.resolveReferences ?? true;
  const currentFilePath = options.currentFilePath;

  const records = parse(content, {
    delimiter,
    quote,
    escape,
    bom,
    comment,
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error('CSV must have at least 2 rows: headers and schemas');
  }

  const headers = records[0];
  const schemas = records[1];

  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`
    );
  }

  const propertyConfigs: PropertyConfig[] = headers.map((header: string, index: number) => {
    const schema = parseSchema(schemas[index]);
    const config: PropertyConfig = {
      name: header,
      schema,
      validator: createValidator(schema),
      parser: (valueString: string) => parseValue(schema, valueString),
    };

    if (schema.type === 'reference') {
      config.isReference = true;
      config.referenceTableName = schema.tableName;
      config.referenceIsArray = schema.isArray;
      config.parser = resolveReferences
        ? (valueString: string) =>
            parseReferenceValue(schema, valueString, refBaseDir, defaultPrimaryKey, currentFilePath)
        : (valueString: string) => parseReferenceIds(schema, valueString);
    } else if (hasNestedReferences(schema)) {
      config.isReference = true;
      config.parser = resolveReferences
        ? (valueString: string) =>
            parseValueWithReferences(valueString, schema, refBaseDir, defaultPrimaryKey, currentFilePath)
        : (valueString: string) => parseValueWithReferenceIds(valueString, schema);
    }

    return config;
  });

  // Collect all referenced tables (including nested references in tuples/arrays)
  const allReferenceFields = propertyConfigs.flatMap(config =>
    collectReferenceFields(config.schema, config.name)
  );
  const references = new Set<string>(allReferenceFields.map(field => field.tableName));

  const dataRows = records.slice(2);
  const objects = dataRows.map((row: string[], rowIndex: number) => {
    const obj: Record<string, unknown> = {};
    propertyConfigs.forEach((config, colIndex) => {
      const rawValue = row[colIndex] ?? '';
      try {
        const parsed = config.parser(rawValue);
        // Skip validation for reference fields (validation happens during reference resolution)
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            // +3: one-based numbering plus the header and schema rows.
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`
          );
        }
        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`
          );
        }
        throw error;
      }
    });
    return obj;
  });

  // Field metadata is only reported in lazy mode, where callers need it to
  // generate resolution code.
  const referenceFields: ReferenceFieldInfo[] = resolveReferences ? [] : allReferenceFields;

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
  };

  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || '',
      propertyConfigs,
      references,
      currentFilePath,
      referenceFields.length > 0
    );
  }

  return result;
}

// ---------------------------------------------------------------------------
// Module code generation
// ---------------------------------------------------------------------------

/**
 * Generate runtime reference resolution code for a schema.
 * Returns a JS expression string that resolves references using lookup maps.
 *
 * @param schema - Schema of the value held by `valueExpr`.
 * @param valueExpr - JS expression that evaluates to the unresolved value.
 * @param lookupVar - Maps a table name to the name of its lookup variable.
 * @param pkField - Primary key field name. NOTE(review): not used by the
 *   generated expression; it is only passed through to recursive calls.
 */
function generateSchemaResolutionCode(
  schema: Schema,
  valueExpr: string,
  lookupVar: (tableName: string) => string,
  pkField: string
): string {
  switch (schema.type) {
    case 'reference': {
      const lookup = lookupVar(schema.tableName);
      if (schema.isArray) {
        return `(Array.isArray(${valueExpr}) ? ${valueExpr}.map(id => ${lookup}.get(String(id))) : ${valueExpr})`;
      }
      return `${lookup}.get(String(${valueExpr}))`;
    }
    case 'tuple': {
      const elementResolvers = schema.elements.map((el, i) => {
        const elementExpr = `${valueExpr}[${i}]`;
        return hasNestedReferences(el.schema)
          ? generateSchemaResolutionCode(el.schema, elementExpr, lookupVar, pkField)
          : elementExpr;
      });
      return `[${elementResolvers.join(', ')}]`;
    }
    case 'array': {
      if (hasNestedReferences(schema.element)) {
        const itemResolve = generateSchemaResolutionCode(schema.element, 'item', lookupVar, pkField);
        return `(${valueExpr}).map(item => ${itemResolve})`;
      }
      return valueExpr;
    }
    case 'union': {
      // Each reference-bearing member contributes one candidate; the raw
      // value is the last candidate when the union has plain members.
      const resolveParts = schema.members
        .filter(m => hasNestedReferences(m))
        .map(member => generateSchemaResolutionCode(member, valueExpr, lookupVar, pkField));
      if (schema.members.some(m => !hasNestedReferences(m))) {
        resolveParts.push(valueExpr);
      }

      if (resolveParts.length === 0) return valueExpr;
      if (resolveParts.length === 1) return resolveParts[0];
      return `(${resolveParts.join(' ?? ')})`;
    }
    default:
      return valueExpr;
  }
}

/**
 * Generate JavaScript module code from CSV content.
 * Emits an accessor function for tables with references (lazy resolution),
 * or static JSON for tables without references.
 *
 * Column names and the primary key are emitted with dot notation when they
 * are valid identifiers and with quoted/bracket notation otherwise; table
 * names are sanitised before being used inside generated variable names.
 *
 * @param content - CSV content string, in the format accepted by parseCsv.
 * @param options - Loader options; `resolveReferences` is always forced to
 *   false so that the emitted data contains IDs.
 * @returns The module source (`js`) and, if type emission is enabled, the
 *   matching declaration source (`dts`).
 */
export function csvToModule(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {}
): { js: string; dts?: string } {
  const result = parseCsv(content, { ...options, resolveReferences: false });
  const hasRefs = result.referenceFields.length > 0;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? 'id';

  if (!hasRefs) {
    const json = JSON.stringify(result.data, null, 2);
    return {
      js: `export default ${json};`,
      dts: result.typeDefinition,
    };
  }

  const imports: string[] = [];
  const lookupInits: string[] = [];
  const lookupVarMap = new Map<string, string>();

  const currentTableName = options.currentFilePath
    ? path.basename(options.currentFilePath, path.extname(options.currentFilePath))
    : undefined;

  const pkAccess = propertyAccess('p', defaultPrimaryKey);
  const uniqueTables = new Set(result.referenceFields.map(f => f.tableName));
  for (const tableName of uniqueTables) {
    const safeName = toIdentifierPart(tableName);
    const lookupVarName = `_${safeName}Lookup`;
    lookupVarMap.set(tableName, lookupVarName);

    if (tableName === currentTableName) {
      // Self-reference: build the lookup from this module's own raw rows.
      lookupInits.push(
        `const ${lookupVarName} = new Map(_raw.map(p => [String(${pkAccess}), p]));`
      );
    } else {
      const varName = `_${safeName}`;
      imports.push(`import ${varName} from './${tableName}.csv';`);
      lookupInits.push(
        `const ${lookupVarName} = new Map(${varName}().map(p => [String(${pkAccess}), p]));`
      );
    }
  }

  const lookupVar = (tableName: string) => lookupVarMap.get(tableName)!;

  const rowResolvers: string[] = [];
  for (const config of result.propertyConfigs) {
    if (hasNestedReferences(config.schema)) {
      const resolveCode = generateSchemaResolutionCode(
        config.schema,
        propertyAccess('row', config.name),
        lookupVar,
        defaultPrimaryKey
      );
      rowResolvers.push(`    ${propertyKey(config.name)}: ${resolveCode},`);
    }
  }

  const rawJson = JSON.stringify(result.data, null, 2);

  let js: string;
  if (rowResolvers.length > 0) {
    js = [
      ...imports,
      '',
      `const _raw = ${rawJson};`,
      '',
      'let _resolved = null;',
      '',
      'export default function getData() {',
      '  if (_resolved) return _resolved;',
      // Publish the raw rows first so a re-entrant getData() call made while
      // the lookups are being built returns instead of recursing.
      '  _resolved = _raw;',
      ...lookupInits.map(l => `  ${l}`),
      '  _resolved = _raw.map(row => ({',
      '    ...row,',
      ...rowResolvers,
      '  }));',
      '  return _resolved;',
      '}',
    ].join('\n');
  } else {
    js = `export default ${rawJson};`;
  }

  return {
    js,
    dts: result.typeDefinition,
  };
}