// Source path: inline-schema/src/csv-loader/loader.ts

import { parse } from "csv-parse/sync";
import {
parseSchema,
createValidator,
parseValue,
schemaToTypeString,
} from "../index.js";
import type {
Schema,
ReferenceSchema,
ReverseReferenceSchema,
} from "../types.js";
import * as fs from "fs";
import * as path from "path";
2026-03-31 13:02:29 +08:00
2026-04-15 13:58:14 +08:00
/**
 * Report whether a schema is, or contains anywhere inside it, a reference or
 * reverse reference to another table.
 *
 * Tuples, arrays and unions are searched recursively; every other schema type
 * is treated as a leaf that holds no references.
 *
 * @param schema - Schema to inspect.
 * @returns `true` when at least one (reverse) reference is found.
 */
function hasNestedReferences(schema: Schema): boolean {
  switch (schema.type) {
    case "reference":
    case "reverseReference":
      return true;
    case "tuple":
      return schema.elements.some((el) => hasNestedReferences(el.schema));
    case "array":
      return hasNestedReferences(schema.element);
    case "union":
      return schema.members.some((m) => hasNestedReferences(m));
    default:
      return false;
  }
}
/**
 * Primary-key lookup maps already built for a loaded table.
 * Keyed by the table's row array, then by the primary-key field name, so the
 * map is built once per table instead of once per resolved cell.
 */
const referenceLookupCache = new WeakMap<
  Record<string, unknown>[],
  Map<string, Map<string, Record<string, unknown>>>
>();

/**
 * Load the CSV table a (reverse) reference points to and index it by primary key.
 *
 * The file is `<tableName>.csv`, resolved against `refBaseDir`, else the
 * directory of `currentFilePath`, else the process working directory. Parsed
 * tables are memoised in `referenceTableCache` by resolved file path.
 *
 * @param schema - Reference schema naming the target table.
 * @param refBaseDir - Optional base directory for referenced CSV files.
 * @param defaultPrimaryKey - Field used as the lookup key in the target table.
 * @param currentFilePath - Path of the file that contains the reference.
 * @returns `lookup` (stringified primary key to row; rows with an `undefined`
 *   key are left out, and the last row wins on duplicate keys) and `refTable`
 *   (all rows). The lookup map is shared between calls and must be treated as
 *   read-only.
 * @throws Error when the target file is already being loaded (circular
 *   reference) or when reading/parsing it fails.
 */
function loadReferenceTable(
  schema: ReferenceSchema | ReverseReferenceSchema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined,
): {
  lookup: Map<string, Record<string, unknown>>;
  refTable: Record<string, unknown>[];
} {
  const baseDir =
    refBaseDir ||
    (currentFilePath ? path.dirname(currentFilePath) : process.cwd());
  const fileName = `${schema.tableName}.csv`;
  const refFilePath = path.isAbsolute(fileName)
    ? fileName
    : path.join(baseDir, fileName);

  let refTable: Record<string, unknown>[];
  const cachedTable = referenceTableCache.get(refFilePath);
  if (cachedTable !== undefined) {
    refTable = cachedTable;
  } else {
    if (loadingFiles.has(refFilePath)) {
      throw new Error(
        `Circular reference detected: table "${schema.tableName}" (${refFilePath}) is already being loaded`,
      );
    }
    loadingFiles.add(refFilePath);
    try {
      const refContent = fs.readFileSync(refFilePath, "utf-8");
      // Pass the caller's reference settings down so references inside the
      // referenced table are resolved with the same base directory and key.
      const refResult = parseCsv(refContent, {
        currentFilePath: refFilePath,
        emitTypes: false,
        refBaseDir,
        defaultPrimaryKey,
      });
      refTable = refResult.data;
      referenceTableCache.set(refFilePath, refTable);
    } catch (error) {
      throw new Error(
        `Failed to load referenced table "${schema.tableName}" from ${refFilePath}: ${error instanceof Error ? error.message : String(error)}`,
      );
    } finally {
      // Always clear the in-progress marker, even when loading failed.
      loadingFiles.delete(refFilePath);
    }
  }

  let lookupsByKeyField = referenceLookupCache.get(refTable);
  if (lookupsByKeyField === undefined) {
    lookupsByKeyField = new Map();
    referenceLookupCache.set(refTable, lookupsByKeyField);
  }
  let lookup = lookupsByKeyField.get(defaultPrimaryKey);
  if (lookup === undefined) {
    lookup = new Map<string, Record<string, unknown>>();
    for (const row of refTable) {
      const pkValue = row[defaultPrimaryKey];
      if (pkValue !== undefined) {
        lookup.set(String(pkValue), row);
      }
    }
    lookupsByKeyField.set(defaultPrimaryKey, lookup);
  }
  return { lookup, refTable };
}
/**
 * Look up one referenced row by its stringified primary key.
 *
 * @param id - Primary key of the wanted row, as a string.
 * @param lookup - Primary-key index of the referenced table.
 * @param tableName - Referenced table name, used only in the error message.
 * @returns The matching row.
 * @throws Error when `lookup` has no entry for `id`. The message contains
 *   "not found", which union resolution elsewhere in this file matches on.
 */
function resolveReferenceId(
  id: string,
  lookup: Map<string, Record<string, unknown>>,
  tableName: string,
): Record<string, unknown> {
  const row = lookup.get(id);
  if (row === undefined) {
    throw new Error(`Reference to "${tableName}" with id="${id}" not found`);
  }
  return row;
}
/**
 * Parse a reference cell into raw ID strings without loading the referenced table.
 *
 * @param schema - Reference schema of the column.
 * @param valueString - Raw cell text.
 * @returns `null` for a blank cell of an optional reference; an array of IDs
 *   for array references; otherwise the first parsed ID (an empty string when
 *   a non-optional cell is blank).
 */
function parseReferenceIds(
  schema: ReferenceSchema,
  valueString: string,
): unknown {
  const trimmed = valueString.trim();
  if (schema.isOptional && trimmed === "") {
    return null;
  }
  const ids = new ReferenceValueParser(trimmed).parseIds(schema.isArray);
  return schema.isArray ? ids : ids[0];
}
/**
 * Parse a cell whose schema may contain references, keeping reference IDs as
 * strings instead of resolving them to rows.
 *
 * @param valueString - Raw cell text.
 * @param schema - Schema of the column.
 * @returns The parsed value, with every reference position holding ID
 *   string(s) and every reverse-reference position holding `null`.
 */
function parseValueWithReferenceIds(
  valueString: string,
  schema: Schema,
): unknown {
  if (!hasNestedReferences(schema)) {
    return parseValue(schema, valueString);
  }
  switch (schema.type) {
    case "reference":
      return parseReferenceIds(schema, valueString);
    case "reverseReference":
      // Reverse references don't store IDs; they're derived at resolution time
      return null;
    case "tuple": {
      const parsed = parseValue(schema, valueString) as unknown[];
      return schema.elements.map((el, i) =>
        hasNestedReferences(el.schema)
          ? extractNestedReferenceIds(parsed[i], el.schema)
          : parsed[i],
      );
    }
    case "array": {
      const parsed = parseValue(schema, valueString) as unknown[];
      const elementHasReferences = hasNestedReferences(schema.element);
      return parsed.map((item) =>
        elementHasReferences
          ? extractNestedReferenceIds(item, schema.element)
          : item,
      );
    }
    case "union": {
      for (const member of schema.members) {
        if (!hasNestedReferences(member)) continue;
        try {
          const parsed = parseValue(member, valueString);
          return extractNestedReferenceIds(parsed, member);
        } catch {
          // Deliberate best effort: this member does not match the cell text,
          // so try the next reference-bearing member.
        }
      }
      // No reference-bearing member matched; parse against the whole union.
      return parseValue(schema, valueString);
    }
    default:
      return parseValue(schema, valueString);
  }
}
/**
 * Walk an already-parsed value alongside its schema and normalise every
 * reference position to ID string(s).
 *
 * @param value - Parsed value (or sub-value) for `schema`.
 * @param schema - Schema describing `value`.
 * @returns A copy of the structure with reference IDs stringified and
 *   reverse-reference positions set to `null`. `null`/`undefined` reference
 *   values and non-array values found where a tuple/array is expected are
 *   returned unchanged.
 */
function extractNestedReferenceIds(value: unknown, schema: Schema): unknown {
  switch (schema.type) {
    case "reference": {
      if (value === null || value === undefined) return value;
      if (schema.isArray) {
        // A lone ID in an array reference is treated as a one-element list.
        const ids: unknown[] = Array.isArray(value) ? value : [value];
        return ids.map((id) => String(id));
      }
      return String(value);
    }
    case "reverseReference":
      // Reverse references don't store IDs; return null placeholder
      return null;
    case "tuple": {
      if (!Array.isArray(value)) return value;
      return schema.elements.map((el, i) =>
        hasNestedReferences(el.schema)
          ? extractNestedReferenceIds(value[i], el.schema)
          : value[i],
      );
    }
    case "array": {
      if (!Array.isArray(value)) return value;
      const elementHasReferences = hasNestedReferences(schema.element);
      return value.map((item) =>
        elementHasReferences
          ? extractNestedReferenceIds(item, schema.element)
          : item,
      );
    }
    case "union": {
      for (const member of schema.members) {
        if (!hasNestedReferences(member)) continue;
        try {
          return extractNestedReferenceIds(value, member);
        } catch {
          // Best effort: fall through to the next reference-bearing member.
        }
      }
      return value;
    }
    default:
      return value;
  }
}
/**
 * List every reference and reverse reference found in a column's schema.
 *
 * Tuples, arrays and unions are searched recursively; each entry found is
 * reported under the same column `name`.
 *
 * @param schema - Column schema to search.
 * @param name - Column name recorded on every returned entry.
 * @returns One entry per (reverse) reference, in schema order. Reverse
 *   references are always reported with `isArray: true` and carry `foreignKey`.
 */
function collectReferenceFields(
  schema: Schema,
  name: string,
): ReferenceFieldInfo[] {
  switch (schema.type) {
    case "reference":
      return [
        {
          name,
          tableName: schema.tableName,
          isArray: schema.isArray,
          schema,
        },
      ];
    case "reverseReference":
      return [
        {
          name,
          tableName: schema.tableName,
          isArray: true,
          foreignKey: schema.foreignKey,
          schema,
        },
      ];
    case "tuple":
      return schema.elements.flatMap((el) =>
        collectReferenceFields(el.schema, name),
      );
    case "array":
      return collectReferenceFields(schema.element, name);
    case "union":
      return schema.members.flatMap((member) =>
        collectReferenceFields(member, name),
      );
    default:
      return [];
  }
}
2026-04-15 13:58:14 +08:00
/**
 * Parse a cell whose schema may contain references and eagerly replace every
 * reference ID with the row it points to.
 *
 * @param valueString - Raw cell text.
 * @param schema - Schema of the column.
 * @param refBaseDir - Optional base directory for referenced CSV files.
 * @param defaultPrimaryKey - Primary-key field of referenced tables.
 * @param currentFilePath - Path of the file being parsed.
 * @param currentRowPk - Primary key of the row being parsed; when it is
 *   `undefined`, reverse references resolve to an empty array.
 * @returns The parsed value with references resolved.
 * @throws Error from parsing, or from reference resolution (missing ID,
 *   circular reference, unreadable table).
 */
function parseValueWithReferences(
  valueString: string,
  schema: Schema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined,
  currentRowPk?: unknown,
): unknown {
  if (!hasNestedReferences(schema)) {
    return parseValue(schema, valueString);
  }
  // Resolve an already-parsed sub-value with this call's settings.
  const resolve = (value: unknown, subSchema: Schema): unknown =>
    resolveNestedReferences(
      value,
      subSchema,
      refBaseDir,
      defaultPrimaryKey,
      currentFilePath,
      currentRowPk,
    );

  switch (schema.type) {
    case "reference":
      return parseReferenceValue(
        schema,
        valueString,
        refBaseDir,
        defaultPrimaryKey,
        currentFilePath,
      );
    case "reverseReference": {
      if (currentRowPk === undefined) return [];
      return resolveReverseReference(
        schema,
        currentRowPk,
        refBaseDir,
        defaultPrimaryKey,
        currentFilePath,
      );
    }
    case "tuple": {
      const parsed = parseValue(schema, valueString) as unknown[];
      return schema.elements.map((el, i) => resolve(parsed[i], el.schema));
    }
    case "array": {
      const parsed = parseValue(schema, valueString) as unknown[];
      return parsed.map((item) => resolve(item, schema.element));
    }
    case "union": {
      // Messages produced by reference resolution, as opposed to parse errors.
      const resolutionFailure = /not found|Circular reference|Failed to load/;
      const errors: Error[] = [];
      for (const member of schema.members) {
        if (!hasNestedReferences(member)) continue;
        try {
          return resolve(parseValue(member, valueString), member);
        } catch (e) {
          errors.push(e instanceof Error ? e : new Error(String(e)));
        }
      }
      // When every reference member parsed but failed to resolve, prefer a
      // member without references before falling back to the whole union.
      if (
        errors.length > 0 &&
        errors.every((e) => resolutionFailure.test(e.message))
      ) {
        for (const member of schema.members) {
          if (hasNestedReferences(member)) continue;
          try {
            return parseValue(member, valueString);
          } catch {
            // This plain member does not match either; try the next one.
          }
        }
      }
      return parseValue(schema, valueString);
    }
    default:
      return parseValue(schema, valueString);
  }
}
/**
 * Collect the rows of the referenced table whose foreign-key field points at
 * the given primary key.
 *
 * Both sides are compared after conversion with `String`.
 *
 * @param schema - Reverse-reference schema (target table and foreign key).
 * @param pkValue - Primary key of the row that owns the reverse reference.
 * @param refBaseDir - Optional base directory for referenced CSV files.
 * @param defaultPrimaryKey - Primary-key field name passed to the table loader.
 * @param currentFilePath - Path of the file being parsed.
 * @returns Matching rows in table order (empty when none match).
 */
function resolveReverseReference(
  schema: ReverseReferenceSchema,
  pkValue: unknown,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined,
): Record<string, unknown>[] {
  const loaded = loadReferenceTable(
    schema,
    refBaseDir,
    defaultPrimaryKey,
    currentFilePath,
  );
  const wantedKey = String(pkValue);
  const matches: Record<string, unknown>[] = [];
  for (const candidate of loaded.refTable) {
    if (String(candidate[schema.foreignKey]) === wantedKey) {
      matches.push(candidate);
    }
  }
  return matches;
}
2026-04-15 13:58:14 +08:00
/**
 * Walk an already-parsed value alongside its schema and replace every
 * reference ID with the row it points to.
 *
 * @param value - Parsed value (or sub-value) for `schema`.
 * @param schema - Schema describing `value`.
 * @param refBaseDir - Optional base directory for referenced CSV files.
 * @param defaultPrimaryKey - Primary-key field of referenced tables.
 * @param currentFilePath - Path of the file being parsed.
 * @param currentRowPk - Primary key of the current row; when `undefined`,
 *   reverse references resolve to an empty array.
 * @returns The value with references resolved. `null`/`undefined` reference
 *   values and non-array values found where a tuple/array is expected are
 *   returned unchanged.
 * @throws Error when a referenced ID or table cannot be resolved. For unions,
 *   the error of the first failing reference-bearing member is rethrown.
 */
function resolveNestedReferences(
  value: unknown,
  schema: Schema,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined,
  currentRowPk?: unknown,
): unknown {
  // Recurse with this call's settings.
  const recurse = (subValue: unknown, subSchema: Schema): unknown =>
    resolveNestedReferences(
      subValue,
      subSchema,
      refBaseDir,
      defaultPrimaryKey,
      currentFilePath,
      currentRowPk,
    );

  switch (schema.type) {
    case "reference": {
      if (value === null || value === undefined) return value;
      const { lookup } = loadReferenceTable(
        schema,
        refBaseDir,
        defaultPrimaryKey,
        currentFilePath,
      );
      if (schema.isArray) {
        // A lone ID in an array reference is treated as a one-element list.
        const ids: unknown[] = Array.isArray(value) ? value : [value];
        return ids.map((id) =>
          resolveReferenceId(String(id), lookup, schema.tableName),
        );
      }
      return resolveReferenceId(String(value), lookup, schema.tableName);
    }
    case "reverseReference": {
      if (currentRowPk === undefined) return [];
      return resolveReverseReference(
        schema,
        currentRowPk,
        refBaseDir,
        defaultPrimaryKey,
        currentFilePath,
      );
    }
    case "tuple": {
      if (!Array.isArray(value)) return value;
      return schema.elements.map((el, i) => recurse(value[i], el.schema));
    }
    case "array": {
      if (!Array.isArray(value)) return value;
      return value.map((item) => recurse(item, schema.element));
    }
    case "union": {
      const errors: Error[] = [];
      for (const member of schema.members) {
        if (!hasNestedReferences(member)) continue;
        try {
          return recurse(value, member);
        } catch (e) {
          errors.push(e instanceof Error ? e : new Error(String(e)));
        }
      }
      if (errors.length > 0) {
        throw errors[0];
      }
      return value;
    }
    default:
      return value;
  }
}
2026-03-31 13:02:29 +08:00
/** Options accepted by the CSV parsing and module-generation functions. */
export interface CsvLoaderOptions {
  /** Field delimiter (parseCsv falls back to "," when omitted). */
  delimiter?: string;
  /** Quote character (parseCsv falls back to '"' when omitted). */
  quote?: string;
  /** Escape character (parseCsv falls back to "\\" when omitted). */
  escape?: string;
  /** Byte-order-mark handling flag passed to the CSV parser (parseCsv falls back to true). */
  bom?: boolean;
  /** Comment marker setting (parseCsv falls back to "#" when omitted; `false` means no marker). */
  comment?: string | false;
  /** Trim-whitespace flag passed to the CSV parser (parseCsv falls back to true). */
  trim?: boolean;
  /** Generate TypeScript declaration file (.d.ts) */
  emitTypes?: boolean;
  /** Output directory for generated type files (relative to output path) */
  typesOutputDir?: string;
  /** Write .d.ts files to disk (useful for dev server) */
  writeToDisk?: boolean;
  /** Base directory for resolving referenced CSV files (default: directory of current file) */
  refBaseDir?: string;
  /** Primary key field name for referenced tables (default: 'id') */
  defaultPrimaryKey?: string;
  /** Current file path (used to resolve relative references) */
  currentFilePath?: string;
  /**
   * When false, reference fields store parsed IDs instead of resolved objects.
   * Used by csvToModule to emit accessor-based code with lazy resolution.
   * Default: true (resolves references eagerly by loading referenced CSV files).
   */
  resolveReferences?: boolean;
}
/** Metadata describing one reference (or reverse reference) found in a column schema. */
export interface ReferenceFieldInfo {
  /** Column name in the CSV */
  name: string;
  /** Referenced table name */
  tableName: string;
  /** Whether it's an array reference */
  isArray: boolean;
  /** The schema of this field (for nested references) */
  schema: Schema;
  /** For reverse references: the foreign key field name in the referenced table */
  foreignKey?: string;
}
/** Result returned by `parseCsv`. */
export interface CsvParseResult {
  /** Parsed CSV data as array of objects */
  data: Record<string, unknown>[];
  /** Generated TypeScript type definition string (if emitTypes is true) */
  typeDefinition?: string;
  /** Property configurations for the CSV columns */
  propertyConfigs: PropertyConfig[];
  /** Referenced table names */
  references: Set<string>;
  /** Reference field metadata (populated when resolveReferences is false) */
  referenceFields: ReferenceFieldInfo[];
  /** Reverse reference declarations parsed from comment lines */
  reverseReferences: ReverseReferenceDeclaration[];
}
2026-03-31 13:02:29 +08:00
/** Per-column configuration: schema, validator, cell parser and reference metadata. */
interface PropertyConfig {
  /** Column (property) name. */
  name: string;
  /** Parsed schema of the column. */
  schema: any;
  /** Checks a parsed value against the schema. */
  validator: (value: unknown) => boolean;
  /** Converts raw cell text into the column's value. */
  parser: (valueString: string) => unknown;
  /** Whether this property is a reference to another table */
  isReference?: boolean;
  /** Referenced table name (if isReference is true) */
  referenceTableName?: string;
  /** Whether it's an array reference */
  referenceIsArray?: boolean;
  /** Whether this is a reverse reference (one-to-many) */
  isReverseReference?: boolean;
  /** Foreign key field name for reverse references */
  reverseReferenceForeignKey?: string;
}
/** Parsed reverse reference declaration from a comment line */
export interface ReverseReferenceDeclaration {
  /** Field name in the current table */
  fieldName: string;
  /** Referenced table name */
  tableName: string;
  /** Foreign key field name in the referenced table */
  foreignKey: string;
  /** Whether it's optional */
  isOptional: boolean;
  /** The parsed schema */
  schema: ReverseReferenceSchema;
}
/**
 * Parse a reverse reference declaration from a comment line.
 *
 * Format: `# fieldName := ~tableName(foreignKey)` with an optional trailing
 * `?` marking the field as optional. Names consist of word characters only.
 *
 * @param line - Candidate line (surrounding whitespace is ignored).
 * @returns The parsed declaration, or `null` when the line does not start
 *   with `#` or does not match the format.
 */
function parseReverseReferenceDeclaration(
  line: string,
): ReverseReferenceDeclaration | null {
  const trimmed = line.trim();
  // Must start with # (comment)
  if (!trimmed.startsWith("#")) return null;
  const content = trimmed.slice(1).trim();
  // Match pattern: fieldName := ~tableName(foreignKey)
  const match = content.match(/^(\w+)\s*:=\s*~(\w+)\((\w+)\)(\?)?$/);
  if (!match) return null;
  const [, fieldName, tableName, foreignKey, optionalMark] = match;
  const isOptional = optionalMark === "?";
  const schema: ReverseReferenceSchema = {
    type: "reverseReference",
    tableName,
    foreignKey,
    isOptional,
  };
  return { fieldName, tableName, foreignKey, isOptional, schema };
}
/** Cache for loaded referenced tables, keyed by resolved CSV file path */
const referenceTableCache = new Map<string, Record<string, unknown>[]>();
/** Set of file paths currently being loaded (to detect circular references) */
const loadingFiles = new Set<string>();
2026-04-11 22:56:01 +08:00
/**
 * Parse and resolve a reference value.
 * Loads the referenced table and replaces IDs with actual objects.
 *
 * @param schema - Reference schema of the column.
 * @param valueString - Raw cell text.
 * @param refBaseDir - Optional base directory for referenced CSV files.
 * @param defaultPrimaryKey - Primary-key field of the referenced table.
 * @param currentFilePath - Path of the file being parsed.
 * @returns `null` for a blank cell of an optional reference (the table is not
 *   loaded in that case); an array of rows for array references; otherwise
 *   the single referenced row.
 * @throws Error when the table cannot be loaded or an ID is not found.
 */
function parseReferenceValue(
  schema: ReferenceSchema,
  valueString: string,
  refBaseDir: string | undefined,
  defaultPrimaryKey: string,
  currentFilePath: string | undefined,
): unknown {
  const trimmed = valueString.trim();
  if (schema.isOptional && trimmed === "") {
    return null;
  }
  const { lookup } = loadReferenceTable(
    schema,
    refBaseDir,
    defaultPrimaryKey,
    currentFilePath,
  );
  const ids = new ReferenceValueParser(trimmed).parseIds(schema.isArray);
  if (schema.isArray) {
    return ids.map((id) => resolveReferenceId(id, lookup, schema.tableName));
  }
  return resolveReferenceId(ids[0], lookup, schema.tableName);
}
/**
 * Small cursor-based parser that extracts reference IDs from a cell's text.
 *
 * Array cells use the form `[id1; id2; id3]`; the brackets are optional and
 * empty entries are skipped. Single cells yield the text up to the first
 * `;`, `]` or `,`. Every ID is returned trimmed.
 */
class ReferenceValueParser {
  private readonly input: string;
  private pos = 0;

  constructor(input: string) {
    this.input = input;
  }

  /** Character at the cursor, or "" at end of input. */
  private peek(): string {
    return this.input[this.pos] || "";
  }

  /** Advance the cursor past any whitespace. */
  private skipWhitespace(): void {
    while (this.pos < this.input.length && /\s/.test(this.peek())) {
      this.pos++;
    }
  }

  /** Consume `ch` when it is the next character; report whether it was. */
  private consumeIf(ch: string): boolean {
    if (this.peek() !== ch) return false;
    this.pos++;
    return true;
  }

  /** Read up to (not including) the first character in `stops`; return it trimmed. */
  private readUntil(stops: string): string {
    const start = this.pos;
    while (this.pos < this.input.length && !stops.includes(this.peek())) {
      this.pos++;
    }
    return this.input.slice(start, this.pos).trim();
  }

  /**
   * Parse the input into IDs.
   *
   * @param isArray - Parse the `[a; b]` list form when true, a single ID otherwise.
   * @returns The list of non-empty IDs (array mode), or a one-element array
   *   holding the single ID, which may be an empty string (single mode).
   */
  parseIds(isArray: boolean): string[] {
    this.skipWhitespace();
    if (!isArray) {
      return [this.readUntil(";],")];
    }

    // Parse array format: [id1; id2; id3]
    this.consumeIf("[");
    this.skipWhitespace();
    if (this.consumeIf("]")) {
      return [];
    }
    const ids: string[] = [];
    do {
      const id = this.readUntil(";]");
      if (id) {
        ids.push(id);
      }
    } while (this.consumeIf(";"));
    this.consumeIf("]");
    return ids;
  }
}
/**
 * Generate the TypeScript declaration (.d.ts) text for a CSV table.
 *
 * The output declares `type <resourceName>Table` (or `Table` when no resource
 * name is given) as a readonly array of row objects, optionally exports a
 * capitalised singular alias, and declares a default-exported `getData()`.
 *
 * @param resourceName - Base name for the generated types; may be empty.
 * @param propertyConfigs - Column configurations, one property each.
 * @param references - Names of referenced tables; each becomes a type import
 *   unless it is the current table (a self-reference).
 * @param currentFilePath - Path of the CSV file; used to detect
 *   self-references and to choose the import path prefix.
 * @returns The declaration file content.
 */
function generateTypeDefinition(
  resourceName: string,
  propertyConfigs: PropertyConfig[],
  references: Set<string>,
  currentFilePath?: string,
): string {
  const typeName = resourceName ? `${resourceName}Table` : "Table";
  const currentTableName = currentFilePath
    ? path.basename(currentFilePath, path.extname(currentFilePath))
    : undefined;
  const singularType = resourceName
    ? resourceName.charAt(0).toUpperCase() + resourceName.slice(1)
    : `${typeName}[number]`;

  // Generate import statements for referenced tables
  const imports: string[] = [];
  const resourceNames = new Map<string, string>();
  references.forEach((tableName) => {
    if (tableName === currentTableName) {
      // Self-reference: use this table's own row type, no import needed.
      resourceNames.set(tableName, singularType);
      return;
    }
    // Convert table name to type name by capitalizing
    const typeBase = tableName.charAt(0).toUpperCase() + tableName.slice(1);
    resourceNames.set(tableName, typeBase);
    // Generate import path based on current file path
    const importPath = currentFilePath
      ? `./${tableName}.csv`
      : `../${tableName}.csv`;
    imports.push(`import type { ${typeBase} } from '${importPath}';`);
  });
  const importSection = imports.length > 0 ? imports.join("\n") + "\n\n" : "";

  // Column headers are arbitrary text; quote any that are not valid
  // identifiers so the emitted declaration stays syntactically valid.
  const identifierPattern = /^[A-Za-z_$][\w$]*$/;
  const properties = propertyConfigs
    .map((config) => {
      const key = identifierPattern.test(config.name)
        ? config.name
        : JSON.stringify(config.name);
      return ` readonly ${key}: ${schemaToTypeString(config.schema, resourceNames)};`;
    })
    .join("\n");

  const exportAlias = resourceName
    ? `\nexport type ${singularType} = ${typeName}[number];`
    : "";

  return (
    `${importSection}type ${typeName} = readonly {\n` +
    `${properties}\n` +
    `}[];\n` +
    `${exportAlias}\n` +
    `declare function getData(): ${typeName};\n` +
    `export default getData;\n`
  );
}
/**
 * Parse CSV content string into structured data with schema validation.
 * This is a standalone function that doesn't depend on webpack/rspack LoaderContext.
 *
 * Layout: record 1 holds column headers, record 2 holds one schema string per
 * column, and the remaining records are data. Records whose first cell starts
 * with the comment marker are skipped; those of the form
 * `# field := ~table(foreignKey)` additionally declare a reverse-reference
 * property on every row.
 *
 * @param content - CSV content string (headers row + schema row, then data rows)
 * @param options - Parsing options
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error when fewer than two records exist, when header and schema
 *   counts differ, or when a cell fails to parse or validate. Row numbers in
 *   messages count non-empty records from 1, comment records included.
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
  const resolveReferences = options.resolveReferences ?? true;
  // `false` (or an empty marker) turns comment handling off entirely.
  const commentPrefix =
    options.comment === false || options.comment === ""
      ? undefined
      : (options.comment ?? "#");

  const records = parse(content, {
    delimiter,
    quote,
    escape,
    bom,
    comment: undefined, // Don't let csv-parse skip comments; we need to parse them for reverse references
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });
  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }
  const headers = records[0];
  const schemas = records[1];
  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }

  const isComment = (cell: string): boolean =>
    commentPrefix !== undefined && cell.startsWith(commentPrefix);
  // The declaration parser expects a leading "#", so swap in "#" for
  // whatever comment marker is configured.
  const readDeclaration = (cell: string): ReverseReferenceDeclaration | null =>
    commentPrefix === undefined
      ? null
      : parseReverseReferenceDeclaration(
          `#${cell.slice(commentPrefix.length)}`,
        );

  // Parse reverse reference declarations from comment lines between schema row and data rows
  const reverseReferences: ReverseReferenceDeclaration[] = [];
  const dataRows: { cells: string[]; recordNumber: number }[] = [];
  for (let i = 2; i < records.length; i++) {
    const row: string[] = records[i];
    const firstCell = (row[0] ?? "").trim();
    if (isComment(firstCell)) {
      const decl = readDeclaration(firstCell);
      if (decl) {
        reverseReferences.push(decl);
      }
      // Skip comment lines (whether or not they're reverse ref declarations)
      continue;
    }
    // Remember the record's own position so error messages stay accurate
    // when comment records were skipped before it.
    dataRows.push({ cells: row, recordNumber: i + 1 });
  }
  // Also check schema row cells for comment-prefixed reverse reference declarations
  // (in case they appear as schema cells rather than separate rows)
  for (let col = 0; col < schemas.length; col++) {
    const cell = (schemas[col] ?? "").trim();
    if (isComment(cell)) {
      const decl = readDeclaration(cell);
      if (decl) {
        reverseReferences.push(decl);
      }
    }
  }

  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) => {
      const schemaString = schemas[index];
      const schema = parseSchema(schemaString);
      const config: PropertyConfig = {
        name: header,
        schema,
        validator: createValidator(schema),
        parser: (valueString: string) => parseValue(schema, valueString),
      };
      if (schema.type === "reference") {
        config.isReference = true;
        config.referenceTableName = schema.tableName;
        config.referenceIsArray = schema.isArray;
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseReferenceValue(
                schema,
                valueString,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) => parseReferenceIds(schema, valueString);
      } else if (hasNestedReferences(schema)) {
        config.isReference = true;
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseValueWithReferences(
                valueString,
                schema,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) =>
              parseValueWithReferenceIds(valueString, schema);
      }
      return config;
    },
  );

  // Add reverse reference property configs
  for (const decl of reverseReferences) {
    propertyConfigs.push({
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      // Reverse references are resolved after all rows are parsed
      parser: (_valueString: string) => null,
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    });
  }

  // Collect all referenced tables (including nested references in tuples/arrays)
  const references = new Set<string>();
  function collectReferences(schema: Schema): void {
    if (schema.type === "reference" || schema.type === "reverseReference") {
      references.add(schema.tableName);
    } else if (schema.type === "tuple") {
      schema.elements.forEach((el) => collectReferences(el.schema));
    } else if (schema.type === "array") {
      collectReferences(schema.element);
    } else if (schema.type === "union") {
      schema.members.forEach((m) => collectReferences(m));
    }
  }
  propertyConfigs.forEach((config) => {
    if (config.isReference && config.referenceTableName) {
      references.add(config.referenceTableName);
    }
    collectReferences(config.schema);
  });

  const objects = dataRows.map(({ cells, recordNumber }) => {
    const obj: Record<string, unknown> = {};
    propertyConfigs.forEach((config, colIndex) => {
      // Skip reverse reference columns — they don't have CSV cell data
      if (config.isReverseReference) {
        return;
      }
      const rawValue = cells[colIndex] ?? "";
      try {
        const parsed = config.parser(rawValue);
        // Skip validation for reference fields (validation happens during reference resolution)
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            `Validation failed for property "${config.name}" at row ${recordNumber}: ${rawValue}`,
          );
        }
        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${recordNumber}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });
    return obj;
  });

  // Resolve reverse references after all rows are parsed
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue !== undefined) {
          const resolved = resolveReverseReference(
            decl.schema,
            pkValue,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          );
          obj[decl.fieldName] =
            decl.isOptional && resolved.length === 0 ? null : resolved;
        } else {
          obj[decl.fieldName] = decl.isOptional ? null : [];
        }
      }
    }
  }

  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    // Declared reverse references are covered too: their configs carry a
    // reverseReference schema, which collectReferenceFields handles.
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
  };
  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
    );
  }
  return result;
}
2026-04-15 14:36:52 +08:00
/**
 * Generate runtime reference resolution code for a schema.
 * Returns a JS expression string that resolves references using lookup maps.
 *
 * @param schema - Schema of the value being resolved.
 * @param valueExpr - JS expression that evaluates to the unresolved value.
 * @param lookupVar - Maps a table name to the name of its primary-key lookup
 *   variable in the generated code.
 * @param pkField - Primary-key field of the current row. The generated code
 *   reads it from a variable named `row`.
 * @param reverseLookupVar - Maps (table, foreign key) to the name of the
 *   reverse lookup variable; without it reverse references are left as-is.
 * @returns A JS expression string; `valueExpr` itself when nothing needs resolving.
 */
function generateSchemaResolutionCode(
  schema: Schema,
  valueExpr: string,
  lookupVar: (tableName: string) => string,
  pkField: string,
  reverseLookupVar?: (tableName: string, foreignKey: string) => string,
): string {
  switch (schema.type) {
    case "reference": {
      const lookup = lookupVar(schema.tableName);
      const resolveOne = `${lookup}.get(String(${valueExpr}))`;
      const resolveMany = `${valueExpr}.map(id => ${lookup}.get(String(id)))`;
      if (schema.isOptional) {
        // Optional references pass null/undefined through untouched.
        const resolved = schema.isArray
          ? `(Array.isArray(${valueExpr}) ? ${resolveMany} : ${resolveOne})`
          : resolveOne;
        return `(${valueExpr} === null || ${valueExpr} === undefined ? ${valueExpr} : ${resolved})`;
      }
      if (schema.isArray) {
        return `(Array.isArray(${valueExpr}) ? ${resolveMany} : ${valueExpr})`;
      }
      return resolveOne;
    }
    case "reverseReference": {
      if (!reverseLookupVar) return valueExpr;
      const reverseLookup = reverseLookupVar(
        schema.tableName,
        schema.foreignKey,
      );
      // Use bracket access when the key is not a valid identifier so the
      // generated expression stays valid JavaScript.
      const pkAccess = /^[A-Za-z_$][\w$]*$/.test(pkField)
        ? `row.${pkField}`
        : `row[${JSON.stringify(pkField)}]`;
      const fallback = schema.isOptional ? "null" : "[]";
      return `(${reverseLookup}.get(String(${pkAccess})) || ${fallback})`;
    }
    case "tuple": {
      const elementResolvers = schema.elements.map((el, i) => {
        const elementExpr = `${valueExpr}[${i}]`;
        return hasNestedReferences(el.schema)
          ? generateSchemaResolutionCode(
              el.schema,
              elementExpr,
              lookupVar,
              pkField,
              reverseLookupVar,
            )
          : elementExpr;
      });
      return `[${elementResolvers.join(", ")}]`;
    }
    case "array": {
      if (!hasNestedReferences(schema.element)) {
        return valueExpr;
      }
      const itemResolve = generateSchemaResolutionCode(
        schema.element,
        "item",
        lookupVar,
        pkField,
        reverseLookupVar,
      );
      return `(${valueExpr}).map(item => ${itemResolve})`;
    }
    case "union": {
      // One candidate expression per reference-bearing member, tried in
      // order with `??`; the raw value is the last resort when the union
      // also has members without references.
      const resolveParts = schema.members
        .filter((m) => hasNestedReferences(m))
        .map((member) =>
          generateSchemaResolutionCode(
            member,
            valueExpr,
            lookupVar,
            pkField,
            reverseLookupVar,
          ),
        );
      if (schema.members.some((m) => !hasNestedReferences(m))) {
        resolveParts.push(valueExpr);
      }
      if (resolveParts.length === 0) return valueExpr;
      if (resolveParts.length === 1) return resolveParts[0];
      return `(${resolveParts.join(" ?? ")})`;
    }
    default:
      return valueExpr;
  }
}
/**
 * Generate JavaScript module code from CSV content.
 *
 * The CSV is parsed with `resolveReferences: false`, so the embedded raw rows
 * hold reference IDs. The emitted module default-exports `getData()`, which
 * on first call builds lookup maps for referenced tables (imported from
 * `./<table>.csv`, or taken from the module's own rows for self-references),
 * resolves reference columns row by row, and memoises the result. When no
 * column needs resolving, `getData()` returns the raw rows.
 *
 * @param content - CSV content string.
 * @param options - Loader options; `resolveReferences` is always overridden to false.
 * @returns `js` (module source) and `dts` (type definition, when generated).
 */
export function csvToModule(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): { js: string; dts?: string } {
  const result = parseCsv(content, { ...options, resolveReferences: false });
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";

  // Column and key names are arbitrary text; fall back to bracket/quoted
  // forms when a name is not a valid identifier so the emitted JS stays valid.
  const isIdentifier = (name: string): boolean =>
    /^[A-Za-z_$][\w$]*$/.test(name);
  const member = (objectExpr: string, name: string): string =>
    isIdentifier(name)
      ? `${objectExpr}.${name}`
      : `${objectExpr}[${JSON.stringify(name)}]`;
  const propertyKey = (name: string): string =>
    isIdentifier(name) ? name : JSON.stringify(name);

  const imports: string[] = [];
  const lookupInits: string[] = [];
  const lookupVarMap = new Map<string, string>();
  // Reverse lookup maps: grouped by (tableName, foreignKey)
  const reverseLookupInits: string[] = [];
  const reverseLookupVarMap = new Map<string, string>();
  const currentTableName = options.currentFilePath
    ? path.basename(
        options.currentFilePath,
        path.extname(options.currentFilePath),
      )
    : undefined;

  // Build forward lookup maps for referenced tables
  const uniqueTables = new Set(result.referenceFields.map((f) => f.tableName));
  // Also include tables from reverse references
  for (const decl of result.reverseReferences) {
    uniqueTables.add(decl.tableName);
  }
  uniqueTables.forEach((tableName) => {
    const lookupVar = `_${tableName}Lookup`;
    lookupVarMap.set(tableName, lookupVar);
    let rowsExpr = "_raw";
    if (tableName !== currentTableName) {
      const varName = `_${tableName}`;
      imports.push(`import ${varName} from './${tableName}.csv';`);
      rowsExpr = `${varName}()`;
    }
    lookupInits.push(
      `const ${lookupVar} = new Map(${rowsExpr}.map(p => [String(${member("p", defaultPrimaryKey)}), p]));`,
    );
  });

  // Build reverse lookup maps for reverse references. Declared reverse
  // references come first; reverse references that appear inside column
  // schemas are added as well, otherwise their generated resolution code
  // would refer to a lookup variable that was never defined.
  const reverseTargets = [
    ...result.reverseReferences.map((decl) => ({
      tableName: decl.tableName,
      foreignKey: decl.foreignKey,
    })),
    ...result.referenceFields.flatMap((field) =>
      field.foreignKey === undefined
        ? []
        : [{ tableName: field.tableName, foreignKey: field.foreignKey }],
    ),
  ];
  for (const target of reverseTargets) {
    const key = `${target.tableName}:${target.foreignKey}`;
    if (reverseLookupVarMap.has(key)) continue;
    const revLookupVar = `_${target.tableName}By_${target.foreignKey}`;
    reverseLookupVarMap.set(key, revLookupVar);
    const rowsExpr =
      target.tableName === currentTableName
        ? "_raw"
        : `_${target.tableName}()`;
    reverseLookupInits.push(
      `const ${revLookupVar} = new Map();`,
      `for (const r of ${rowsExpr}) {`,
      ` const k = String(${member("r", target.foreignKey)});`,
      ` if (!${revLookupVar}.has(k)) ${revLookupVar}.set(k, []);`,
      ` ${revLookupVar}.get(k).push(r);`,
      `}`,
    );
  }

  const lookupVar = (tableName: string) => lookupVarMap.get(tableName)!;
  const reverseLookupVar = (tableName: string, foreignKey: string) =>
    reverseLookupVarMap.get(`${tableName}:${foreignKey}`)!;

  const rowResolvers: string[] = [];
  for (const config of result.propertyConfigs) {
    if (config.isReverseReference) {
      // Reverse reference resolution
      const decl = result.reverseReferences.find(
        (d) => d.fieldName === config.name,
      );
      if (decl) {
        const revLookup = reverseLookupVar(decl.tableName, decl.foreignKey);
        const fallback = decl.isOptional ? "null" : "[]";
        rowResolvers.push(
          ` ${propertyKey(config.name)}: (${revLookup}.get(String(${member("row", defaultPrimaryKey)})) || ${fallback}),`,
        );
      }
    } else if (hasNestedReferences(config.schema)) {
      const resolveCode = generateSchemaResolutionCode(
        config.schema,
        member("row", config.name),
        lookupVar,
        defaultPrimaryKey,
        reverseLookupVar,
      );
      rowResolvers.push(` ${propertyKey(config.name)}: ${resolveCode},`);
    }
  }

  const rawJson = JSON.stringify(result.data, null, 2);
  const js = [
    ...imports,
    "",
    `const _raw = ${rawJson};`,
    "",
    "let _resolved = null;",
    "",
    "export default function getData() {",
    " if (_resolved) return _resolved;",
    // Publish the raw rows before resolving, so a re-entrant getData() call
    // made while the lookups are being built returns instead of recursing.
    " _resolved = _raw;",
    ...lookupInits.map((l) => ` ${l}`),
    ...reverseLookupInits.map((l) => ` ${l}`),
    ...(rowResolvers.length > 0
      ? [
          " _resolved = _raw.map(row => ({",
          " ...row,",
          ...rowResolvers,
          " }));",
        ]
      : []),
    " return _resolved;",
    "}",
  ].join("\n");

  return {
    js,
    dts: result.typeDefinition,
  };
}