inline-schema/src/csv-loader/loader.ts

384 lines
12 KiB
TypeScript
Raw Normal View History

import { parse } from "csv-parse/sync";
import {
parseSchema,
createValidator,
parseValue,
schemaToTypeString,
} from "../index.js";
import type {
Schema,
ReferenceSchema,
ReverseReferenceSchema,
} from "../types.js";
import type {
CsvLoaderOptions,
ReferenceFieldInfo,
CsvParseResult,
PropertyConfig,
ReverseReferenceDeclaration,
TypeDeclaration,
} from "./types.js";
import { ParseError } from "../parser.js";
import {
hasNestedReferences,
loadReferenceTable,
resolveReferenceId,
parseReferenceIds,
parseValueWithReferenceIds,
extractNestedReferenceIds,
collectReferenceFields,
parseValueWithReferences,
resolveReverseReference,
resolveNestedReferences,
parseReferenceValue,
} from "./reference-resolver.js";
import { generateTypeDefinition } from "./type-gen.js";
import { csvToModule } from "./module-gen.js";
import * as fs from "fs";
import * as path from "path";
import {
parseTypeDeclaration,
parseReverseReferenceDeclaration,
expandSchemaString,
resolveTypeReferences,
} from "./type-declarations.js";
2026-04-11 22:56:01 +08:00
/**
 * Parse CSV content into structured rows with schema validation.
 *
 * This is a standalone function that doesn't depend on the webpack/rspack
 * LoaderContext.
 *
 * Expected layout of `content` (after comment lines and empty lines are
 * removed): the first record holds the property names, the second record holds
 * one schema string per property, and every following record is a data row.
 * Data rows are optional; only the header and schema records are required.
 *
 * Lines whose trimmed text starts with the comment prefix are removed before
 * the CSV parser runs. Such a line may also carry a type declaration or a
 * reverse-reference declaration, which is collected and applied.
 *
 * @param content - CSV content string (at least a header row and a schema row)
 * @param options - Parsing options; `resourceName` is only used when emitting
 *   the type definition
 * @returns CsvParseResult containing the parsed rows, the per-property
 *   configuration, the referenced table names and, when `emitTypes` is on,
 *   a generated type definition
 * @throws Error when fewer than two records remain, when the header and schema
 *   rows differ in length, or when a cell fails to parse or validate
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  // `comment: false` disables comment handling entirely; otherwise default to "#".
  const comment =
    options.comment === false ? undefined : (options.comment ?? "#");
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
  const resolveReferences = options.resolveReferences ?? true;

  const reverseReferences: ReverseReferenceDeclaration[] = [];
  // Store raw type declarations (name + schema string) first; they are parsed
  // only after every declaration has been collected.
  const typeDeclarationsRaw: { typeName: string; schemaString: string }[] = [];

  // Pre-strip comment lines from content before passing it to csv-parse,
  // to avoid quote parsing errors in comment lines containing double quotes.
  // NOTE(review): the split is purely line-based, so a physical line inside a
  // multi-line quoted field that starts with the comment prefix would also be
  // dropped — confirm whether such input needs to be supported.
  let filteredContent = content;
  if (comment) {
    const nonCommentLines: string[] = [];
    for (const line of content.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!trimmed.startsWith(comment)) {
        nonCommentLines.push(line);
        continue;
      }
      // Try to parse as a type declaration first.
      const typeDecl = parseTypeDeclaration(trimmed, comment);
      if (typeDecl) {
        typeDeclarationsRaw.push(typeDecl);
        continue;
      }
      // Then try to parse as a reverse reference declaration.
      const reverseDecl = parseReverseReferenceDeclaration(trimmed, comment);
      if (reverseDecl) {
        reverseReferences.push(reverseDecl);
      }
      // Declarations and regular comment lines alike are dropped from the CSV.
    }
    filteredContent = nonCommentLines.join("\n");
  }

  const records = parse(filteredContent, {
    delimiter,
    quote,
    escape,
    bom,
    // Comment lines were already filtered out above.
    comment: undefined,
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }
  const headers = records[0];
  const schemas = records[1];
  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }
  const dataRows = records.slice(2);

  // Also check schema row cells for comment-prefixed type declarations
  // and reverse reference declarations.
  for (let col = 0; col < schemas.length; col++) {
    const cell = (schemas[col] ?? "").trim();
    if (!comment || !cell.startsWith(comment)) {
      continue;
    }
    const typeDecl = parseTypeDeclaration(cell, comment);
    if (typeDecl) {
      typeDeclarationsRaw.push(typeDecl);
      continue;
    }
    const reverseDecl = parseReverseReferenceDeclaration(cell, comment);
    if (reverseDecl) {
      reverseReferences.push(reverseDecl);
    }
  }

  // Map of declared type name -> raw schema string, used to expand type name
  // references inside other declarations before they are parsed.
  const declaredSchemaStrings = new Map<string, string>();
  for (const decl of typeDeclarationsRaw) {
    declaredSchemaStrings.set(decl.typeName, decl.schemaString);
  }

  // Parse each declaration (after expansion) and register it by name.
  const typeDeclarationsParsed: { name: string; schema: Schema }[] = [];
  const declaredTypes = new Map<string, Schema>();
  for (const decl of typeDeclarationsRaw) {
    const expandedSchema = expandSchemaString(
      decl.schemaString,
      declaredSchemaStrings,
    );
    const schema = parseSchema(expandedSchema.trim());
    typeDeclarationsParsed.push({ name: decl.typeName, schema });
    declaredTypes.set(decl.typeName, schema);
  }

  // Resolve type references within the declarations (for nested type refs).
  // Every declaration is resolved against the unresolved map; the map is only
  // updated afterwards so that resolution order does not affect the result.
  const typeDeclarations: TypeDeclaration[] = typeDeclarationsParsed.map(
    (decl) => ({
      name: decl.name,
      schema: resolveTypeReferences(decl.schema, declaredTypes),
    }),
  );
  // Update declaredTypes with resolved schemas for the column schema lookup.
  for (const decl of typeDeclarations) {
    declaredTypes.set(decl.name, decl.schema);
  }

  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) => {
      const schemaString = schemas[index];
      // A schema cell that exactly matches a declared type name uses that
      // declaration; anything else is parsed as an inline schema.
      const declared = declaredTypes.get(schemaString);
      const schema: Schema = declared ?? parseSchema(schemaString);
      const declaredTypeName =
        declared !== undefined ? schemaString : undefined;

      const config: PropertyConfig = {
        name: header,
        schema,
        validator: createValidator(schema),
        parser: (valueString: string) => parseValue(schema, valueString),
        declaredTypeName,
      };

      if (schema.type === "reference") {
        config.isReference = true;
        config.referenceTableName = schema.tableName;
        config.referenceIsArray = schema.isArray;
        // Either resolve through parseReferenceValue or keep only the ids.
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseReferenceValue(
                schema,
                valueString,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) => parseReferenceIds(schema, valueString);
      } else if (hasNestedReferences(schema)) {
        config.isReference = true;
        config.parser = resolveReferences
          ? (valueString: string) =>
              parseValueWithReferences(
                valueString,
                schema,
                refBaseDir,
                defaultPrimaryKey,
                options.currentFilePath,
              )
          : (valueString: string) =>
              parseValueWithReferenceIds(valueString, schema);
      }
      return config;
    },
  );

  // Add reverse reference property configs. They are appended after the CSV
  // columns because they have no cell of their own.
  for (const decl of reverseReferences) {
    propertyConfigs.push({
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      // Reverse references are resolved after all rows are parsed.
      parser: (_valueString: string) => null,
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    });
  }

  // Collect all referenced tables (including nested references in
  // tuples/arrays/unions).
  const references = new Set<string>();
  function collectReferences(schema: Schema): void {
    switch (schema.type) {
      case "reference":
      case "reverseReference":
        references.add(schema.tableName);
        break;
      case "tuple":
        schema.elements.forEach((el) => collectReferences(el.schema));
        break;
      case "array":
        collectReferences(schema.element);
        break;
      case "union":
        schema.members.forEach((m) => collectReferences(m));
        break;
      default:
        break;
    }
  }
  for (const config of propertyConfigs) {
    if (config.isReference && config.referenceTableName) {
      references.add(config.referenceTableName);
    }
    collectReferences(config.schema);
  }

  // Row numbers in error messages are 1-based and offset by the header and
  // schema records. They count parsed records, not physical lines, because
  // comment lines and empty lines were dropped before parsing.
  const objects = dataRows.map((row: string[], rowIndex: number) => {
    const obj: Record<string, unknown> = {};
    propertyConfigs.forEach((config, colIndex) => {
      // Skip reverse reference columns — they don't have CSV cell data.
      if (config.isReverseReference) {
        return;
      }
      // Short rows are allowed (relax_column_count); missing cells parse as "".
      const rawValue = row[colIndex] ?? "";
      try {
        const parsed = config.parser(rawValue);
        // Skip validation for reference fields (validation happens during
        // reference resolution).
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`,
          );
        }
        obj[config.name] = parsed;
      } catch (error) {
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });
    return obj;
  });

  // Resolve reverse references after all rows are parsed.
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue !== undefined) {
          const resolved = resolveReverseReference(
            decl.schema,
            pkValue,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          );
          // An optional reverse reference with no matches becomes null.
          obj[decl.fieldName] =
            decl.isOptional && resolved.length === 0 ? null : resolved;
        } else {
          // Row has no primary key value to look up.
          obj[decl.fieldName] = decl.isOptional ? null : [];
        }
      }
    }
  }

  // When references are left unresolved, report which fields hold reference
  // ids so the caller can deal with them.
  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
    // NOTE(review): reverse reference fields are presumably picked up by the
    // loop above, which requires hasNestedReferences/collectReferenceFields to
    // handle the reverseReference schema type — confirm in reference-resolver.
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
    typeDeclarations,
  };
  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
      typeDeclarations,
    );
  }
  return result;
}