407 lines
13 KiB
TypeScript
407 lines
13 KiB
TypeScript
import { parse } from "csv-parse/sync";
|
|
import {
|
|
parseSchema,
|
|
createValidator,
|
|
parseValue,
|
|
schemaToTypeString,
|
|
} from "../index.js";
|
|
import type {
|
|
Schema,
|
|
ReferenceSchema,
|
|
ReverseReferenceSchema,
|
|
} from "../types.js";
|
|
import type {
|
|
CsvLoaderOptions,
|
|
ReferenceFieldInfo,
|
|
CsvParseResult,
|
|
PropertyConfig,
|
|
ReverseReferenceDeclaration,
|
|
TypeDeclaration,
|
|
} from "./types.js";
|
|
import { ParseError } from "../parser.js";
|
|
import {
|
|
hasNestedReferences,
|
|
loadReferenceTable,
|
|
resolveReferenceId,
|
|
parseReferenceIds,
|
|
parseValueWithReferenceIds,
|
|
extractNestedReferenceIds,
|
|
collectReferenceFields,
|
|
parseValueWithReferences,
|
|
resolveReverseReference,
|
|
resolveNestedReferences,
|
|
parseReferenceValue,
|
|
} from "./reference-resolver.js";
|
|
import { generateTypeDefinition } from "./type-gen.js";
|
|
import { csvToModule } from "./module-gen.js";
|
|
import * as fs from "fs";
|
|
import * as path from "path";
|
|
import {
|
|
parseTypeDeclaration,
|
|
parseReverseReferenceDeclaration,
|
|
expandSchemaString,
|
|
resolveTypeReferences,
|
|
} from "./type-declarations.js";
|
|
|
|
/**
 * Parse CSV content string into structured data with schema validation.
 * This is a standalone function that doesn't depend on webpack/rspack LoaderContext.
 *
 * Expected layout after comment lines and empty lines are removed:
 *   - record 1: column headers
 *   - record 2: one schema string per column
 *   - records 3+: data rows (optional — zero data rows is accepted)
 *
 * Lines whose trimmed text starts with the comment prefix are removed before
 * the CSV parser runs. Such lines are first offered to `parseTypeDeclaration`
 * and then to `parseReverseReferenceDeclaration`; when one of them returns a
 * declaration it is recorded, otherwise the line is discarded as a plain comment.
 *
 * @param content - CSV content string (must contain at least a header row and a schema row)
 * @param options - Parsing options. Defaults applied here: delimiter `,`, quote `"`,
 *   escape `\`, bom `true`, comment `#` (pass `false` to disable comment handling),
 *   trim `true`, emitTypes `true`, defaultPrimaryKey `"id"`, resolveReferences `true`.
 * @returns CsvParseResult containing parsed data and optional type definitions
 * @throws Error when fewer than two records remain after filtering, when the header
 *   and schema rows differ in length, or when a cell fails to parse or validate
 *   (the message carries the property name, row and column). Errors raised by the
 *   CSV parser or by schema parsing propagate unchanged.
 */
export function parseCsv(
  content: string,
  options: CsvLoaderOptions & { resourceName?: string } = {},
): CsvParseResult {
  const delimiter = options.delimiter ?? ",";
  const quote = options.quote ?? '"';
  const escape = options.escape ?? "\\";
  const bom = options.bom ?? true;
  const comment =
    options.comment === false ? undefined : (options.comment ?? "#");
  const trim = options.trim ?? true;
  const emitTypes = options.emitTypes ?? true;
  const refBaseDir = options.refBaseDir;
  const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
  const resolveReferences = options.resolveReferences ?? true;

  const reverseReferences: ReverseReferenceDeclaration[] = [];
  // Store raw type declarations (name + schema string) first, resolve after all names are known
  const typeDeclarationsRaw: { typeName: string; schemaString: string }[] = [];

  // Pre-strip comment lines from content before passing to csv-parse,
  // to avoid quote parsing errors in comment lines containing double quotes.
  // NOTE(review): the scan is purely line-based, so a physical line that starts
  // with the comment prefix inside a multi-line quoted field is dropped as well.
  let filteredContent = content;
  if (comment) {
    const nonCommentLines: string[] = [];
    for (const line of content.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!trimmed.startsWith(comment)) {
        nonCommentLines.push(line);
        continue;
      }
      // Comment line: a type declaration takes precedence over a reverse
      // reference declaration; anything else is a plain comment. In every
      // case the line itself is removed from the CSV body.
      const typeDecl = parseTypeDeclaration(trimmed, comment);
      if (typeDecl) {
        typeDeclarationsRaw.push(typeDecl);
        continue;
      }
      const decl = parseReverseReferenceDeclaration(trimmed, comment);
      if (decl) {
        reverseReferences.push(decl);
      }
    }
    filteredContent = nonCommentLines.join("\n");
  }

  // Comment handling is disabled in csv-parse because comment lines were
  // already filtered out above.
  const records = parse(filteredContent, {
    delimiter,
    quote,
    escape,
    bom,
    comment: undefined,
    trim,
    skip_empty_lines: true,
    relax_column_count: true,
  });

  if (records.length < 2) {
    throw new Error("CSV must have at least 2 rows: headers and schemas");
  }

  const headers = records[0];
  const schemas = records[1];

  if (headers.length !== schemas.length) {
    throw new Error(
      `Header count (${headers.length}) does not match schema count (${schemas.length})`,
    );
  }

  const dataRows = records.slice(2);

  // Also check schema row cells for comment-prefixed type declarations
  // and reverse reference declarations
  if (comment) {
    for (let col = 0; col < schemas.length; col++) {
      const cell = (schemas[col] ?? "").trim();
      if (!cell.startsWith(comment)) {
        continue;
      }
      // Try type declaration first
      const typeDecl = parseTypeDeclaration(cell, comment);
      if (typeDecl) {
        typeDeclarationsRaw.push(typeDecl);
        continue;
      }
      // Try reverse reference
      const decl = parseReverseReferenceDeclaration(cell, comment);
      if (decl) {
        reverseReferences.push(decl);
      }
    }
  }

  // Map every declared type name to its raw schema string. This must be
  // complete before any expansion so a declaration can refer to a type that
  // is declared later in the file.
  const declaredSchemaStrings = new Map<string, string>();
  for (const decl of typeDeclarationsRaw) {
    declaredSchemaStrings.set(decl.typeName, decl.schemaString);
  }

  // Parse type declarations with expansion of type name references
  const typeDeclarationsParsed: {
    name: string;
    schema: Schema;
    schemaString: string;
  }[] = [];
  for (const decl of typeDeclarationsRaw) {
    // Expand any type name references before parsing
    const expandedSchema = expandSchemaString(
      decl.schemaString,
      declaredSchemaStrings,
    );
    typeDeclarationsParsed.push({
      name: decl.typeName,
      schema: parseSchema(expandedSchema.trim()),
      schemaString: decl.schemaString,
    });
  }

  // Build declared types map (parsed, not yet reference-resolved schemas)
  const declaredTypes = new Map<string, Schema>();
  for (const decl of typeDeclarationsParsed) {
    declaredTypes.set(decl.name, decl.schema);
  }

  // Now resolve all type references within type declarations (for nested type refs).
  // Every declaration is resolved against the unresolved map; the map is only
  // overwritten afterwards so resolution order does not influence the result.
  const typeDeclarations: TypeDeclaration[] = [];
  for (const decl of typeDeclarationsParsed) {
    typeDeclarations.push({
      name: decl.name,
      schema: resolveTypeReferences(decl.schema, declaredTypes),
      schemaString: decl.schemaString,
    });
  }

  // Update declaredTypes with resolved schemas for column schema lookup
  for (const decl of typeDeclarations) {
    declaredTypes.set(decl.name, decl.schema);
  }

  const propertyConfigs: PropertyConfig[] = headers.map(
    (header: string, index: number) => {
      const schemaString = schemas[index];
      let schema: Schema;
      let declaredTypeName: string | undefined;
      let columnSchemaString: string | undefined;

      if (declaredTypes.has(schemaString)) {
        // The whole cell is exactly a declared type name: reuse its resolved schema.
        schema = declaredTypes.get(schemaString)!;
        declaredTypeName = schemaString;
      } else {
        // Expand any custom type name references before parsing
        const expandedSchema = expandSchemaString(
          schemaString,
          declaredSchemaStrings,
        );
        schema = parseSchema(expandedSchema.trim());
        // Only preserve the original schema string if expansion actually changed it
        if (expandedSchema !== schemaString) {
          columnSchemaString = schemaString;
        }
      }

      const config: PropertyConfig = {
        name: header,
        schema,
        validator: createValidator(schema),
        parser: (valueString: string) => parseValue(schema, valueString),
        declaredTypeName,
        schemaString: columnSchemaString,
      };

      if (schema.type === "reference") {
        // Column is itself a reference: either resolve it or keep the raw ids,
        // depending on the resolveReferences option.
        config.isReference = true;
        config.referenceTableName = schema.tableName;
        config.referenceIsArray = schema.isArray;
        if (resolveReferences) {
          config.parser = (valueString: string) => {
            return parseReferenceValue(
              schema,
              valueString,
              refBaseDir,
              defaultPrimaryKey,
              options.currentFilePath,
            );
          };
        } else {
          config.parser = (valueString: string) => {
            return parseReferenceIds(schema, valueString);
          };
        }
      } else if (hasNestedReferences(schema)) {
        // References appear somewhere inside a composite schema.
        config.isReference = true;
        if (resolveReferences) {
          config.parser = (valueString: string) => {
            return parseValueWithReferences(
              valueString,
              schema,
              refBaseDir,
              defaultPrimaryKey,
              options.currentFilePath,
            );
          };
        } else {
          config.parser = (valueString: string) => {
            return parseValueWithReferenceIds(valueString, schema);
          };
        }
      }

      return config;
    },
  );

  // Add reverse reference property configs. They are appended after the real
  // columns, so they have no matching CSV cell.
  for (const decl of reverseReferences) {
    const config: PropertyConfig = {
      name: decl.fieldName,
      schema: decl.schema,
      validator: createValidator(decl.schema),
      parser: (_valueString: string) => {
        // Reverse references are resolved after all rows are parsed
        return null;
      },
      isReference: true,
      isReverseReference: true,
      referenceTableName: decl.tableName,
      referenceIsArray: true,
      reverseReferenceForeignKey: decl.foreignKey,
    };
    propertyConfigs.push(config);
  }

  // Collect all referenced tables (including nested references in tuples/arrays)
  const references = new Set<string>();
  function collectReferences(schema: Schema): void {
    if (schema.type === "reference") {
      references.add(schema.tableName);
    } else if (schema.type === "reverseReference") {
      references.add(schema.tableName);
    } else if (schema.type === "tuple") {
      schema.elements.forEach((el) => collectReferences(el.schema));
    } else if (schema.type === "array") {
      collectReferences(schema.element);
    } else if (schema.type === "union") {
      schema.members.forEach((m) => collectReferences(m));
    }
  }
  propertyConfigs.forEach((config) => {
    if (config.isReference && config.referenceTableName) {
      references.add(config.referenceTableName);
    }
    collectReferences(config.schema);
  });

  // Row numbers in error messages are `rowIndex + 3`: 1-based, with the header
  // and schema records counted first. Comment lines and empty lines were
  // removed before parsing, so they are not included in that count.
  const objects = dataRows.map((row: string[], rowIndex: number) => {
    const obj: Record<string, unknown> = {};
    propertyConfigs.forEach((config, colIndex) => {
      // Skip reverse reference columns — they don't have CSV cell data
      if (config.isReverseReference) {
        return;
      }
      // Short rows are allowed (relax_column_count); missing cells parse as "".
      const rawValue = row[colIndex] ?? "";
      try {
        const parsed = config.parser(rawValue);
        // Skip validation for reference fields (validation happens during reference resolution)
        if (!config.isReference && !config.validator(parsed)) {
          throw new Error(
            `Validation failed for property "${config.name}" at row ${rowIndex + 3}: ${rawValue}`,
          );
        }
        obj[config.name] = parsed;
      } catch (error) {
        // Add row/column context to Error instances; rethrow anything else as is.
        if (error instanceof Error) {
          throw new Error(
            `Failed to parse property "${config.name}" at row ${rowIndex + 3}, column ${colIndex + 1}: ${error.message}`,
          );
        }
        throw error;
      }
    });
    return obj;
  });

  // Resolve reverse references after all rows are parsed
  if (resolveReferences) {
    for (const decl of reverseReferences) {
      for (const obj of objects) {
        const pkValue = obj[defaultPrimaryKey];
        if (pkValue !== undefined) {
          const resolved = resolveReverseReference(
            decl.schema,
            pkValue,
            refBaseDir,
            defaultPrimaryKey,
            options.currentFilePath,
          );
          // Optional reverse references collapse an empty result to null.
          obj[decl.fieldName] =
            decl.isOptional && resolved.length === 0 ? null : resolved;
        } else {
          // The row has no primary key value to look up.
          obj[decl.fieldName] = decl.isOptional ? null : [];
        }
      }
    }
  }

  // Reference field metadata is only gathered when references are left unresolved.
  const referenceFields: ReferenceFieldInfo[] = [];
  if (!resolveReferences) {
    for (const config of propertyConfigs) {
      if (hasNestedReferences(config.schema)) {
        referenceFields.push(
          ...collectReferenceFields(config.schema, config.name),
        );
      }
    }
    // Reverse reference fields are already included by collectReferenceFields
    // above (which handles the reverseReference schema type), so no additional
    // loop is needed here.
  }

  const result: CsvParseResult = {
    data: objects,
    propertyConfigs,
    references,
    referenceFields,
    reverseReferences,
    typeDeclarations,
  };

  if (emitTypes) {
    result.typeDefinition = generateTypeDefinition(
      options.resourceName || "",
      propertyConfigs,
      references,
      options.currentFilePath,
      typeDeclarations,
    );
  }

  return result;
}
|