feat(csv-loader): strip comments before parsing CSV
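Strip comment lines from the content before passing it to `csv-parse`, collecting reverse-reference declarations from those lines along the way. This prevents quote-parsing errors when a comment line contains double quotes, and it removes the need to filter comment rows out of the parsed records afterwards.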
This commit is contained in:
parent
5a1627c6f1
commit
9a2db5edb6
|
|
@ -40,6 +40,24 @@ describe("parseCsv - basic parsing", () => {
|
||||||
expect(result.data[1]).toEqual({ id: 2, count: 3, price: 4.5 });
|
expect(result.data[1]).toEqual({ id: 2, count: 3, price: 4.5 });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should parse CSV with non-ASCII characters and comments", () => {
|
||||||
|
const csv = [
|
||||||
|
'# id: unique intent state ID (e.g. "仙人掌怪-boost")',
|
||||||
|
"id",
|
||||||
|
"string",
|
||||||
|
"仙人掌怪-boost",
|
||||||
|
"仙人掌怪-defend",
|
||||||
|
"仙人掌怪-attack",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const result = parseCsv(csv, { emitTypes: false });
|
||||||
|
|
||||||
|
expect(result.data).toHaveLength(3);
|
||||||
|
expect(result.data[0]).toEqual({ id: "仙人掌怪-boost" });
|
||||||
|
expect(result.data[1]).toEqual({ id: "仙人掌怪-defend" });
|
||||||
|
expect(result.data[2]).toEqual({ id: "仙人掌怪-attack" });
|
||||||
|
});
|
||||||
|
|
||||||
it("should parse CSV with string literal columns (unquoted in CSV)", () => {
|
it("should parse CSV with string literal columns (unquoted in CSV)", () => {
|
||||||
const csv = [
|
const csv = [
|
||||||
"name,status",
|
"name,status",
|
||||||
|
|
|
||||||
|
|
@ -789,34 +789,41 @@ export function parseCsv(
|
||||||
const refBaseDir = options.refBaseDir;
|
const refBaseDir = options.refBaseDir;
|
||||||
const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
|
const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
|
||||||
|
|
||||||
const records = parse(content, {
|
// Pre-strip comment lines from content before passing to csv-parse,
|
||||||
|
// to avoid quote parsing errors in comment lines containing double quotes.
|
||||||
|
const reverseReferences: ReverseReferenceDeclaration[] = [];
|
||||||
|
let filteredContent = content;
|
||||||
|
if (comment) {
|
||||||
|
const lines = content.split(/\r?\n/);
|
||||||
|
const nonCommentLines: string[] = [];
|
||||||
|
for (const line of lines) {
|
||||||
|
const trimmed = line.trim();
|
||||||
|
if (trimmed.startsWith(comment)) {
|
||||||
|
const decl = parseReverseReferenceDeclaration(trimmed, comment);
|
||||||
|
if (decl) {
|
||||||
|
reverseReferences.push(decl);
|
||||||
|
}
|
||||||
|
// Skip comment lines
|
||||||
|
} else {
|
||||||
|
nonCommentLines.push(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filteredContent = nonCommentLines.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
const records = parse(filteredContent, {
|
||||||
delimiter,
|
delimiter,
|
||||||
quote,
|
quote,
|
||||||
escape,
|
escape,
|
||||||
bom,
|
bom,
|
||||||
// Don't let csv-parse skip comments; we need to parse them for reverse references.
|
|
||||||
// Comment lines are filtered out manually below using the configured comment character.
|
|
||||||
comment: undefined,
|
comment: undefined,
|
||||||
trim,
|
trim,
|
||||||
skip_empty_lines: true,
|
skip_empty_lines: true,
|
||||||
relax_column_count: true,
|
relax_column_count: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Filter out comment lines from all records, collecting reverse reference declarations
|
// Comment lines were already filtered out before parsing
|
||||||
const reverseReferences: ReverseReferenceDeclaration[] = [];
|
const filteredRecords = records;
|
||||||
const filteredRecords: string[][] = [];
|
|
||||||
for (const row of records) {
|
|
||||||
const firstCell = (row[0] ?? "").trim();
|
|
||||||
if (comment && firstCell.startsWith(comment)) {
|
|
||||||
const decl = parseReverseReferenceDeclaration(firstCell, comment);
|
|
||||||
if (decl) {
|
|
||||||
reverseReferences.push(decl);
|
|
||||||
}
|
|
||||||
// Skip comment lines (whether or not they're reverse ref declarations)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
filteredRecords.push(row);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (filteredRecords.length < 2) {
|
if (filteredRecords.length < 2) {
|
||||||
throw new Error("CSV must have at least 2 rows: headers and schemas");
|
throw new Error("CSV must have at least 2 rows: headers and schemas");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue