Skip to content

Commit e5187b1

Browse files
committed
uniformize code
1 parent 8f1f43c commit e5187b1

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

src/csv.ts

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ export async function csvDataFrame({
9595
// Fetch the first chunk (stop at 80% of the chunk size, to avoid doing another fetch, as we have no way to limit to one chunk in Papaparse)
9696
// TODO(SL): should we return the dataframe after parsing one row, and then keep parsing the chunk while triggering updates?
9797
const firstParsedRange: ParsedRange = {
98-
start: cursor,
98+
start: 0,
9999
end: cursor,
100100
validRows: [],
101101
};
@@ -108,30 +108,30 @@ export async function csvDataFrame({
108108
skipEmptyLines: false, // to be able to compute the byte ranges. Beware, it requires post processing (see result.rows.at(-1), for example, when fetching all the rows)
109109
dynamicTyping: false, // keep strings, and let the user convert them if needed
110110
step: ({ data, meta }, parser) => {
111-
const start = cursor;
112-
cursor = meta.cursor;
113-
const end = cursor;
111+
const parsedRow = {
112+
start: cursor,
113+
end: meta.cursor,
114+
data,
115+
};
116+
cursor = parsedRow.end;
117+
114118
if (
115-
cursor >= 0.8 * chunkSize ||
119+
cursor >= 0.8 * chunkSize || // stop at 80% of the chunk size, to avoid doing another fetch, as we have no way to limit to one chunk in Papaparse
116120
firstParsedRange.validRows.length >= 100
117121
) {
118122
// abort the parsing, we have enough rows for now
119123
parser.abort();
120124
return;
121125
}
122-
123-
const parsedRow = { start, end, data };
124-
// parsedRowIndex.set(start, parsedRow); // TODO(SL): remove?
125-
// for the statistics:
126-
cachedBytes += parsedRow.end - parsedRow.start;
127-
128-
firstParsedRange.end = end;
126+
// update the range end, even if the row is empty
127+
firstParsedRange.end = parsedRow.end;
129128

130129
if (isEmpty(data)) {
131130
// empty row, ignore
132131
return;
133132
}
134133
if (header === undefined) {
134+
// TODO(SL): should the header be included in the first range bytes?
135135
// first non-empty row: header
136136
header = {
137137
...parsedRow,
@@ -156,6 +156,8 @@ export async function csvDataFrame({
156156
}
157157
// valid row: add it to the range
158158
firstParsedRange.validRows.push(parsedRow);
159+
// for the statistics:
160+
cachedBytes += parsedRow.end - parsedRow.start;
159161
}
160162
// the errors field is ignored
161163
},
@@ -346,15 +348,6 @@ export async function csvDataFrame({
346348
return;
347349
}
348350

349-
console.debug({
350-
rowStart,
351-
rowEnd,
352-
estimatedStart,
353-
estimatedEnd,
354-
missingRanges,
355-
cache,
356-
});
357-
358351
// fetch each missing range and fill the cache
359352

360353
await Promise.all(

0 commit comments

Comments
 (0)