@@ -23,6 +23,7 @@ export type CSVDataFrame = DataFrame<Metadata>;
2323
2424const defaultChunkSize = 50 * 1024 ; // 50 KB, same as Papaparse default
2525const defaultMaxCachedBytes = 20 * 1024 * 1024 ; // 20 MB
26+ const paddingRows = 10 ; // fetch a bit before and after the requested range, to avoid cutting rows
2627
2728interface Params {
2829 url : string ;
@@ -365,6 +366,7 @@ export async function csvDataFrame({
365366 // TODO(SL): Update the average size of a row?
366367 // For now, we keep it constant, to provide stability - otherwise empty rows appear after the update
367368 // cache.averageRowBytes = getAverageRowBytes(cache);
369+ // eventTarget.dispatchEvent(new CustomEvent("resolve")); // to refresh the table
368370 } ) ;
369371
370372 // TODO(SL): evict old rows (or only cell contents?) if needed
@@ -468,8 +470,13 @@ function fetchRange({
468470} ) : Promise < void > {
469471 checkSignal ( signal ) ;
470472
471- let cursor = start ;
473+ const firstChunkOffset = Math . max (
474+ cache . serial . end , // don't fetch known rows again
475+ Math . floor ( start - paddingRows * cache . averageRowBytes ) // fetch a bit before, to ensure we get a complete first row
476+ ) ;
477+ let cursor = firstChunkOffset ;
472478 let isFirstStep = true ;
479+ const endCursor = Math . ceil ( end + paddingRows * cache . averageRowBytes ) ; // fetch a bit after, just in case the average is not accurate
473480
474481 return new Promise < void > ( ( resolve , reject ) => {
475482 Papa . parse < string [ ] > ( cache . url , {
@@ -481,15 +488,24 @@ function fetchRange({
481488 delimiter : cache . header . delimiter ,
482489 newline : cache . header . newline ,
483490 chunkSize : cache . chunkSize ,
484- firstChunkOffset : start , // custom option, only available in the modified Papaparse @severo_tests/papaparse
491+ firstChunkOffset, // custom option, only available in the modified Papaparse @severo_tests/papaparse
485492 step : ( { data, meta } , parser ) => {
486493 if ( signal ?. aborted ) {
487494 parser . abort ( ) ;
488495 return ;
489496 }
490497
491- const parsedRow = { start : cursor , end : start + meta . cursor , data } ;
492- cursor = start + meta . cursor ;
498+ const parsedRow = {
499+ start : cursor ,
500+ end : firstChunkOffset + meta . cursor ,
501+ data,
502+ } ;
503+ cursor = parsedRow . end ;
504+
505+ if ( isFirstStep ) {
506+ isFirstStep = false ;
507+ return ; // ignore the first row, because we cannot know if it's partial or complete
508+ }
493509
494510 if ( meta . delimiter !== cache . header . delimiter ) {
495511 reject (
@@ -507,12 +523,12 @@ function fetchRange({
507523 }
508524
509525 // add the row to the cache
510- if ( addParsedRowToCache ( { cache, parsedRow, isFirstStep } ) ) {
526+ if ( addParsedRowToCache ( { cache, parsedRow } ) ) {
511527 // send an event for the new row
512528 eventTarget . dispatchEvent ( new CustomEvent ( "resolve" ) ) ;
513529 }
514530
515- if ( cursor >= end ) {
531+ if ( cursor >= endCursor ) {
516532 // abort the parsing, we have enough rows for now
517533 parser . abort ( ) ;
518534 return ;
@@ -537,17 +553,10 @@ function isEmpty(data: string[]): boolean {
537553function addParsedRowToCache ( {
538554 cache,
539555 parsedRow,
540- isFirstStep,
541556} : {
542557 cache : Cache ;
543558 parsedRow : ParsedRow ;
544- isFirstStep : boolean ; // to handle the case where we start in the middle of a row
545559} ) : boolean {
546- if ( isFirstStep && parsedRow . data . length < cache . header . data . length ) {
547- // the first parsed row is partial, we ignore it, it must be part of the previous row
548- return false ;
549- }
550-
551560 // TODO(SL): optimize
552561 const inserted = ! isEmpty ( parsedRow . data ) ;
553562 const allRanges = [ cache . serial , ...cache . random ] ;