Skip to content

Commit bb4a2b7

Browse files
authored
fix: panic when lance.auto_cleanup.interval is set to 0 (#5571)
For now, when lance.auto_cleanup.interval is set to 0, dataset commits panic with a "division by zero" error: ``` thread 'dataset::cleanup::tests::test_auto_cleanup_interval_zero' panicked at rust/lance/src/dataset/cleanup.rs:672:12: attempt to calculate the remainder with a divisor of zero note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace ``` To fix the panic, we can interpret `lance.auto_cleanup.interval = 0` as triggering cleanup after each commit, which is equivalent to `lance.auto_cleanup.interval = 1`
1 parent f20e0a9 commit bb4a2b7

File tree

1 file changed

+44
-10
lines changed

1 file changed

+44
-10
lines changed

rust/lance/src/dataset/cleanup.rs

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ pub async fn auto_cleanup_hook(
669669
}
670670
};
671671

672-
if manifest.version % interval != 0 {
672+
if interval != 0 && manifest.version % interval != 0 {
673673
return Ok(None);
674674
}
675675
} else {
@@ -1345,6 +1345,15 @@ mod tests {
13451345
assert_eq!(removed.old_versions, 1);
13461346
}
13471347

1348+
// Helper function to check that the number of files is correct.
1349+
async fn check_num_files(fixture: &MockDatasetFixture, num_expected_files: usize) {
1350+
let file_count = fixture.count_files().await.unwrap();
1351+
1352+
assert_eq!(file_count.num_data_files, num_expected_files);
1353+
assert_eq!(file_count.num_manifest_files, num_expected_files);
1354+
assert_eq!(file_count.num_tx_files, num_expected_files);
1355+
}
1356+
13481357
#[tokio::test]
13491358
async fn auto_cleanup_old_versions() {
13501359
// Every n commits, all versions older than T should be deleted.
@@ -1371,15 +1380,6 @@ mod tests {
13711380
)
13721381
.unwrap();
13731382

1374-
// Helper function to check that the number of files is correct.
1375-
async fn check_num_files(fixture: &MockDatasetFixture, num_expected_files: usize) {
1376-
let file_count = fixture.count_files().await.unwrap();
1377-
1378-
assert_eq!(file_count.num_data_files, num_expected_files);
1379-
assert_eq!(file_count.num_manifest_files, num_expected_files);
1380-
assert_eq!(file_count.num_tx_files, num_expected_files);
1381-
}
1382-
13831383
// First, write many files within the "older_than" window. Check that
13841384
// no files are automatically cleaned up.
13851385
for num_expected_files in 2..2 * cleanup_interval {
@@ -1441,6 +1441,40 @@ mod tests {
14411441
}
14421442
}
14431443

1444+
#[tokio::test]
1445+
async fn test_auto_cleanup_interval_zero() {
1446+
let fixture = MockDatasetFixture::try_new().unwrap();
1447+
1448+
fixture.create_some_data().await.unwrap();
1449+
fixture.overwrite_some_data().await.unwrap();
1450+
fixture.overwrite_some_data().await.unwrap();
1451+
check_num_files(&fixture, 3).await;
1452+
1453+
let mut dataset = fixture.open().await.unwrap();
1454+
let mut config_updates = HashMap::new();
1455+
config_updates.insert(
1456+
"lance.auto_cleanup.interval".to_string(),
1457+
Some("0".to_string()),
1458+
);
1459+
config_updates.insert(
1460+
"lance.auto_cleanup.retain_versions".to_string(),
1461+
Some("1".to_string()),
1462+
);
1463+
dataset
1464+
.update_config(config_updates)
1465+
.replace()
1466+
.await
1467+
.unwrap();
1468+
1469+
fixture.overwrite_some_data().await.unwrap();
1470+
fixture.overwrite_some_data().await.unwrap();
1471+
// The last version before the new commit is retained, means we have 2 versions to assert
1472+
check_num_files(&fixture, 2).await;
1473+
1474+
fixture.overwrite_some_data().await.unwrap();
1475+
check_num_files(&fixture, 2).await;
1476+
}
1477+
14441478
#[tokio::test]
14451479
async fn cleanup_recent_verified_files() {
14461480
let fixture = MockDatasetFixture::try_new().unwrap();

0 commit comments

Comments
 (0)