Skip to content

Commit 03127aa

Browse files
authored
csplit: add benchmarks for line number and regex pattern splitting (#10927)
1 parent 54566dd commit 03127aa

File tree

4 files changed

+89
-1
lines changed

4 files changed

+89
-1
lines changed

.github/workflows/benchmarks.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ jobs:
5252
uu_uniq,
5353
uu_wc,
5454
uu_factor,
55-
uu_date
55+
uu_date,
56+
uu_csplit
5657
]
5758
steps:
5859
- uses: actions/checkout@v6

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/csplit/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,12 @@ fluent = { workspace = true }
2828
[[bin]]
2929
name = "csplit"
3030
path = "src/main.rs"
31+
32+
# Dev-only dependencies pulled in for the csplit benchmark: the divan harness,
# temporary directories for output files, and uucore's `benchmark` helpers.
[dev-dependencies]
33+
divan = { workspace = true }
34+
tempfile = { workspace = true }
35+
uucore = { workspace = true, features = ["benchmark"] }
36+
37+
# Benchmark target; the bench source provides its own `main` (which calls
# `divan::main`), so the default test harness is disabled below.
[[bench]]
38+
name = "csplit_bench"
39+
harness = false
src/uu/csplit/benches/csplit_bench.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use tempfile::TempDir;
8+
use uu_csplit::uumain;
9+
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
10+
11+
/// Benchmark splitting by line number
12+
#[divan::bench]
13+
fn csplit_line_number(bencher: Bencher) {
14+
let data = text_data::generate_by_lines(100_000, 80);
15+
let file_path = setup_test_file(&data);
16+
17+
bencher
18+
.with_inputs(|| {
19+
let output_dir = TempDir::new().unwrap();
20+
let prefix = output_dir.path().join("xx");
21+
(output_dir, prefix.to_str().unwrap().to_string())
22+
})
23+
.bench_values(|(output_dir, prefix)| {
24+
black_box(run_util_function(
25+
uumain,
26+
&[
27+
"-f",
28+
&prefix,
29+
file_path.to_str().unwrap(),
30+
"10000",
31+
"50000",
32+
"90000",
33+
],
34+
));
35+
drop(output_dir);
36+
});
37+
}
38+
39+
/// Benchmark splitting by regex pattern
40+
#[divan::bench]
41+
fn csplit_regex_pattern(bencher: Bencher) {
42+
// Generate data with periodic marker lines that we can split on
43+
let mut data = Vec::new();
44+
for i in 0..100_000 {
45+
if i % 10_000 == 0 && i > 0 {
46+
data.extend_from_slice(format!("SECTION {i}\n").as_bytes());
47+
} else {
48+
data.extend_from_slice(format!("line {i} with some content to process\n").as_bytes());
49+
}
50+
}
51+
let file_path = setup_test_file(&data);
52+
53+
bencher
54+
.with_inputs(|| {
55+
let output_dir = TempDir::new().unwrap();
56+
let prefix = output_dir.path().join("xx");
57+
(output_dir, prefix.to_str().unwrap().to_string())
58+
})
59+
.bench_values(|(output_dir, prefix)| {
60+
black_box(run_util_function(
61+
uumain,
62+
&[
63+
"-f",
64+
&prefix,
65+
file_path.to_str().unwrap(),
66+
"/^SECTION/",
67+
"{*}",
68+
],
69+
));
70+
drop(output_dir);
71+
});
72+
}
73+
74+
fn main() {
75+
divan::main();
76+
}

0 commit comments

Comments
 (0)