Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions src/uu/sort/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use crate::{
GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo,
};

// Fixed 64 KiB growth step: the `buffer.resize(.. + ALLOC_CHUNK_SIZE, 0)` calls in
// `read_to_buffer` enlarge the read buffer by this many bytes at a time.
const ALLOC_CHUNK_SIZE: usize = 64 * 1024;
// Byte budget (4 MiB) from which the token-buffer element cap below is derived.
const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024;
// The same budget expressed as a count of `Range<usize>` elements. `parse_lines`
// shrinks `token_buffer` back to this capacity when it has grown beyond it.
const MAX_TOKEN_BUFFER_ELEMS: usize = MAX_TOKEN_BUFFER_BYTES / size_of::<Range<usize>>();

Expand Down Expand Up @@ -181,8 +180,7 @@ pub fn read<T: Read>(
mut buffer,
} = recycled_chunk;
if buffer.len() < carry_over.len() {
// Append only the tail of carry_over that does not fit in the buffer yet; this avoids the cost of zero-filling the buffer first
buffer.extend_from_slice(&carry_over[buffer.len()..]);
buffer.resize(carry_over.len() + 8 * 1024, 0);
}
buffer[..carry_over.len()].copy_from_slice(carry_over);
let (read, should_continue) = read_to_buffer(
Expand Down Expand Up @@ -254,6 +252,9 @@ fn parse_lines<'a>(
assert!(line_data.parsed_floats.is_empty());
assert!(line_data.line_num_floats.is_empty());
token_buffer.clear();
if token_buffer.capacity() > MAX_TOKEN_BUFFER_ELEMS {
token_buffer.shrink_to(MAX_TOKEN_BUFFER_ELEMS);
}
const SMALL_CHUNK_BYTES: usize = 64 * 1024;
let mut estimated = (*line_count_hint).max(1);
let mut exact_line_count = None;
Expand All @@ -266,8 +267,8 @@ fn parse_lines<'a>(
exact_line_count = Some(count);
estimated = count;
} else if estimated == 1 {
const LINE_LEN_HINT: usize = 128;
estimated = (read.len() / LINE_LEN_HINT).clamp(1, 1024);
const LINE_LEN_HINT: usize = 32;
estimated = (read.len() / LINE_LEN_HINT).max(1);
}
lines.reserve(estimated);
if settings.precomputed.selections_per_line > 0 {
Expand Down Expand Up @@ -348,7 +349,12 @@ fn read_to_buffer<T: Read>(
if max_buffer_size > buffer.len() {
// we can grow the buffer
let prev_len = buffer.len();
buffer.resize(prev_len + ALLOC_CHUNK_SIZE, 0);
let target = if buffer.len() < max_buffer_size / 2 {
buffer.len().saturating_mul(2)
} else {
max_buffer_size
};
buffer.resize(target.min(max_buffer_size), 0);
read_target = &mut buffer[prev_len..];
continue;
}
Expand All @@ -368,7 +374,8 @@ fn read_to_buffer<T: Read>(

// We need to read more lines
let len = buffer.len();
buffer.resize(len + ALLOC_CHUNK_SIZE, 0);
let grow_by = (len / 2).max(1024 * 1024);
buffer.resize(len + grow_by, 0);
read_target = &mut buffer[len..];
} else {
// This file has been fully read.
Expand Down
Loading
Loading