lukefleed
diff --git a/‎Cargo.lock‎
Lines changed: 0 additions & 11 deletions b/‎Cargo.lock‎
Lines changed: 0 additions & 11 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 4 additions & 10 deletions b/‎Cargo.toml‎
Lines changed: 4 additions & 10 deletions
diff --git a/‎README.md‎
Lines changed: 18 additions & 1 deletion b/‎README.md‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎benches/fixed/benchmark_lock_free_access.rs‎
Lines changed: 30 additions & 11 deletions b/‎benches/fixed/benchmark_lock_free_access.rs‎
Lines changed: 30 additions & 11 deletions
diff --git a/‎benches/fixed/benchmark_locked_access.rs‎
Lines changed: 28 additions & 10 deletions b/‎benches/fixed/benchmark_locked_access.rs‎
Lines changed: 28 additions & 10 deletions
@@ -3,28 +3,21 @@ name = "compressed-intvec"
 version = "0.5.0"
 edition = "2021"
 authors = ["Luca Lombardo"]
-description = "A compressed integer vector with fast random access that stores values with instantaneous codes in a bitstream"
+description = "Space-efficient integer vectors for Rust. Offers a fixed-width implementation for O(1) mutable and atomic access, and a variable-width implementation that uses instantaneous codes and sampling for high compression ratios on non-uniform data."
 readme = "README.md"
 license = "Apache-2.0"
 repository = "https://github.com/lukefleed/compressed-intvec"
-keywords = [
-    "compression",
-    "vector",
-    "bitstream",
-    "data-compression",
-    "integer-encoding",
-]
+keywords = ["compression", "vector", "integer", "data-structures", "succinct"]
 categories = ["data-structures", "compression", "algorithms"]
 exclude = [".github/*", "images/*", "python/*"]
 
 [dependencies]
 atomic = "0.5.3"
 bytemuck = { version = "1.23.1", optional = true }
 common_traits = "0.12.0"
-dsi-bitstream = {version = "0.5.0", features = ["mem_dbg"]}
+dsi-bitstream = { version = "0.5.0", features = ["mem_dbg"] }
 mem_dbg = "0.3.0"
 num-traits = "0.2.19"
-num_cpus = "1.17.0"
 parking_lot = "0.12.4"
 rayon = { version = "1.10.0", optional = true }
 serde = { version = "1.0.219", features = ["derive"], optional = true }
@@ -117,6 +110,7 @@ path = "benches/fixed/bench_random_write.rs"
 default = ["parallel"]
 parallel = ["rayon"]
 serde = ["dep:serde"]
+arch-dependent-storable = []
 
 
 # --- Build Profiles ---
 
@@ -411,7 +411,24 @@ The resulting SVGs file will be saved in the `images` directory.
 [`sux::BitFieldVec`]: https://docs.rs/sux/latest/sux/bits/bit_field_vec/index.html
 [`succinct::IntVector`]: https://docs.rs/succinct/latest/succinct/int_vec/trait.IntVec.html
 
+## Optional Features
+
+### `arch-dependent-storable`: Storing `usize` and `isize`
+
+By default, [`variable::IntVec`] only supports integer types with a fixed size (e.g., `u32`, `i64`). This guarantees that compressed data is portable across different machine architectures (e.g., from a 64-bit server to a 32-bit embedded device).
+
+The `arch-dependent-storable` feature flag enables [`Storable`] implementations for `usize` and `isize`. With the feature enabled, you can create an `IntVec<usize>` or `IntVec<isize>` directly.
+
+**Warning**: This feature breaks data portability. An `IntVec<usize>` created on a 64-bit system containing values larger than `u32::MAX` will cause a panic if deserialized or read on a 32-bit system; the same applies to an `IntVec<isize>` holding values outside the `i32` range. Only enable this feature if you can guarantee that your data will only ever be written and read on a single target architecture (e.g., `x86_64`).
+
+Enable it in your `Cargo.toml`:
+```toml
+compressed-intvec = { version = "0.5.0", features = ["arch-dependent-storable"] }
+```
+
+[`Storable`]: https://docs.rs/compressed-intvec/latest/compressed_intvec/variable/traits/trait.Storable.html
+[`variable::IntVec`]: https://docs.rs/compressed-intvec/latest/compressed_intvec/variable/struct.IntVec.html
+
 # TODO
 
 * [ ] Add support for [`epsilon-serde`](https://crates.io/crates/epserde)
-* [ ] Add SIMD feature
 
@@ -13,18 +13,19 @@ const OPS_PER_THREAD: usize = 100_000;
 const BIT_WIDTH: usize = 16; // Power of two for the lock-free path
 
 fn benchmark_lock_free_scaling(c: &mut Criterion) {
-    let mut thread_counts: Vec<usize> = (1..=num_cpus::get())
-        .filter(|n| n.is_power_of_two())
-        .collect();
-    if !thread_counts.contains(&num_cpus::get()) {
-        thread_counts.push(num_cpus::get());
+    // Determine the number of logical cores available.
+    let num_cores = std::thread::available_parallelism().unwrap().get();
+    let mut thread_counts: Vec<usize> = (1..=num_cores).filter(|n| n.is_power_of_two()).collect();
+    if !thread_counts.contains(&num_cores) {
+        thread_counts.push(num_cores);
     }
     thread_counts.sort_unstable();
     thread_counts.dedup();
 
     for &num_threads in &thread_counts {
         let total_ops = (OPS_PER_THREAD * num_threads) as u64;
-        let mut group = c.benchmark_group(format!("LockFreeScaling_Diffuse/{}Threads", num_threads));
+        let mut group =
+            c.benchmark_group(format!("LockFreeScaling_Diffuse/{}Threads", num_threads));
         group.throughput(Throughput::Elements(total_ops));
 
         // Pre-generate a single set of random indices for this benchmark configuration.
@@ -34,14 +35,22 @@ fn benchmark_lock_free_scaling(c: &mut Criterion) {
             .collect();
 
         // --- Setup Data Structures Once ---
-        let baseline_u16 = Arc::new((0..VECTOR_SIZE).map(|_| AtomicU16::new(0)).collect::<Vec<_>>());
+        let baseline_u16 = Arc::new(
+            (0..VECTOR_SIZE)
+                .map(|_| AtomicU16::new(0))
+                .collect::<Vec<_>>(),
+        );
         let afv_16bit = Arc::new(
             UAtomicFixedVec::<u64>::builder()
                 .bit_width(BitWidth::Explicit(BIT_WIDTH))
                 .build(&vec![0; VECTOR_SIZE])
                 .unwrap(),
         );
-        let sux_storage_16bit = Arc::new((0..(VECTOR_SIZE * BIT_WIDTH).div_ceil(64) + 2).map(|_| AtomicU64::new(0)).collect());
+        let sux_storage_16bit = Arc::new(
+            (0..(VECTOR_SIZE * BIT_WIDTH).div_ceil(64) + 2)
+                .map(|_| AtomicU64::new(0))
+                .collect(),
+        );
 
         // --- Benchmark Runs ---
         group.bench_function("Baseline_Vec<AtomicU16>/store", |b| {
@@ -78,7 +87,11 @@ fn run_store_on_atomic_u16(vec: &Arc<Vec<AtomicU16>>, num_threads: usize, indice
     });
 }
 
-fn run_store_on_atomic_fixed_vec(vec: &Arc<UAtomicFixedVec<u64>>, num_threads: usize, indices: &[usize]) {
+fn run_store_on_atomic_fixed_vec(
+    vec: &Arc<UAtomicFixedVec<u64>>,
+    num_threads: usize,
+    indices: &[usize],
+) {
     let barrier = Arc::new(Barrier::new(num_threads));
     let chunks: Vec<_> = indices.chunks(OPS_PER_THREAD).collect();
 
@@ -105,7 +118,13 @@ fn run_store_on_sux_vec(storage: &Arc<Vec<AtomicU64>>, num_threads: usize, indic
             let storage_clone = Arc::clone(storage);
             let barrier_clone = Arc::clone(&barrier);
             s.spawn(move || {
-                let sux_vec = unsafe { AtomicBitFieldVec::<u64, _>::from_raw_parts(storage_clone.as_slice(), BIT_WIDTH, VECTOR_SIZE) };
+                let sux_vec = unsafe {
+                    AtomicBitFieldVec::<u64, _>::from_raw_parts(
+                        storage_clone.as_slice(),
+                        BIT_WIDTH,
+                        VECTOR_SIZE,
+                    )
+                };
                 barrier_clone.wait();
                 for &index in *chunk {
                     unsafe {
@@ -125,4 +144,4 @@ criterion_group! {
         .measurement_time(Duration::from_secs(3));
     targets = benchmark_lock_free_scaling
 }
-criterion_main!(benches);
+criterion_main!(benches);
@@ -13,11 +13,11 @@ const OPS_PER_THREAD: usize = 100_000;
 const BIT_WIDTH: usize = 15; // Non-power of two to force the locked path
 
 fn benchmark_locked_scaling(c: &mut Criterion) {
-    let mut thread_counts: Vec<usize> = (1..=num_cpus::get())
-        .filter(|n| n.is_power_of_two())
-        .collect();
-    if !thread_counts.contains(&num_cpus::get()) {
-        thread_counts.push(num_cpus::get());
+    // Determine the number of logical cores available.
+    let num_cores = std::thread::available_parallelism().unwrap().get();
+    let mut thread_counts: Vec<usize> = (1..=num_cores).filter(|n| n.is_power_of_two()).collect();
+    if !thread_counts.contains(&num_cores) {
+        thread_counts.push(num_cores);
     }
     thread_counts.sort_unstable();
     thread_counts.dedup();
@@ -34,14 +34,22 @@ fn benchmark_locked_scaling(c: &mut Criterion) {
             .collect();
 
         // --- Setup Data Structures Once ---
-        let baseline_u16 = Arc::new((0..VECTOR_SIZE).map(|_| AtomicU16::new(0)).collect::<Vec<_>>());
+        let baseline_u16 = Arc::new(
+            (0..VECTOR_SIZE)
+                .map(|_| AtomicU16::new(0))
+                .collect::<Vec<_>>(),
+        );
         let afv_15bit = Arc::new(
             UAtomicFixedVec::<u64>::builder()
                 .bit_width(BitWidth::Explicit(BIT_WIDTH))
                 .build(&vec![0; VECTOR_SIZE])
                 .unwrap(),
         );
-        let sux_storage_15bit = Arc::new((0..(VECTOR_SIZE * BIT_WIDTH).div_ceil(64) + 2).map(|_| AtomicU64::new(0)).collect());
+        let sux_storage_15bit = Arc::new(
+            (0..(VECTOR_SIZE * BIT_WIDTH).div_ceil(64) + 2)
+                .map(|_| AtomicU64::new(0))
+                .collect(),
+        );
 
         // --- Benchmark Runs ---
         group.bench_function("Baseline_Vec<AtomicU16>/store", |b| {
@@ -78,7 +86,11 @@ fn run_store_on_atomic_u16(vec: &Arc<Vec<AtomicU16>>, num_threads: usize, indice
     });
 }
 
-fn run_store_on_atomic_fixed_vec(vec: &Arc<UAtomicFixedVec<u64>>, num_threads: usize, indices: &[usize]) {
+fn run_store_on_atomic_fixed_vec(
+    vec: &Arc<UAtomicFixedVec<u64>>,
+    num_threads: usize,
+    indices: &[usize],
+) {
     let barrier = Arc::new(Barrier::new(num_threads));
     let chunks: Vec<_> = indices.chunks(OPS_PER_THREAD).collect();
 
@@ -105,7 +117,13 @@ fn run_store_on_sux_vec(storage: &Arc<Vec<AtomicU64>>, num_threads: usize, indic
             let storage_clone = Arc::clone(storage);
             let barrier_clone = Arc::clone(&barrier);
             s.spawn(move || {
-                let sux_vec = unsafe { AtomicBitFieldVec::<u64, _>::from_raw_parts(storage_clone.as_slice(), BIT_WIDTH, VECTOR_SIZE) };
+                let sux_vec = unsafe {
+                    AtomicBitFieldVec::<u64, _>::from_raw_parts(
+                        storage_clone.as_slice(),
+                        BIT_WIDTH,
+                        VECTOR_SIZE,
+                    )
+                };
                 barrier_clone.wait();
                 for &index in *chunk {
                     unsafe {
@@ -125,4 +143,4 @@ criterion_group! {
         .measurement_time(Duration::from_secs(3));
     targets = benchmark_locked_scaling
 }
-criterion_main!(benches);
+criterion_main!(benches);