diff --git a/crates/plotnik-cli/src/commands/dump.rs b/crates/plotnik-cli/src/commands/dump.rs
index 59b28d8d..558eeee9 100644
--- a/crates/plotnik-cli/src/commands/dump.rs
+++ b/crates/plotnik-cli/src/commands/dump.rs
@@ -82,7 +82,7 @@ pub fn run(args: DumpArgs) {
         query.emit().expect("bytecode emission failed")
     };
 
-    let module = Module::from_bytes(bytecode).expect("module loading failed");
+    let module = Module::load(&bytecode).expect("module loading failed");
     let colors = Colors::new(args.color);
     print!("{}", dump(&module, colors));
 }
diff --git a/crates/plotnik-cli/src/commands/infer.rs b/crates/plotnik-cli/src/commands/infer.rs
index c8e5aaf6..b78b92d0 100644
--- a/crates/plotnik-cli/src/commands/infer.rs
+++ b/crates/plotnik-cli/src/commands/infer.rs
@@ -96,7 +96,7 @@ pub fn run(args: InferArgs) {
         }
         query.emit().expect("bytecode emission failed")
     };
-    let module = Module::from_bytes(bytecode).expect("module loading failed");
+    let module = Module::load(&bytecode).expect("module loading failed");
 
     // Emit TypeScript types
     let void_type = match args.void_type.as_deref() {
diff --git a/crates/plotnik-cli/src/commands/run_common.rs b/crates/plotnik-cli/src/commands/run_common.rs
index 77333813..5cc80da3 100644
--- a/crates/plotnik-cli/src/commands/run_common.rs
+++ b/crates/plotnik-cli/src/commands/run_common.rs
@@ -194,7 +194,7 @@ pub fn prepare_query(input: QueryInput) -> PreparedQuery {
     }
 
     let bytecode = emit_linked(&query).expect("emit failed");
-    let module = Module::from_bytes(bytecode).expect("module load failed");
+    let module = Module::load(&bytecode).expect("module load failed");
     let entrypoint = resolve_entrypoint(&module, input.entry);
 
     let tree = lang.parse(&source_code);
diff --git a/crates/plotnik-lib/src/bytecode/aligned_vec.rs b/crates/plotnik-lib/src/bytecode/aligned_vec.rs
new file mode 100644
index 00000000..ca7aa828
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/aligned_vec.rs
@@ -0,0 +1,108 @@
+//! 64-byte aligned storage for bytecode.
+//!
+//! Bytecode sections are 64-byte aligned internally. For this alignment to be
+//! meaningful at runtime, the buffer itself must start at a 64-byte boundary.
+//! A plain `Vec<u8>` only guarantees the alignment of `u8`, which is 1.
+
+use std::ops::Deref;
+
+/// Alignment for bytecode buffers (matches `SECTION_ALIGN`).
+pub const ALIGN: usize = 64;
+
+/// 64-byte aligned block for bytecode storage.
+#[repr(C, align(64))]
+#[derive(Clone, Copy)]
+struct Block([u8; ALIGN]);
+
+// `repr(align(..))` only accepts a literal, so pin it to `ALIGN` at compile time.
+// `as_slice` relies on blocks being exactly `ALIGN` bytes, back to back.
+const _: () = assert!(
+    std::mem::size_of::<Block>() == ALIGN && std::mem::align_of::<Block>() == ALIGN
+);
+
+/// Immutable 64-byte aligned byte storage.
+///
+/// Uses `Vec<Block>` internally — `Vec` guarantees element alignment,
+/// so the data starts at a 64-byte boundary. No custom allocator needed.
+#[derive(Clone)]
+pub struct AlignedVec {
+    blocks: Vec<Block>,
+    len: usize,
+}
+
+impl AlignedVec {
+    /// Copy bytes into aligned storage.
+    pub fn copy_from_slice(bytes: &[u8]) -> Self {
+        if bytes.is_empty() {
+            return Self {
+                blocks: Vec::new(),
+                len: 0,
+            };
+        }
+
+        let num_blocks = bytes.len().div_ceil(ALIGN);
+        let mut blocks = vec![Block([0; ALIGN]); num_blocks];
+
+        // Copy block by block to stay safe; the unused tail of the last block stays zeroed.
+        for (block, chunk) in blocks.iter_mut().zip(bytes.chunks(ALIGN)) {
+            block.0[..chunk.len()].copy_from_slice(chunk);
+        }
+
+        Self {
+            blocks,
+            len: bytes.len(),
+        }
+    }
+
+    /// Read a file into aligned storage.
+    pub fn from_file(path: impl AsRef<std::path::Path>) -> std::io::Result<Self> {
+        let bytes = std::fs::read(path)?;
+        Ok(Self::copy_from_slice(&bytes))
+    }
+
+    /// Number of bytes stored.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Check if empty.
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// View as byte slice.
+    ///
+    /// # Panics
+    /// Panics if `len` exceeds the allocated capacity (internal invariant violation).
+    pub fn as_slice(&self) -> &[u8] {
+        if self.blocks.is_empty() {
+            return &[];
+        }
+        assert!(
+            self.len <= self.blocks.len() * ALIGN,
+            "AlignedVec invariant violated: len {} exceeds capacity {}",
+            self.len,
+            self.blocks.len() * ALIGN
+        );
+        // SAFETY: Block is repr(C) with only [u8; 64], so pointer cast is valid.
+        // We only expose `len` bytes, which were initialized in copy_from_slice.
+        unsafe { std::slice::from_raw_parts(self.blocks.as_ptr() as *const u8, self.len) }
+    }
+}
+
+impl Deref for AlignedVec {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
+impl std::fmt::Debug for AlignedVec {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("AlignedVec")
+            .field("len", &self.len)
+            .field("aligned", &(self.blocks.as_ptr() as usize).is_multiple_of(ALIGN))
+            .finish()
+    }
+}
diff --git a/crates/plotnik-lib/src/bytecode/aligned_vec_tests.rs b/crates/plotnik-lib/src/bytecode/aligned_vec_tests.rs
new file mode 100644
index 00000000..dc7650d7
--- /dev/null
+++ b/crates/plotnik-lib/src/bytecode/aligned_vec_tests.rs
@@ -0,0 +1,76 @@
+use super::aligned_vec::{AlignedVec, ALIGN};
+
+fn is_aligned(ptr: *const u8) -> bool {
+    (ptr as usize).is_multiple_of(ALIGN)
+}
+
+#[test]
+fn alignment_guarantee() {
+    let data: Vec<u8> = (0..100).collect();
+    let vec = AlignedVec::copy_from_slice(&data);
+    assert!(is_aligned(vec.as_ptr()));
+}
+
+#[test]
+fn copy_from_slice() {
+    let data = [1u8, 2, 3, 4, 5];
+    let vec = AlignedVec::copy_from_slice(&data);
+
+    assert!(is_aligned(vec.as_ptr()));
+    assert_eq!(&*vec, &data);
+}
+
+#[test]
+fn empty_slice() {
+    let vec = AlignedVec::copy_from_slice(&[]);
+    assert!(vec.is_empty());
+    assert_eq!(vec.len(), 0);
+    assert_eq!(vec.as_slice(), &[] as &[u8]);
+}
+
+#[test]
+fn clone_preserves_alignment() {
+    let data: Vec<u8> = (0..100).collect();
+    let vec = AlignedVec::copy_from_slice(&data);
+    let cloned = vec.clone();
+
+    assert!(is_aligned(cloned.as_ptr()));
+    assert_eq!(&*cloned, &*vec);
+}
+
+#[test]
+fn deref_to_slice() {
+    let vec = AlignedVec::copy_from_slice(&[10, 20, 30]);
+
+    let slice: &[u8] = &vec;
+    assert_eq!(slice, &[10, 20, 30]);
+    assert_eq!(vec[0], 10);
+    assert_eq!(vec[2], 30);
+}
+
+#[test]
+fn large_data() {
+    let data: Vec<u8> = (0..10_000).map(|i| (i % 256) as u8).collect();
+    let vec = AlignedVec::copy_from_slice(&data);
+
+    assert!(is_aligned(vec.as_ptr()));
+    assert_eq!(&*vec, &data[..]);
+}
+
+#[test]
+fn partial_block() {
+    let data: Vec<u8> = (0..37).collect();
+    let vec = AlignedVec::copy_from_slice(&data);
+
+    assert_eq!(vec.len(), 37);
+    assert_eq!(&*vec, &data[..]);
+}
+
+#[test]
+fn exact_block_boundary() {
+    let data: Vec<u8> = (0..128).map(|i| i as u8).collect();
+    let vec = AlignedVec::copy_from_slice(&data);
+
+    assert_eq!(vec.len(), 128);
+    assert_eq!(&*vec, &data[..]);
+}
diff --git a/crates/plotnik-lib/src/bytecode/mod.rs b/crates/plotnik-lib/src/bytecode/mod.rs
index 1d989e4c..350e8473 100644
--- a/crates/plotnik-lib/src/bytecode/mod.rs
+++ b/crates/plotnik-lib/src/bytecode/mod.rs
@@ -2,6 +2,7 @@
 //!
 //! Implements the binary format specified in `docs/binary-format/`.
 
+mod aligned_vec;
 mod constants;
 mod dump;
 mod effects;
@@ -16,6 +17,8 @@ mod nav;
 mod sections;
 mod type_meta;
 
+pub use aligned_vec::AlignedVec;
+
 pub use constants::{
     MAGIC, MAX_MATCH_PAYLOAD_SLOTS, MAX_PRE_EFFECTS, SECTION_ALIGN, STEP_SIZE, VERSION,
 };
@@ -56,6 +59,8 @@ pub use ir::{
     TrampolineIR,
 };
 
+#[cfg(test)]
+mod aligned_vec_tests;
 #[cfg(test)]
 mod effects_tests;
 #[cfg(test)]
diff --git a/crates/plotnik-lib/src/bytecode/module.rs b/crates/plotnik-lib/src/bytecode/module.rs
index ebfc3d3f..9f404035 100644
--- a/crates/plotnik-lib/src/bytecode/module.rs
+++ b/crates/plotnik-lib/src/bytecode/module.rs
@@ -7,6 +7,7 @@ use std::io;
 use std::ops::Deref;
 use std::path::Path;
 
+use super::aligned_vec::{AlignedVec, ALIGN};
 use super::header::{Header, SectionOffsets};
 use super::ids::{StringId, TypeId};
 use super::instructions::{Call, Match, Opcode, Return, Trampoline};
@@ -31,28 +32,69 @@ fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
     ])
 }
 
-/// Storage for bytecode bytes.
-#[derive(Debug)]
-pub struct ByteStorage(Vec<u8>);
+/// Storage for bytecode bytes with guaranteed 64-byte alignment.
+///
+/// All bytecode must be 64-byte aligned for DFA deserialization and cache
+/// efficiency. This enum ensures alignment through two paths:
+/// - `Static`: Pre-aligned via `include_query_aligned!` macro
+/// - `Aligned`: Allocated with 64-byte alignment via `AlignedVec`
+pub enum ByteStorage {
+    /// Static bytes from `include_query_aligned!` (zero-copy, pre-aligned).
+    Static(&'static [u8]),
+    /// Owned bytes with guaranteed 64-byte alignment.
+    Aligned(AlignedVec),
+}
 
 impl Deref for ByteStorage {
     type Target = [u8];
 
     fn deref(&self) -> &Self::Target {
-        &self.0
+        match self {
+            ByteStorage::Static(s) => s,
+            ByteStorage::Aligned(v) => v,
+        }
+    }
+}
+
+impl std::fmt::Debug for ByteStorage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ByteStorage::Static(s) => f.debug_tuple("Static").field(&s.len()).finish(),
+            ByteStorage::Aligned(v) => f.debug_tuple("Aligned").field(&v.len()).finish(),
+        }
     }
 }
 
 impl ByteStorage {
-    /// Create from owned bytes.
-    pub fn from_vec(bytes: Vec<u8>) -> Self {
-        Self(bytes)
+    /// Create from static bytes (zero-copy).
+    ///
+    /// The bytes must be 64-byte aligned. Use `include_query_aligned!` macro.
+    ///
+    /// # Panics
+    /// Panics if bytes are not 64-byte aligned.
+    pub fn from_static(bytes: &'static [u8]) -> Self {
+        assert!(
+            (bytes.as_ptr() as usize).is_multiple_of(ALIGN),
+            "static bytes must be 64-byte aligned; use include_query_aligned! macro"
+        );
+        Self::Static(bytes)
+    }
+
+    /// Create from an aligned vector (from compiler or file read).
+    pub fn from_aligned(vec: AlignedVec) -> Self {
+        Self::Aligned(vec)
+    }
+
+    /// Create by copying bytes into aligned storage.
+    ///
+    /// Use this when receiving bytes from unknown sources (e.g., network).
+    pub fn copy_from_slice(bytes: &[u8]) -> Self {
+        Self::Aligned(AlignedVec::copy_from_slice(bytes))
     }
 
-    /// Read a file into memory.
+    /// Read a file into aligned storage.
     pub fn from_file(path: impl AsRef<Path>) -> io::Result<Self> {
-        let bytes = std::fs::read(path)?;
-        Ok(Self(bytes))
+        Ok(Self::Aligned(AlignedVec::from_file(path)?))
     }
 }
 
@@ -118,15 +160,46 @@ pub struct Module {
 }
 
 impl Module {
-    /// Load a module from owned bytes.
-    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
-        Self::from_storage(ByteStorage::from_vec(bytes))
+    /// Load a module from an aligned vector (compiler output).
+    ///
+    /// This is the primary constructor for bytecode produced by the compiler.
+    pub fn from_aligned(vec: AlignedVec) -> Result<Self, ModuleError> {
+        Self::from_storage(ByteStorage::from_aligned(vec))
+    }
+
+    /// Load a module from static bytes (zero-copy).
+    ///
+    /// Use with `include_query_aligned!` to embed aligned bytecode:
+    /// ```ignore
+    /// use plotnik_lib::include_query_aligned;
+    ///
+    /// let module = Module::from_static(include_query_aligned!("query.ptk.bin"))?;
+    /// ```
+    ///
+    /// # Panics
+    /// Panics if bytes are not 64-byte aligned.
+    pub fn from_static(bytes: &'static [u8]) -> Result<Self, ModuleError> {
+        Self::from_storage(ByteStorage::from_static(bytes))
     }
 
     /// Load a module from a file path.
+    ///
+    /// Reads the file into 64-byte aligned storage.
     pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
-        let storage = ByteStorage::from_file(&path)?;
-        Self::from_storage(storage)
+        Self::from_storage(ByteStorage::from_file(&path)?)
+    }
+
+    /// Load a module from arbitrary bytes (copies into aligned storage).
+    ///
+    /// Use this for bytes from unknown sources (network, etc.). Always copies.
+    pub fn load(bytes: &[u8]) -> Result<Self, ModuleError> {
+        Self::from_storage(ByteStorage::copy_from_slice(bytes))
+    }
+
+    /// Load a module from owned bytes (copies into aligned storage).
+    #[deprecated(since = "0.1.0", note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying")]
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
+        Self::load(&bytes)
     }
 
     /// Load a module from storage.
diff --git a/crates/plotnik-lib/src/bytecode/module_tests.rs b/crates/plotnik-lib/src/bytecode/module_tests.rs
index aa280968..56efd4b0 100644
--- a/crates/plotnik-lib/src/bytecode/module_tests.rs
+++ b/crates/plotnik-lib/src/bytecode/module_tests.rs
@@ -10,7 +10,7 @@ fn module_from_bytes_valid() {
     let input = "Test = (identifier) @id";
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     assert!(module.header().validate_magic());
     assert!(module.header().validate_version());
@@ -23,7 +23,7 @@ fn module_from_bytes_too_small() {
     let bytes = Query::expect_valid_linked_bytes(input);
     let truncated = bytes[..32].to_vec();
 
-    let err = Module::from_bytes(truncated).unwrap_err();
+    let err = Module::load(&truncated).unwrap_err();
     assert!(matches!(err, ModuleError::FileTooSmall(32)));
 }
 
@@ -34,7 +34,7 @@ fn module_from_bytes_invalid_magic() {
     let mut bytes = Query::expect_valid_linked_bytes(input);
     bytes[0] = b'X'; // Corrupt magic
 
-    let err = Module::from_bytes(bytes).unwrap_err();
+    let err = Module::load(&bytes).unwrap_err();
     assert!(matches!(err, ModuleError::InvalidMagic));
 }
 
@@ -45,7 +45,7 @@ fn module_from_bytes_wrong_version() {
     let mut bytes = Query::expect_valid_linked_bytes(input);
     bytes[4..8].copy_from_slice(&999u32.to_le_bytes()); // Wrong version
 
-    let err = Module::from_bytes(bytes).unwrap_err();
+    let err = Module::load(&bytes).unwrap_err();
     assert!(matches!(err, ModuleError::UnsupportedVersion(999)));
 }
 
@@ -57,7 +57,7 @@ fn module_from_bytes_size_mismatch() {
     let actual_size = bytes.len() as u32;
     bytes[12..16].copy_from_slice(&(actual_size + 100).to_le_bytes()); // Wrong total_size
 
-    let err = Module::from_bytes(bytes).unwrap_err();
+    let err = Module::load(&bytes).unwrap_err();
     assert!(matches!(
         err,
         ModuleError::SizeMismatch {
@@ -72,7 +72,7 @@ fn module_strings_view() {
     let input = "Test = (identifier) @id";
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let strings = module.strings();
     // String 0 is the easter egg (accessed via raw index, not StringId)
@@ -86,7 +86,7 @@ fn module_node_types_view() {
     let input = "Test = (identifier) @id";
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let node_types = module.node_types();
     assert!(!node_types.is_empty());
@@ -103,7 +103,7 @@ fn module_node_fields_view() {
     let input = "Test = (function_declaration name: (identifier) @name)";
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let fields = module.node_fields();
     assert!(!fields.is_empty());
@@ -124,7 +124,7 @@ fn module_types_view() {
     "#};
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let types = module.types();
     // Should have custom types (struct with fields)
@@ -140,7 +140,7 @@ fn module_entrypoints_view() {
     "#};
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let entrypoints = module.entrypoints();
     assert_eq!(entrypoints.len(), 2);
@@ -159,7 +159,7 @@ fn module_decode_step() {
     let input = "Test = (identifier) @id";
 
     let bytes = Query::expect_valid_linked_bytes(input);
-    let module = Module::from_bytes(bytes).unwrap();
+    let module = Module::load(&bytes).unwrap();
 
     let instr = module.decode_step(0);
     assert!(matches!(instr, crate::bytecode::Instruction::Match(_)));
@@ -192,13 +192,50 @@ fn module_from_path_mmap() {
 }
 
 #[test]
-fn byte_storage_deref() {
+fn byte_storage_copy_from_slice() {
     use crate::bytecode::ByteStorage;
 
-    let data = vec![1, 2, 3, 4, 5];
-    let storage = ByteStorage::from_vec(data.clone());
+    let data = [1u8, 2, 3, 4, 5];
+    let storage = ByteStorage::copy_from_slice(&data);
 
     assert_eq!(&*storage, &data[..]);
     assert_eq!(storage.len(), 5);
     assert_eq!(storage[2], 3);
 }
+
+#[test]
+fn byte_storage_from_aligned() {
+    use crate::bytecode::{AlignedVec, ByteStorage};
+
+    let vec = AlignedVec::copy_from_slice(&[1, 2, 3, 4, 5]);
+    let storage = ByteStorage::from_aligned(vec);
+
+    assert_eq!(&*storage, &[1, 2, 3, 4, 5]);
+    assert_eq!(storage.len(), 5);
+}
+
+#[test]
+fn byte_storage_from_static() {
+    use crate::bytecode::ByteStorage;
+
+    // The payload is irrelevant here; embedding this file keeps the test self-contained.
+    let bytes = crate::include_query_aligned!("module_tests.rs");
+    let storage = ByteStorage::from_static(bytes);
+
+    assert!((storage.as_ptr() as usize).is_multiple_of(64));
+    assert_eq!(&*storage, &include_bytes!("module_tests.rs")[..]);
+}
+
+#[test]
+fn module_load() {
+    let input = "Test = (identifier) @id";
+
+    let bytes = Query::expect_valid_linked_bytes(input);
+    let module = Module::load(&bytes).unwrap();
+
+    assert!(module.header().validate_magic());
+    assert!(module.header().validate_version());
+
+    let strings = module.strings();
+    assert_eq!(strings.get_by_index(0), "Beauty will save the world");
+}
diff --git a/crates/plotnik-lib/src/engine/engine_tests.rs b/crates/plotnik-lib/src/engine/engine_tests.rs
index 250bafd2..26f6ffd3 100644
--- a/crates/plotnik-lib/src/engine/engine_tests.rs
+++ b/crates/plotnik-lib/src/engine/engine_tests.rs
@@ -29,7 +29,7 @@ fn execute_with_entry(query: &str, source: &str, entry: Option<&str>) -> String
     assert!(query_obj.is_valid(), "query should be valid");
 
     let bytecode = emit_linked(&query_obj).expect("emit failed");
-    let module = Module::from_bytes(bytecode).expect("decode failed");
+    let module = Module::load(&bytecode).expect("decode failed");
 
     let tree = lang.parse(source);
     let trivia = build_trivia_types(&module);
diff --git a/crates/plotnik-lib/src/engine/verify_tests.rs b/crates/plotnik-lib/src/engine/verify_tests.rs
index 97567060..1f18a710 100644
--- a/crates/plotnik-lib/src/engine/verify_tests.rs
+++ b/crates/plotnik-lib/src/engine/verify_tests.rs
@@ -18,7 +18,7 @@ fn build_module(query: &str) -> (Module, TypeId) {
         .link(&lang);
     assert!(query_obj.is_valid(), "query should be valid");
     let bytecode = emit_linked(&query_obj).expect("emit failed");
-    let module = Module::from_bytes(bytecode).expect("decode failed");
+    let module = Module::load(&bytecode).expect("decode failed");
     let declared_type = module.entrypoints().get(0).result_type;
     (module, declared_type)
 }
diff --git a/crates/plotnik-lib/src/lib.rs b/crates/plotnik-lib/src/lib.rs
index daf8a89f..8ce6b49e 100644
--- a/crates/plotnik-lib/src/lib.rs
+++ b/crates/plotnik-lib/src/lib.rs
@@ -59,3 +59,31 @@ pub enum Error {
 
 /// Result type for query operations.
 pub type Result<T> = std::result::Result<T, Error>;
+
+/// Embed bytecode with 64-byte alignment (zero-copy loading).
+///
+/// Use this instead of `include_bytes!` to ensure the embedded bytecode
+/// is properly aligned for DFA deserialization and cache efficiency.
+///
+/// Expands to a `&'static [u8]` borrowed from a 64-byte aligned static.
+/// `$path` is handed to `include_bytes!` unchanged.
+///
+/// # Example
+///
+/// ```ignore
+/// use plotnik_lib::{include_query_aligned, bytecode::Module};
+///
+/// let module = Module::from_static(include_query_aligned!("query.ptk.bin"))?;
+/// ```
+#[macro_export]
+macro_rules! include_query_aligned {
+    ($path:expr) => {{
+        // Generic over the payload so the array length comes from `include_bytes!`
+        // and `&Aligned<[u8; N]>` can unsize-coerce to `&Aligned<[u8]>`.
+        #[repr(C, align(64))]
+        struct Aligned<T: ?Sized>(T);
+
+        static ALIGNED: &Aligned<[u8]> = &Aligned(*include_bytes!($path));
+        &ALIGNED.0
+    }};
+}
diff --git a/crates/plotnik-lib/src/query/dump.rs b/crates/plotnik-lib/src/query/dump.rs
index ed26222c..99856713 100644
--- a/crates/plotnik-lib/src/query/dump.rs
+++ b/crates/plotnik-lib/src/query/dump.rs
@@ -39,7 +39,7 @@ mod test_helpers {
 
         pub fn emit_typescript(&self) -> String {
             let bytecode = self.emit().expect("bytecode emission should succeed");
-            let module = crate::bytecode::Module::from_bytes(bytecode)
+            let module = crate::bytecode::Module::load(&bytecode)
                 .expect("module loading should succeed");
             crate::typegen::typescript::emit(&module)
         }
diff --git a/crates/plotnik-lib/src/query/query_tests.rs b/crates/plotnik-lib/src/query/query_tests.rs
index 498e3767..82e74b55 100644
--- a/crates/plotnik-lib/src/query/query_tests.rs
+++ b/crates/plotnik-lib/src/query/query_tests.rs
@@ -103,7 +103,7 @@ impl QueryAnalyzed {
 
         // Emit to bytecode and then emit TypeScript from the bytecode module
         let bytecode = query.emit().expect("bytecode emission should succeed");
-        let module = Module::from_bytes(bytecode).expect("module loading should succeed");
+        let module = Module::load(&bytecode).expect("module loading should succeed");
         crate::typegen::typescript::emit(&module)
     }
 
@@ -111,7 +111,7 @@ impl QueryAnalyzed {
     pub fn expect_valid_bytecode(src: &str) -> String {
         let query = Self::parse_and_validate(src);
         let bytecode = query.emit().expect("bytecode emission should succeed");
-        let module = Module::from_bytes(bytecode).expect("module loading should succeed");
+        let module = Module::load(&bytecode).expect("module loading should succeed");
         crate::bytecode::dump(&module, crate::Colors::OFF)
     }
 
@@ -125,7 +125,7 @@ impl QueryAnalyzed {
             );
         }
         let bytecode = query.emit().expect("bytecode emission should succeed");
-        let module = Module::from_bytes(bytecode).expect("module loading should succeed");
+        let module = Module::load(&bytecode).expect("module loading should succeed");
         crate::bytecode::dump(&module, crate::Colors::OFF)
     }
 
diff --git a/crates/plotnik-lib/src/typegen/mod.rs b/crates/plotnik-lib/src/typegen/mod.rs
index 5c94f6cf..8ec1ce3d 100644
--- a/crates/plotnik-lib/src/typegen/mod.rs
+++ b/crates/plotnik-lib/src/typegen/mod.rs
@@ -9,7 +9,7 @@
 //! use plotnik_lib::typegen::typescript;
 //! use plotnik_lib::bytecode::Module;
 //!
-//! let module = Module::from_bytes(bytecode)?;
+//! let module = Module::load(&bytecode)?;
 //! let output = typescript::emit(&module);
 //! ```
 