Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/plotnik-lib/src/bytecode/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
pub const MAGIC: [u8; 4] = *b"PTKQ";

/// Current bytecode format version.
pub const VERSION: u32 = 1;
/// v2: Removed explicit offsets (computed from counts), added regex section.
pub const VERSION: u32 = 2;

/// Section alignment in bytes.
pub const SECTION_ALIGN: usize = 64;
Expand Down
207 changes: 149 additions & 58 deletions crates/plotnik-lib/src/bytecode/header.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
//! Bytecode file header (64 bytes).
//!
//! v2 layout: Offsets are computed from counts + SECTION_ALIGN (64 bytes).
//! Section order: Header → StringBlob → RegexBlob → StringTable → RegexTable →
//! NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames →
//! Entrypoints → Transitions

use super::{MAGIC, VERSION};
use super::{MAGIC, SECTION_ALIGN, VERSION};

/// Header flags (bit field).
pub mod flags {
Expand All @@ -11,41 +16,43 @@ pub mod flags {

/// File header - first 64 bytes of the bytecode file.
///
/// Note: TypeMeta sub-section counts are stored in the TypeMetaHeader,
/// not in the main header. See type_meta.rs for details.
/// v2 layout (offsets computed from counts):
/// - 0-23: identity and sizes (magic, version, checksum, total_size, str_blob_size, regex_blob_size)
/// - 24-45: counts (11 × u16) — order matches section order
/// - 46-63: reserved
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C, align(64))]
pub struct Header {
// Bytes 0-23: Identity and sizes (6 × u32)
/// Magic bytes: b"PTKQ"
pub magic: [u8; 4],
/// Format version (currently 1)
/// Format version (currently 2)
pub version: u32,
/// CRC32 checksum of everything after the header
pub checksum: u32,
/// Total file size in bytes
pub total_size: u32,
/// Size of the string blob in bytes.
pub str_blob_size: u32,
/// Size of the regex blob in bytes.
pub regex_blob_size: u32,

// Section offsets (absolute byte offsets)
pub str_blob_offset: u32,
pub str_table_offset: u32,
pub node_types_offset: u32,
pub node_fields_offset: u32,
pub trivia_offset: u32,
pub type_meta_offset: u32,
pub entrypoints_offset: u32,
pub transitions_offset: u32,

// Element counts (type counts are in TypeMetaHeader at type_meta_offset)
// Bytes 24-45: Element counts (11 × u16) — order matches section order
pub str_table_count: u16,
pub regex_table_count: u16,
pub node_types_count: u16,
pub node_fields_count: u16,
pub trivia_count: u16,
pub type_defs_count: u16,
pub type_members_count: u16,
pub type_names_count: u16,
pub entrypoints_count: u16,
pub transitions_count: u16,
/// Header flags (see `flags` module for bit definitions).
pub flags: u16,
/// Padding to maintain 64-byte size.
pub(crate) _pad: u16,

// Bytes 46-63: Reserved
pub(crate) _reserved: [u8; 18],
}

const _: () = assert!(std::mem::size_of::<Header>() == 64);
Expand All @@ -57,52 +64,71 @@ impl Default for Header {
version: VERSION,
checksum: 0,
total_size: 0,
str_blob_offset: 0,
str_table_offset: 0,
node_types_offset: 0,
node_fields_offset: 0,
trivia_offset: 0,
type_meta_offset: 0,
entrypoints_offset: 0,
transitions_offset: 0,
str_blob_size: 0,
regex_blob_size: 0,
str_table_count: 0,
regex_table_count: 0,
node_types_count: 0,
node_fields_count: 0,
trivia_count: 0,
type_defs_count: 0,
type_members_count: 0,
type_names_count: 0,
entrypoints_count: 0,
transitions_count: 0,
flags: 0,
_pad: 0,
_reserved: [0; 18],
}
}
}

/// Computed section offsets derived from header counts.
///
/// Order: StringBlob → RegexBlob → StringTable → RegexTable → NodeTypes →
/// NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames → Entrypoints → Transitions
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct SectionOffsets {
pub str_blob: u32,
pub regex_blob: u32,
pub str_table: u32,
pub regex_table: u32,
pub node_types: u32,
pub node_fields: u32,
pub trivia: u32,
pub type_defs: u32,
pub type_members: u32,
pub type_names: u32,
pub entrypoints: u32,
pub transitions: u32,
}

impl Header {
/// Decode header from 64 bytes.
pub fn from_bytes(bytes: &[u8]) -> Self {
assert!(bytes.len() >= 64, "header too short");

let mut reserved = [0u8; 18];
reserved.copy_from_slice(&bytes[46..64]);

Self {
magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
version: u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]),
checksum: u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]),
total_size: u32::from_le_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]),
str_blob_offset: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
str_table_offset: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
node_types_offset: u32::from_le_bytes([bytes[24], bytes[25], bytes[26], bytes[27]]),
node_fields_offset: u32::from_le_bytes([bytes[28], bytes[29], bytes[30], bytes[31]]),
trivia_offset: u32::from_le_bytes([bytes[32], bytes[33], bytes[34], bytes[35]]),
type_meta_offset: u32::from_le_bytes([bytes[36], bytes[37], bytes[38], bytes[39]]),
entrypoints_offset: u32::from_le_bytes([bytes[40], bytes[41], bytes[42], bytes[43]]),
transitions_offset: u32::from_le_bytes([bytes[44], bytes[45], bytes[46], bytes[47]]),
str_table_count: u16::from_le_bytes([bytes[48], bytes[49]]),
node_types_count: u16::from_le_bytes([bytes[50], bytes[51]]),
node_fields_count: u16::from_le_bytes([bytes[52], bytes[53]]),
trivia_count: u16::from_le_bytes([bytes[54], bytes[55]]),
entrypoints_count: u16::from_le_bytes([bytes[56], bytes[57]]),
transitions_count: u16::from_le_bytes([bytes[58], bytes[59]]),
flags: u16::from_le_bytes([bytes[60], bytes[61]]),
_pad: u16::from_le_bytes([bytes[62], bytes[63]]),
str_blob_size: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
regex_blob_size: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
str_table_count: u16::from_le_bytes([bytes[24], bytes[25]]),
regex_table_count: u16::from_le_bytes([bytes[26], bytes[27]]),
node_types_count: u16::from_le_bytes([bytes[28], bytes[29]]),
node_fields_count: u16::from_le_bytes([bytes[30], bytes[31]]),
trivia_count: u16::from_le_bytes([bytes[32], bytes[33]]),
type_defs_count: u16::from_le_bytes([bytes[34], bytes[35]]),
type_members_count: u16::from_le_bytes([bytes[36], bytes[37]]),
type_names_count: u16::from_le_bytes([bytes[38], bytes[39]]),
entrypoints_count: u16::from_le_bytes([bytes[40], bytes[41]]),
transitions_count: u16::from_le_bytes([bytes[42], bytes[43]]),
flags: u16::from_le_bytes([bytes[44], bytes[45]]),
_reserved: reserved,
}
}

Expand All @@ -113,22 +139,20 @@ impl Header {
bytes[4..8].copy_from_slice(&self.version.to_le_bytes());
bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes());
bytes[12..16].copy_from_slice(&self.total_size.to_le_bytes());
bytes[16..20].copy_from_slice(&self.str_blob_offset.to_le_bytes());
bytes[20..24].copy_from_slice(&self.str_table_offset.to_le_bytes());
bytes[24..28].copy_from_slice(&self.node_types_offset.to_le_bytes());
bytes[28..32].copy_from_slice(&self.node_fields_offset.to_le_bytes());
bytes[32..36].copy_from_slice(&self.trivia_offset.to_le_bytes());
bytes[36..40].copy_from_slice(&self.type_meta_offset.to_le_bytes());
bytes[40..44].copy_from_slice(&self.entrypoints_offset.to_le_bytes());
bytes[44..48].copy_from_slice(&self.transitions_offset.to_le_bytes());
bytes[48..50].copy_from_slice(&self.str_table_count.to_le_bytes());
bytes[50..52].copy_from_slice(&self.node_types_count.to_le_bytes());
bytes[52..54].copy_from_slice(&self.node_fields_count.to_le_bytes());
bytes[54..56].copy_from_slice(&self.trivia_count.to_le_bytes());
bytes[56..58].copy_from_slice(&self.entrypoints_count.to_le_bytes());
bytes[58..60].copy_from_slice(&self.transitions_count.to_le_bytes());
bytes[60..62].copy_from_slice(&self.flags.to_le_bytes());
bytes[62..64].copy_from_slice(&self._pad.to_le_bytes());
bytes[16..20].copy_from_slice(&self.str_blob_size.to_le_bytes());
bytes[20..24].copy_from_slice(&self.regex_blob_size.to_le_bytes());
bytes[24..26].copy_from_slice(&self.str_table_count.to_le_bytes());
bytes[26..28].copy_from_slice(&self.regex_table_count.to_le_bytes());
bytes[28..30].copy_from_slice(&self.node_types_count.to_le_bytes());
bytes[30..32].copy_from_slice(&self.node_fields_count.to_le_bytes());
bytes[32..34].copy_from_slice(&self.trivia_count.to_le_bytes());
bytes[34..36].copy_from_slice(&self.type_defs_count.to_le_bytes());
bytes[36..38].copy_from_slice(&self.type_members_count.to_le_bytes());
bytes[38..40].copy_from_slice(&self.type_names_count.to_le_bytes());
bytes[40..42].copy_from_slice(&self.entrypoints_count.to_le_bytes());
bytes[42..44].copy_from_slice(&self.transitions_count.to_le_bytes());
bytes[44..46].copy_from_slice(&self.flags.to_le_bytes());
bytes[46..64].copy_from_slice(&self._reserved);
bytes
}

Expand All @@ -153,4 +177,71 @@ impl Header {
self.flags &= !flags::LINKED;
}
}

/// Compute section offsets from counts and blob sizes.
///
/// Section order (all 64-byte aligned):
/// Header → StringBlob → RegexBlob → StringTable → RegexTable →
/// NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers →
/// TypeNames → Entrypoints → Transitions
pub fn compute_offsets(&self) -> SectionOffsets {
let align = SECTION_ALIGN as u32;

// Blobs first (right after header)
let str_blob = align; // 64
let regex_blob = align_up(str_blob + self.str_blob_size, align);

// Tables after blobs
let str_table = align_up(regex_blob + self.regex_blob_size, align);
let str_table_size = (self.str_table_count as u32 + 1) * 4;

let regex_table = align_up(str_table + str_table_size, align);
let regex_table_size = (self.regex_table_count as u32 + 1) * 4;

// Symbol sections
let node_types = align_up(regex_table + regex_table_size, align);
let node_types_size = self.node_types_count as u32 * 4;

let node_fields = align_up(node_types + node_types_size, align);
let node_fields_size = self.node_fields_count as u32 * 4;

let trivia = align_up(node_fields + node_fields_size, align);
let trivia_size = self.trivia_count as u32 * 2;

// Type metadata
let type_defs = align_up(trivia + trivia_size, align);
let type_defs_size = self.type_defs_count as u32 * 4;

let type_members = align_up(type_defs + type_defs_size, align);
let type_members_size = self.type_members_count as u32 * 4;

let type_names = align_up(type_members + type_members_size, align);
let type_names_size = self.type_names_count as u32 * 4;

// Entry points and instructions
let entrypoints = align_up(type_names + type_names_size, align);
let entrypoints_size = self.entrypoints_count as u32 * 8;

let transitions = align_up(entrypoints + entrypoints_size, align);

SectionOffsets {
str_blob,
regex_blob,
str_table,
regex_table,
node_types,
node_fields,
trivia,
type_defs,
type_members,
type_names,
entrypoints,
transitions,
}
}
}

/// Round up to the next multiple of `align`.
fn align_up(value: u32, align: u32) -> u32 {
(value + align - 1) & !(align - 1)
}
73 changes: 64 additions & 9 deletions crates/plotnik-lib/src/bytecode/header_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,20 @@ fn header_roundtrip() {
version: VERSION,
checksum: 0x12345678,
total_size: 1024,
str_blob_offset: 64,
str_table_offset: 128,
node_types_offset: 192,
node_fields_offset: 256,
trivia_offset: 320,
type_meta_offset: 384,
entrypoints_offset: 448,
transitions_offset: 512,
str_blob_size: 100,
regex_blob_size: 256,
str_table_count: 10,
regex_table_count: 3,
node_types_count: 20,
node_fields_count: 5,
trivia_count: 2,
type_defs_count: 8,
type_members_count: 12,
type_names_count: 4,
entrypoints_count: 1,
transitions_count: 15,
..Default::default()
flags: 0,
_reserved: [0; 18],
};

let bytes = h.to_bytes();
Expand Down Expand Up @@ -69,3 +68,59 @@ fn header_flags_roundtrip() {
assert!(decoded.is_linked());
assert_eq!(decoded.flags, flags::LINKED);
}

#[test]
fn compute_offsets_empty() {
let h = Header::default();
let offsets = h.compute_offsets();

// New order: blobs first, then tables
// All sections 64-byte aligned. With 0 counts, each table still has 1 sentinel entry (4 bytes)
assert_eq!(offsets.str_blob, 64); // after header
assert_eq!(offsets.regex_blob, 64); // 64 + align(0) = 64
assert_eq!(offsets.str_table, 64); // 64 + align(0) = 64
assert_eq!(offsets.regex_table, 128); // 64 + align(4) = 128
assert_eq!(offsets.node_types, 192); // 128 + align(4) = 192
assert_eq!(offsets.node_fields, 192); // 192 + align(0) = 192
assert_eq!(offsets.trivia, 192);
assert_eq!(offsets.type_defs, 192);
assert_eq!(offsets.type_members, 192);
assert_eq!(offsets.type_names, 192);
assert_eq!(offsets.entrypoints, 192);
assert_eq!(offsets.transitions, 192);
}

#[test]
fn compute_offsets_with_data() {
let h = Header {
str_table_count: 5, // (5+1)*4 = 24 bytes
regex_table_count: 2, // (2+1)*4 = 12 bytes
node_types_count: 10, // 10*4 = 40 bytes
node_fields_count: 5, // 5*4 = 20 bytes
trivia_count: 3, // 3*2 = 6 bytes
type_defs_count: 8, // 8*4 = 32 bytes
type_members_count: 12, // 12*4 = 48 bytes
type_names_count: 4, // 4*4 = 16 bytes
entrypoints_count: 2, // 2*8 = 16 bytes
transitions_count: 20, // 20*8 = 160 bytes
str_blob_size: 100,
regex_blob_size: 128,
..Default::default()
};

let offsets = h.compute_offsets();

// New order: blobs first, then tables. All offsets 64-byte aligned.
assert_eq!(offsets.str_blob, 64); // header end
assert_eq!(offsets.regex_blob, 192); // 64 + 100 = 164 → 192
assert_eq!(offsets.str_table, 320); // 192 + 128 = 320 (aligned)
assert_eq!(offsets.regex_table, 384); // 320 + 24 = 344 → 384
assert_eq!(offsets.node_types, 448); // 384 + 12 = 396 → 448
assert_eq!(offsets.node_fields, 512); // 448 + 40 = 488 → 512
assert_eq!(offsets.trivia, 576); // 512 + 20 = 532 → 576
assert_eq!(offsets.type_defs, 640); // 576 + 6 = 582 → 640
assert_eq!(offsets.type_members, 704); // 640 + 32 = 672 → 704
assert_eq!(offsets.type_names, 768); // 704 + 48 = 752 → 768
assert_eq!(offsets.entrypoints, 832); // 768 + 16 = 784 → 832
assert_eq!(offsets.transitions, 896); // 832 + 16 = 848 → 896
}
Loading