plotnik-lang · zharinov · Jan 14, 2026 · Jan 14, 2026
diff --git a/crates/plotnik-lib/src/bytecode/constants.rs b/crates/plotnik-lib/src/bytecode/constants.rs
@@ -4,7 +4,8 @@
 pub const MAGIC: [u8; 4] = *b"PTKQ";
 
 /// Current bytecode format version.
-pub const VERSION: u32 = 1;
+/// v2: Removed explicit offsets (computed from counts), added regex section.
+pub const VERSION: u32 = 2;
 
 /// Section alignment in bytes.
 pub const SECTION_ALIGN: usize = 64;

diff --git a/crates/plotnik-lib/src/bytecode/header.rs b/crates/plotnik-lib/src/bytecode/header.rs
@@ -1,6 +1,11 @@
 //! Bytecode file header (64 bytes).
+//!
+//! v2 layout: Offsets are computed from counts + SECTION_ALIGN (64 bytes).
+//! Section order: Header → StringBlob → RegexBlob → StringTable → RegexTable →
+//! NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames →
+//! Entrypoints → Transitions
 
-use super::{MAGIC, VERSION};
+use super::{MAGIC, SECTION_ALIGN, VERSION};
 
 /// Header flags (bit field).
 pub mod flags {
@@ -11,41 +16,43 @@ pub mod flags {
 
 /// File header - first 64 bytes of the bytecode file.
 ///
-/// Note: TypeMeta sub-section counts are stored in the TypeMetaHeader,
-/// not in the main header. See type_meta.rs for details.
+/// v2 layout (offsets computed from counts):
+/// - 0-23: identity and sizes (magic, version, checksum, total_size, str_blob_size, regex_blob_size)
+/// - 24-45: counts (11 × u16) — order matches section order
+/// - 46-63: reserved
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[repr(C, align(64))]
 pub struct Header {
+    // Bytes 0-23: Identity and sizes (6 × u32)
     /// Magic bytes: b"PTKQ"
     pub magic: [u8; 4],
-    /// Format version (currently 1)
+    /// Format version (currently 2)
     pub version: u32,
     /// CRC32 checksum of everything after the header
     pub checksum: u32,
     /// Total file size in bytes
     pub total_size: u32,
+    /// Size of the string blob in bytes.
+    pub str_blob_size: u32,
+    /// Size of the regex blob in bytes.
+    pub regex_blob_size: u32,
 
-    // Section offsets (absolute byte offsets)
-    pub str_blob_offset: u32,
-    pub str_table_offset: u32,
-    pub node_types_offset: u32,
-    pub node_fields_offset: u32,
-    pub trivia_offset: u32,
-    pub type_meta_offset: u32,
-    pub entrypoints_offset: u32,
-    pub transitions_offset: u32,
-
-    // Element counts (type counts are in TypeMetaHeader at type_meta_offset)
+    // Bytes 24-45: Element counts (11 × u16) — order matches section order
     pub str_table_count: u16,
+    pub regex_table_count: u16,
     pub node_types_count: u16,
     pub node_fields_count: u16,
     pub trivia_count: u16,
+    pub type_defs_count: u16,
+    pub type_members_count: u16,
+    pub type_names_count: u16,
     pub entrypoints_count: u16,
     pub transitions_count: u16,
     /// Header flags (see `flags` module for bit definitions).
     pub flags: u16,
-    /// Padding to maintain 64-byte size.
-    pub(crate) _pad: u16,
+
+    // Bytes 46-63: Reserved
+    pub(crate) _reserved: [u8; 18],
 }
 
 const _: () = assert!(std::mem::size_of::<Header>() == 64);
@@ -57,52 +64,71 @@ impl Default for Header {
             version: VERSION,
             checksum: 0,
             total_size: 0,
-            str_blob_offset: 0,
-            str_table_offset: 0,
-            node_types_offset: 0,
-            node_fields_offset: 0,
-            trivia_offset: 0,
-            type_meta_offset: 0,
-            entrypoints_offset: 0,
-            transitions_offset: 0,
+            str_blob_size: 0,
+            regex_blob_size: 0,
             str_table_count: 0,
+            regex_table_count: 0,
             node_types_count: 0,
             node_fields_count: 0,
             trivia_count: 0,
+            type_defs_count: 0,
+            type_members_count: 0,
+            type_names_count: 0,
             entrypoints_count: 0,
             transitions_count: 0,
             flags: 0,
-            _pad: 0,
+            _reserved: [0; 18],
         }
     }
 }
 
+/// Computed section offsets derived from header counts.
+///
+/// Order: StringBlob → RegexBlob → StringTable → RegexTable → NodeTypes →
+/// NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames → Entrypoints → Transitions
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct SectionOffsets {
+    pub str_blob: u32,
+    pub regex_blob: u32,
+    pub str_table: u32,
+    pub regex_table: u32,
+    pub node_types: u32,
+    pub node_fields: u32,
+    pub trivia: u32,
+    pub type_defs: u32,
+    pub type_members: u32,
+    pub type_names: u32,
+    pub entrypoints: u32,
+    pub transitions: u32,
+}
+
 impl Header {
     /// Decode header from 64 bytes.
     pub fn from_bytes(bytes: &[u8]) -> Self {
         assert!(bytes.len() >= 64, "header too short");
 
+        let mut reserved = [0u8; 18];
+        reserved.copy_from_slice(&bytes[46..64]);
+
         Self {
             magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
             version: u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]),
             checksum: u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]),
             total_size: u32::from_le_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]),
-            str_blob_offset: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
-            str_table_offset: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
-            node_types_offset: u32::from_le_bytes([bytes[24], bytes[25], bytes[26], bytes[27]]),
-            node_fields_offset: u32::from_le_bytes([bytes[28], bytes[29], bytes[30], bytes[31]]),
-            trivia_offset: u32::from_le_bytes([bytes[32], bytes[33], bytes[34], bytes[35]]),
-            type_meta_offset: u32::from_le_bytes([bytes[36], bytes[37], bytes[38], bytes[39]]),
-            entrypoints_offset: u32::from_le_bytes([bytes[40], bytes[41], bytes[42], bytes[43]]),
-            transitions_offset: u32::from_le_bytes([bytes[44], bytes[45], bytes[46], bytes[47]]),
-            str_table_count: u16::from_le_bytes([bytes[48], bytes[49]]),
-            node_types_count: u16::from_le_bytes([bytes[50], bytes[51]]),
-            node_fields_count: u16::from_le_bytes([bytes[52], bytes[53]]),
-            trivia_count: u16::from_le_bytes([bytes[54], bytes[55]]),
-            entrypoints_count: u16::from_le_bytes([bytes[56], bytes[57]]),
-            transitions_count: u16::from_le_bytes([bytes[58], bytes[59]]),
-            flags: u16::from_le_bytes([bytes[60], bytes[61]]),
-            _pad: u16::from_le_bytes([bytes[62], bytes[63]]),
+            str_blob_size: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
+            regex_blob_size: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
+            str_table_count: u16::from_le_bytes([bytes[24], bytes[25]]),
+            regex_table_count: u16::from_le_bytes([bytes[26], bytes[27]]),
+            node_types_count: u16::from_le_bytes([bytes[28], bytes[29]]),
+            node_fields_count: u16::from_le_bytes([bytes[30], bytes[31]]),
+            trivia_count: u16::from_le_bytes([bytes[32], bytes[33]]),
+            type_defs_count: u16::from_le_bytes([bytes[34], bytes[35]]),
+            type_members_count: u16::from_le_bytes([bytes[36], bytes[37]]),
+            type_names_count: u16::from_le_bytes([bytes[38], bytes[39]]),
+            entrypoints_count: u16::from_le_bytes([bytes[40], bytes[41]]),
+            transitions_count: u16::from_le_bytes([bytes[42], bytes[43]]),
+            flags: u16::from_le_bytes([bytes[44], bytes[45]]),
+            _reserved: reserved,
         }
     }
 
@@ -113,22 +139,20 @@ impl Header {
         bytes[4..8].copy_from_slice(&self.version.to_le_bytes());
         bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes());
         bytes[12..16].copy_from_slice(&self.total_size.to_le_bytes());
-        bytes[16..20].copy_from_slice(&self.str_blob_offset.to_le_bytes());
-        bytes[20..24].copy_from_slice(&self.str_table_offset.to_le_bytes());
-        bytes[24..28].copy_from_slice(&self.node_types_offset.to_le_bytes());
-        bytes[28..32].copy_from_slice(&self.node_fields_offset.to_le_bytes());
-        bytes[32..36].copy_from_slice(&self.trivia_offset.to_le_bytes());
-        bytes[36..40].copy_from_slice(&self.type_meta_offset.to_le_bytes());
-        bytes[40..44].copy_from_slice(&self.entrypoints_offset.to_le_bytes());
-        bytes[44..48].copy_from_slice(&self.transitions_offset.to_le_bytes());
-        bytes[48..50].copy_from_slice(&self.str_table_count.to_le_bytes());
-        bytes[50..52].copy_from_slice(&self.node_types_count.to_le_bytes());
-        bytes[52..54].copy_from_slice(&self.node_fields_count.to_le_bytes());
-        bytes[54..56].copy_from_slice(&self.trivia_count.to_le_bytes());
-        bytes[56..58].copy_from_slice(&self.entrypoints_count.to_le_bytes());
-        bytes[58..60].copy_from_slice(&self.transitions_count.to_le_bytes());
-        bytes[60..62].copy_from_slice(&self.flags.to_le_bytes());
-        bytes[62..64].copy_from_slice(&self._pad.to_le_bytes());
+        bytes[16..20].copy_from_slice(&self.str_blob_size.to_le_bytes());
+        bytes[20..24].copy_from_slice(&self.regex_blob_size.to_le_bytes());
+        bytes[24..26].copy_from_slice(&self.str_table_count.to_le_bytes());
+        bytes[26..28].copy_from_slice(&self.regex_table_count.to_le_bytes());
+        bytes[28..30].copy_from_slice(&self.node_types_count.to_le_bytes());
+        bytes[30..32].copy_from_slice(&self.node_fields_count.to_le_bytes());
+        bytes[32..34].copy_from_slice(&self.trivia_count.to_le_bytes());
+        bytes[34..36].copy_from_slice(&self.type_defs_count.to_le_bytes());
+        bytes[36..38].copy_from_slice(&self.type_members_count.to_le_bytes());
+        bytes[38..40].copy_from_slice(&self.type_names_count.to_le_bytes());
+        bytes[40..42].copy_from_slice(&self.entrypoints_count.to_le_bytes());
+        bytes[42..44].copy_from_slice(&self.transitions_count.to_le_bytes());
+        bytes[44..46].copy_from_slice(&self.flags.to_le_bytes());
+        bytes[46..64].copy_from_slice(&self._reserved);
         bytes
     }
 
@@ -153,4 +177,71 @@ impl Header {
             self.flags &= !flags::LINKED;
         }
     }
+
+    /// Compute section offsets from counts and blob sizes.
+    ///
+    /// Section order (all 64-byte aligned):
+    /// Header → StringBlob → RegexBlob → StringTable → RegexTable →
+    /// NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers →
+    /// TypeNames → Entrypoints → Transitions
+    pub fn compute_offsets(&self) -> SectionOffsets {
+        let align = SECTION_ALIGN as u32;
+
+        // Blobs first (right after header)
+        let str_blob = align; // 64
+        let regex_blob = align_up(str_blob + self.str_blob_size, align);
+
+        // Tables after blobs
+        let str_table = align_up(regex_blob + self.regex_blob_size, align);
+        let str_table_size = (self.str_table_count as u32 + 1) * 4;
+
+        let regex_table = align_up(str_table + str_table_size, align);
+        let regex_table_size = (self.regex_table_count as u32 + 1) * 4;
+
+        // Symbol sections
+        let node_types = align_up(regex_table + regex_table_size, align);
+        let node_types_size = self.node_types_count as u32 * 4;
+
+        let node_fields = align_up(node_types + node_types_size, align);
+        let node_fields_size = self.node_fields_count as u32 * 4;
+
+        let trivia = align_up(node_fields + node_fields_size, align);
+        let trivia_size = self.trivia_count as u32 * 2;
+
+        // Type metadata
+        let type_defs = align_up(trivia + trivia_size, align);
+        let type_defs_size = self.type_defs_count as u32 * 4;
+
+        let type_members = align_up(type_defs + type_defs_size, align);
+        let type_members_size = self.type_members_count as u32 * 4;
+
+        let type_names = align_up(type_members + type_members_size, align);
+        let type_names_size = self.type_names_count as u32 * 4;
+
+        // Entry points and instructions
+        let entrypoints = align_up(type_names + type_names_size, align);
+        let entrypoints_size = self.entrypoints_count as u32 * 8;
+
+        let transitions = align_up(entrypoints + entrypoints_size, align);
+
+        SectionOffsets {
+            str_blob,
+            regex_blob,
+            str_table,
+            regex_table,
+            node_types,
+            node_fields,
+            trivia,
+            type_defs,
+            type_members,
+            type_names,
+            entrypoints,
+            transitions,
+        }
+    }
+}
+
+/// Round up to the next multiple of `align`.
+fn align_up(value: u32, align: u32) -> u32 {
+    (value + align - 1) & !(align - 1)
 }
diff --git a/crates/plotnik-lib/src/bytecode/header_tests.rs b/crates/plotnik-lib/src/bytecode/header_tests.rs
@@ -20,21 +20,20 @@ fn header_roundtrip() {
         version: VERSION,
         checksum: 0x12345678,
         total_size: 1024,
-        str_blob_offset: 64,
-        str_table_offset: 128,
-        node_types_offset: 192,
-        node_fields_offset: 256,
-        trivia_offset: 320,
-        type_meta_offset: 384,
-        entrypoints_offset: 448,
-        transitions_offset: 512,
+        str_blob_size: 100,
+        regex_blob_size: 256,
         str_table_count: 10,
+        regex_table_count: 3,
         node_types_count: 20,
         node_fields_count: 5,
         trivia_count: 2,
+        type_defs_count: 8,
+        type_members_count: 12,
+        type_names_count: 4,
         entrypoints_count: 1,
         transitions_count: 15,
-        ..Default::default()
+        flags: 0,
+        _reserved: [0; 18],
     };
 
     let bytes = h.to_bytes();
@@ -69,3 +68,59 @@ fn header_flags_roundtrip() {
     assert!(decoded.is_linked());
     assert_eq!(decoded.flags, flags::LINKED);
 }
+
+#[test]
+fn compute_offsets_empty() {
+    let h = Header::default();
+    let offsets = h.compute_offsets();
+
+    // New order: blobs first, then tables
+    // All sections 64-byte aligned. With 0 counts, each table still has 1 sentinel entry (4 bytes)
+    assert_eq!(offsets.str_blob, 64);            // after header
+    assert_eq!(offsets.regex_blob, 64);          // 64 + align(0) = 64
+    assert_eq!(offsets.str_table, 64);           // 64 + align(0) = 64
+    assert_eq!(offsets.regex_table, 128);        // 64 + align(4) = 128
+    assert_eq!(offsets.node_types, 192);         // 128 + align(4) = 192
+    assert_eq!(offsets.node_fields, 192);        // 192 + align(0) = 192
+    assert_eq!(offsets.trivia, 192);
+    assert_eq!(offsets.type_defs, 192);
+    assert_eq!(offsets.type_members, 192);
+    assert_eq!(offsets.type_names, 192);
+    assert_eq!(offsets.entrypoints, 192);
+    assert_eq!(offsets.transitions, 192);
+}
+
+#[test]
+fn compute_offsets_with_data() {
+    let h = Header {
+        str_table_count: 5,     // (5+1)*4 = 24 bytes
+        regex_table_count: 2,   // (2+1)*4 = 12 bytes
+        node_types_count: 10,   // 10*4 = 40 bytes
+        node_fields_count: 5,   // 5*4 = 20 bytes
+        trivia_count: 3,        // 3*2 = 6 bytes
+        type_defs_count: 8,     // 8*4 = 32 bytes
+        type_members_count: 12, // 12*4 = 48 bytes
+        type_names_count: 4,    // 4*4 = 16 bytes
+        entrypoints_count: 2,   // 2*8 = 16 bytes
+        transitions_count: 20,  // 20*8 = 160 bytes
+        str_blob_size: 100,
+        regex_blob_size: 128,
+        ..Default::default()
+    };
+
+    let offsets = h.compute_offsets();
+
+    // New order: blobs first, then tables. All offsets 64-byte aligned.
+    assert_eq!(offsets.str_blob, 64);            // header end
+    assert_eq!(offsets.regex_blob, 192);         // 64 + 100 = 164 → 192
+    assert_eq!(offsets.str_table, 320);          // 192 + 128 = 320 (aligned)
+    assert_eq!(offsets.regex_table, 384);        // 320 + 24 = 344 → 384
+    assert_eq!(offsets.node_types, 448);         // 384 + 12 = 396 → 448
+    assert_eq!(offsets.node_fields, 512);        // 448 + 40 = 488 → 512
+    assert_eq!(offsets.trivia, 576);             // 512 + 20 = 532 → 576
+    assert_eq!(offsets.type_defs, 640);          // 576 + 6 = 582 → 640
+    assert_eq!(offsets.type_members, 704);       // 640 + 32 = 672 → 704
+    assert_eq!(offsets.type_names, 768);         // 704 + 48 = 752 → 768
+    assert_eq!(offsets.entrypoints, 832);        // 768 + 16 = 784 → 832
+    assert_eq!(offsets.transitions, 896);        // 832 + 16 = 848 → 896
+}