11const std = @import ("std" );
22const print = std .debug .print ;
3-
4- const Mode = enum { c , d };
5-
6- pub fn main () ! void {
7- var args = std .process .args ();
8- _ = args .skip (); const arg = args .next ();
9- if (arg == null ) { print ("Error: No args passed. Pass -c for compression , -d for decompression\n " , .{}); std .os .exit (1 ); }
10-
11- const mode = if (std .mem .eql (u8 , arg .? , "-d" )) Mode .d
12- else if (std .mem .eql (u8 , arg .? , "-c" )) Mode .c
13- else null ;
14- if (mode == null ) { print ("Error: Invalid arg. Pass -c for compression , -d for decompression\n " , .{}); std .os .exit (2 ); }
15-
16- var bufw = std .io .bufferedWriter (std .io .getStdOut ().writer ());
17- var writer = std .io .bitWriter (.Big , bufw .writer ());
18- var model = Model .init ();
19-
20- if (mode .? == .c ) {
21- const fileName = args .next ();
22- if (fileName == null ) { print ("Error: To compress use -c fileName\n " , .{}); std .os .exit (3 ); }
23- var path_buffer : [std .fs .MAX_PATH_BYTES ]u8 = undefined ;
24- const path = try std .fs .realpathZ (fileName .? , & path_buffer );
25- const file = try std .fs .openFileAbsolute (path , .{});
26- defer file .close ();
27- const size = (try file .stat ()).size ;
28- var bufr = std .io .bufferedReader (file .reader ());
29- var reader = std .io .bitReader (.Big , bufr .reader ());
30-
31- try writer .writeBits (size , 64 );
32- var ac = initAC (writer , Mode .c );
33- while (true ) {
34- const bit = reader .readBitsNoEof (u1 , 1 ) catch { break ; };
35- try ac .encode (bit , model .p ());
36- model .update (bit );
37- }
38- try ac .flush ();
39- try bufw .flush ();
40- } else {
41- var bufr = std .io .bufferedReader (std .io .getStdIn ().reader ());
42- var reader = std .io .bitReader (.Big , bufr .reader ());
43- const size = try reader .readBitsNoEof (u64 , 64 );
44- var ac = initAC (reader , Mode .d );
45-
46- var i : u64 = 0 ;
47- while (i / 8 < size ) : (i += 1 ) {
48- const bit = ac .decode (model .p ());
49- try writer .writeBits (bit , 1 );
50- model .update (bit );
51- }
52- try bufw .flush ();
53- }
54- }
55-
3+ const assert = std .debug .assert ;
4+ const File = std .fs .File ;
5+
6+ /// ============================= Model =============================
7+ /// A simple order0-ish model (with 12-bit context)
8+ /// To be replaced with mixer + micromodels
9+ /// MicroModels will share 2 hashtables, and hopefully 2 statetables - for big contexts, and for small ones
10+ /// Mixer should use vectors (since zig is chill like that)
5611const Model = struct {
5712 ctx : u12 ,
5813 data : [1 << 12 ]Counter ,
@@ -63,90 +18,205 @@ const Model = struct {
6318 pub fn p (self : Self ) u16 { return self .data [self .ctx ].p (); }
6419 pub fn update (self : * Self , bit : u1 ) void {
6520 self .data [self .ctx ].update (bit );
66- self .ctx <<= 1 ; self .ctx |= bit ; self . ctx &= ( 1 << 12 ) - 1 ;
21+ self .ctx <<= 1 ; self .ctx |= bit ;
6722 }
6823};
6924
/// ============================= Counter =============================
/// Adaptive bit statistics for a single context: how many 0s and 1s were seen so far.
/// Each count is a u12; both are halved once either reaches the u12 maximum.
/// To be replaced with a state table + state map
const Counter = struct {
    /// counts[0] is the number of 0 bits seen, counts[1] the number of 1 bits.
    counts: [2]u12,

    const Self = @This();

    /// Returns a counter that has seen no bits yet.
    pub fn init() Self {
        return Self{ .counts = [_]u12{ 0, 0 } };
    }

    /// Estimates the probability that the next bit is 1, scaled by 2^16.
    /// One is added to each count so a fresh counter reports exactly one half
    /// and the result always fits a u16.
    pub fn p(self: Self) u16 {
        const zeros: u64 = self.counts[0];
        const ones: u64 = self.counts[1];
        const scaled = ((ones + 1) << 16) / (zeros + ones + 2);
        return @intCast(u16, scaled);
    }

    /// Records one observed `bit`.
    pub fn update(self: *Self, bit: u1) void {
        const limit = (1 << 12) - 1;
        const saturated = self.counts[0] == limit or self.counts[1] == limit;
        if (saturated) {
            // Halve both counts so the increment below cannot overflow a u12.
            self.counts[0] >>= 1;
            self.counts[1] >>= 1;
        }
        self.counts[bit] += 1;
    }
};
9147
92- fn initAC (writer : anytype , comptime mode : Mode ) ArithmeticCoder (@TypeOf (writer ), mode ) { return ArithmeticCoder (@TypeOf (writer ), mode ).init (writer ); }
/// ============================= Arithmetic coder =============================
/// 32-bit (binary) arithmetic coder
/// Use `initAC(writer, Mode.c)` for encoding, and `initAC(reader, Mode.d)` for decoding
/// Initializing in the wrong mode does not compile, because each mode only carries the state it needs
/// To encode: `try ac.encode(bit, p1)`, To decode: `const bit = ac.decode(p1)`
/// Expected io is `std.io.BitReader` or `std.io.BitWriter`
/// `flush()` should be called exactly once
const Mode = enum { c, d }; // (compression, decompression) = (encode, decode)

/// Builds an `ArithmeticCoder` around `writer`, deriving the io type from the argument.
/// Despite the parameter name, a bit reader is what gets passed in `Mode.d`.
fn initAC(writer: anytype, comptime mode: Mode) ArithmeticCoder(@TypeOf(writer), mode) {
    return ArithmeticCoder(@TypeOf(writer), mode).init(writer);
}

/// Returns the coder type for io type `T` in the given `mode`.
/// `x` (the code value being decoded) only exists in `Mode.d`; `revBits` (pending E3 bits)
/// only exists in `Mode.c`. `x1`/`x2` are the inclusive low/high ends of the current range.
fn ArithmeticCoder(comptime T: type, comptime mode: Mode) type {
    return struct {
        io: T,
        x: if (mode == Mode.d) u32 else void,
        revBits: if (mode == Mode.c) u64 else void,
        x1: u32 = 0,
        x2: u32 = (1 << 32) - 1,

        const Self = @This();
        const Q1: u32 = 1 << 30;
        const Q2: u32 = 2 << 30;
        const Q3: u32 = 3 << 30;
        const PREC_SHIFT: u32 = 31;
        const RLOW_MOD: u32 = (1 << 31) - 1; // Modify x1 bits in E3 mapping, AND with
        const RHIGH_MOD: u32 = (1 << 31) + 1; // Modify x2 bits in E3 mapping, OR with

        /// Initializes the fields; in decode mode also reads the first 32 bits of state.
        pub fn init(io: T) Self {
            var self = if (mode == .c) Self{ .io = io, .revBits = 0, .x = {} } else if (mode == .d) Self{ .io = io, .x = 0, .revBits = {} };
            if (mode == .d) self.readState();
            return self;
        }

        /// Encodes `bit`, where `p` is the probability of a 1 scaled by 2^16.
        pub fn encode(self: *Self, bit: u1, p: u16) !void {
            return self.proc(bit, p);
        }

        /// Decodes one bit, where `p` is the probability of a 1 scaled by 2^16.
        pub fn decode(self: *Self, p: u16) u1 {
            return self.proc({}, p);
        }

        /// Writes leading bits of `x2` until the underlying bit writer has no partial byte left.
        /// Encode mode only.
        pub fn flush(self: *Self) !void {
            comptime {
                assert(mode == .c);
            }
            try self.writeBit(self.x2 >> PREC_SHIFT);
            while (self.io.bit_count != 0) {
                self.x2 <<= 1;
                try self.writeBit(self.x2 >> PREC_SHIFT);
            }
        }

        // TODO: return 0 only on EOF, otherwise return error
        fn readBit(self: *Self) u1 {
            return self.io.readBitsNoEof(u1, 1) catch 0;
        }

        // Remembers one more pending bit from an E3 mapping.
        fn incParity(self: *Self) void {
            self.revBits += 1;
        }

        // Writes `bit`, then the inverted bit once for every pending E3 mapping.
        fn writeBit(self: *Self, bit: u32) !void {
            try self.io.writeBits(bit, 1);
            while (self.revBits > 0) {
                try self.io.writeBits(bit ^ 1, 1);
                self.revBits -= 1;
            }
        }

        // Reads up to 32 bits into the state, left-aligned, so missing low bits are zero.
        fn readState(self: *Self) void {
            var bits_read: usize = 0;
            const state = self.io.readBits(u32, 32, &bits_read) catch 0;
            // With nothing read the shift amount would be 32, which does not fit the u5
            // shift operand of a u32; the state is simply all zeroes in that case.
            self.x = if (bits_read == 0) 0 else state << @intCast(u5, 32 - bits_read);
        }

        // Processes a single bit: decompresses a bit in decode mode, compresses a bit in encode mode.
        fn proc(self: *Self, bit_: if (mode == .d) void else u1, prob: u16) if (mode == .d) u1 else anyerror!void {
            // Widen the 16-bit probability to 32 bits; a zero probability is bumped to the
            // smallest non-zero value.
            const p = if (prob == 0) 1 else @as(u64, prob) << 16;
            const xmid = @intCast(u32, self.x1 + ((@as(u64, self.x2 - self.x1) * p) >> 32));

            // A 1 takes the lower part of the range [x1, xmid], a 0 the upper part [xmid + 1, x2].
            const bit = if (mode == .c) bit_ else @boolToInt(self.x <= xmid);
            if (bit == 1) self.x2 = xmid else self.x1 = xmid + 1;

            // Leading bits of x1 and x2 agree: shift them out (emit when encoding, consume when decoding).
            while ((self.x1 ^ self.x2) >> PREC_SHIFT == 0) {
                if (mode == .c) try self.writeBit(self.x1 >> PREC_SHIFT) else self.x = (self.x << 1) | self.readBit();
                self.x1 <<= 1;
                self.x2 = (self.x2 << 1) | 1;
            }

            // E3 mapping: the range straddles the midpoint inside [Q1, Q3), so expand it around
            // the middle and remember a pending bit when encoding.
            while (self.x1 >= Q1 and self.x2 < Q3) {
                if (mode == .c) self.incParity() else self.x = ((self.x << 1) ^ Q2) | self.readBit();
                self.x1 = (self.x1 << 1) & RLOW_MOD;
                self.x2 = (self.x2 << 1) | RHIGH_MOD;
            }

            if (mode == .d) return bit;
        }
    };
}
127+
/// ============================ User Interface =============================
/// Entry point, invoked as `gallop (c|d) input output`.
/// Compression writes the input size as a 64-bit big-endian header followed by the
/// arithmetic-coded bits of the input; decompression reads that header and decodes
/// eight bits for every byte it announces.
/// A short timing/size report is printed once the output has been flushed.
pub fn main() !void {
    var args = std.process.args();
    _ = args.skip(); // skip program invocation
    const mode = parseMode(args.next());

    // Register each close right after its open, so the input handle is released
    // even when opening the output file fails.
    const inFile = try parseFile(args.next(), FileOptions.read);
    defer inFile.close();
    const outFile = try parseFile(args.next(), FileOptions.create);
    defer outFile.close();

    var timer = try std.time.Timer.start();

    var bufr = std.io.bufferedReader(inFile.reader());
    var bufw = std.io.bufferedWriter(outFile.writer());
    var reader = std.io.bitReader(.Big, bufr.reader());
    var writer = std.io.bitWriter(.Big, bufw.writer());
    var model = Model.init();

    if (mode == .c) { // Compression
        const size = try getSize(inFile);
        try writer.writeBits(size, 64);

        var ac = initAC(writer, Mode.c);
        while (true) {
            // Any read failure (normally end of input) ends the bit stream.
            const bit = reader.readBitsNoEof(u1, 1) catch {
                break;
            };
            try ac.encode(bit, model.p());
            model.update(bit);
        }

        try ac.flush();
        try bufw.flush();
    } else { // Decompression
        const size = try reader.readBitsNoEof(u64, 64);
        var i: u64 = 0;

        var ac = initAC(reader, Mode.d);
        // `i` counts bits while `size` is in bytes.
        while (i / 8 < size) : (i += 1) {
            const bit = ac.decode(model.p());
            try writer.writeBits(bit, 1);
            model.update(bit);
        }

        try bufw.flush();
    }

    const ns = @intToFloat(f64, timer.lap());
    const inSize = try getSize(inFile);
    const outSize = try getSize(outFile);
    reportResult(mode, inSize, outSize, ns);
}
177+
/// Maps the first command-line argument to a `Mode`: "c" selects compression, "d" decompression.
/// Prints the usage text and exits with status 1 when the argument is missing,
/// or with status 2 when it is neither "c" nor "d".
fn parseMode(arg: ?[:0]const u8) Mode {
    if (arg == null) exit(1);
    const text = arg.?;

    const wants_compress = std.mem.eql(u8, text, "c");
    const wants_decompress = std.mem.eql(u8, text, "d");
    if (!wants_compress and !wants_decompress) exit(2);

    return if (wants_compress) Mode.c else Mode.d;
}
186+
/// How `parseFile` should open its argument.
const FileOptions = enum { create, read };

/// Opens the file named by `arg`, resolved against the current working directory.
/// `.create` creates the file with default creation flags; `.read` opens an existing file
/// with default open flags.
/// Prints the usage text and exits with status 3 when `arg` is missing.
/// The caller owns the returned handle and must close it.
fn parseFile(arg: ?[:0]const u8, options: FileOptions) !File {
    if (arg == null) exit(3);
    const path = arg.?;
    // Open relative to cwd in both cases; resolving a real path first is an extra
    // round trip that can fail on its own and can go stale before the open happens.
    return switch (options) {
        .create => std.fs.cwd().createFileZ(path, .{}),
        .read => std.fs.cwd().openFileZ(path, .{}),
    };
}
195+
/// Returns the size in bytes that `stat` currently reports for `f`.
fn getSize(f: File) !u64 {
    const info = try f.stat();
    return info.size;
}
197+
/// Prints a one-line summary: the operation, input size -> output size, and the elapsed time.
/// `ns` is the elapsed time in nanoseconds; it is shown in the first unit whose value is
/// under its cut-off (1000 for ns/us/ms, 300 for seconds, 60 for minutes), else in hours.
fn reportResult(mode: Mode, inSize: u64, outSize: u64, ns: f64) void {
    if (mode == .c) {
        print("Compressed {} -> {} in ", .{ inSize, outSize });
    } else {
        print("Decompressed {} -> {} in ", .{ inSize, outSize });
    }

    const us = ns / 1000;
    const ms = us / 1000;
    const s = ms / 1000;
    const m = s / 60;
    const h = m / 60;

    if (ns < 1000) {
        print("{d:.0} ns\n", .{ns});
    } else if (us < 1000) {
        print("{d:.3} us\n", .{us});
    } else if (ms < 1000) {
        print("{d:.2} ms\n", .{ms});
    } else if (s < 300) {
        print("{d:.2} sec\n", .{s});
    } else if (m < 60) {
        print("{d:.2} mins\n", .{m});
    } else {
        print("{d:.2} hr\n", .{h});
    }
}
211+
/// Prints the usage text and terminates the process with `status`.
/// Declared `noreturn` because control never comes back to the caller, which lets the
/// compiler treat code following a guarded call as unreachable on that path.
fn exit(status: u8) noreturn {
    print(
        \\gallop file compressor (C) 2022, Dimitar Rusev (mitiko)
        \\
        \\To compress: ./gallop c input output
        \\To decompress: ./gallop d input output
        \\Example: (./gallop c /data/book1 book1.bin) && (./gallop d book1.bin book1.orig) && (cmp book1.orig /data/book1)
        \\
    , .{});
    std.os.exit(status);
}
0 commit comments