Skip to content

Commit 3663606

Browse files
committed
Add support for signed and unsigned LEB128 to pack/unpack.
This commit adds two new pack format directives, `R` and `r`, for unsigned and signed LEB128 encoding respectively. The "r" mnemonic was chosen because LEB128 is a "vaRiable"-length encoding scheme. LEB128 is used in various formats, including DWARF, WebAssembly, MQTT, and Protobuf.
1 parent 56b67f1 commit 3663606

File tree

4 files changed

+162
-3
lines changed

4 files changed

+162
-3
lines changed

pack.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,56 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
667667
}
668668
break;
669669

670+
case 'r': /* r for SLEB128 encoding (signed) */
671+
case 'R': /* R for ULEB128 encoding (unsigned) */
672+
{
673+
int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
674+
675+
if (type == 'r') {
676+
pack_flags |= INTEGER_PACK_2COMP;
677+
}
678+
679+
while (len-- > 0) {
680+
size_t numbytes;
681+
int sign;
682+
char *cp;
683+
684+
from = NEXTFROM;
685+
from = rb_to_int(from);
686+
numbytes = rb_absint_numwords(from, 7, NULL);
687+
if (numbytes == 0)
688+
numbytes = 1;
689+
VALUE buf = rb_str_new(NULL, numbytes);
690+
691+
sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, pack_flags);
692+
693+
if (sign < 0 && type == 'R') {
694+
rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
695+
}
696+
697+
if (type == 'r') {
698+
/* Check if we need an extra byte for sign extension */
699+
unsigned char last_byte = (unsigned char)RSTRING_PTR(buf)[numbytes - 1];
700+
if ((sign >= 0 && (last_byte & 0x40)) || /* positive but sign bit set */
701+
(sign < 0 && !(last_byte & 0x40))) { /* negative but sign bit clear */
702+
/* Need an extra byte */
703+
rb_str_resize(buf, numbytes + 1);
704+
RSTRING_PTR(buf)[numbytes] = sign < 0 ? 0x7f : 0x00;
705+
numbytes++;
706+
}
707+
}
708+
709+
cp = RSTRING_PTR(buf);
710+
while (1 < numbytes) {
711+
*cp |= 0x80;
712+
cp++;
713+
numbytes--;
714+
}
715+
716+
rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
717+
}
718+
}
719+
break;
670720
case 'u': /* uuencoded string */
671721
case 'm': /* base64 encoded string */
672722
from = NEXTFROM;
@@ -1558,6 +1608,29 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
15581608
}
15591609
break;
15601610

1611+
case 'r':
1612+
case 'R':
1613+
{
1614+
int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
1615+
1616+
if (type == 'r') {
1617+
pack_flags |= INTEGER_PACK_2COMP;
1618+
}
1619+
char *s0 = s;
1620+
while (len > 0 && s < send) {
1621+
if (*s & 0x80) {
1622+
s++;
1623+
}
1624+
else {
1625+
s++;
1626+
UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
1627+
len--;
1628+
s0 = s;
1629+
}
1630+
}
1631+
}
1632+
break;
1633+
15611634
case 'w':
15621635
{
15631636
char *s0 = s;

spec/ruby/core/array/pack/shared/basic.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# NOTE: it's just a plan of the Ruby core team
3838
it "warns that a directive is unknown" do
3939
# additional directive ('a') is required for the X directive
40-
-> { [@obj, @obj].pack("a R" + pack_format) }.should complain(/unknown pack directive 'R'/)
40+
-> { [@obj, @obj].pack("a K" + pack_format) }.should complain(/unknown pack directive 'K'/)
4141
-> { [@obj, @obj].pack("a 0" + pack_format) }.should complain(/unknown pack directive '0'/)
4242
-> { [@obj, @obj].pack("a :" + pack_format) }.should complain(/unknown pack directive ':'/)
4343
end
@@ -48,7 +48,7 @@
4848
# NOTE: Added this case just to not forget about the decision in the ticket
4949
it "raise ArgumentError when a directive is unknown" do
5050
# additional directive ('a') is required for the X directive
51-
-> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/)
51+
-> { [@obj, @obj].pack("a K" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'K'/)
5252
-> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/)
5353
-> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/)
5454
end

spec/ruby/core/string/unpack/shared/basic.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
ruby_version_is "3.3" do
1313
# https://bugs.ruby-lang.org/issues/19150
1414
it 'raise ArgumentError when a directive is unknown' do
15-
-> { "abcdefgh".unpack("a R" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive 'R'/)
15+
-> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive 'K'/)
1616
-> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive '0'/)
1717
-> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive ':'/)
1818
end

test/ruby/test_pack.rb

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,4 +936,90 @@ class Array
936936
assert_equal "oh no", v
937937
end;
938938
end
939+
940+
def test_pack_unpack_R
941+
# ULEB128 encoding (unsigned): 7 payload bits per byte, least-significant group first; every byte except the last carries the 0x80 continuation bit
942+
assert_equal("\x00", [0].pack("R"))
943+
assert_equal("\x01", [1].pack("R"))
944+
assert_equal("\x7f", [127].pack("R"))
945+
assert_equal("\x80\x01", [128].pack("R"))
946+
assert_equal("\xff\x7f", [0x3fff].pack("R"))
947+
assert_equal("\x80\x80\x01", [0x4000].pack("R"))
948+
assert_equal("\xff\xff\xff\xff\x0f", [0xffffffff].pack("R"))
949+
assert_equal("\x80\x80\x80\x80\x10", [0x100000000].pack("R"))
950+
assert_equal("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", [0xffffffffffffffff].pack("R"))
951+
952+
# Multiple values: "R*" encodes each element in turn and concatenates the results
953+
assert_equal("\x01\x02", [1, 2].pack("R*"))
954+
assert_equal("\x7f\x80\x01", [127, 128].pack("R*"))
955+
956+
# Negative numbers cannot be encoded with the unsigned directive and must raise ArgumentError
957+
assert_raise(ArgumentError) { [-1].pack("R") }
958+
assert_raise(ArgumentError) { [-100].pack("R") }
959+
960+
# Unpack tests: each byte string from the pack assertions above must decode back to its integer
961+
assert_equal([0], "\x00".unpack("R"))
962+
assert_equal([1], "\x01".unpack("R"))
963+
assert_equal([127], "\x7f".unpack("R"))
964+
assert_equal([128], "\x80\x01".unpack("R"))
965+
assert_equal([0x3fff], "\xff\x7f".unpack("R"))
966+
assert_equal([0x4000], "\x80\x80\x01".unpack("R"))
967+
assert_equal([0xffffffff], "\xff\xff\xff\xff\x0f".unpack("R"))
968+
assert_equal([0x100000000], "\x80\x80\x80\x80\x10".unpack("R"))
969+
assert_equal([0xffffffffffffffff], "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01".unpack("R"))
970+
971+
# Multiple values: "R*" keeps decoding until the input is exhausted
972+
assert_equal([1, 2], "\x01\x02".unpack("R*"))
973+
assert_equal([127, 128], "\x7f\x80\x01".unpack("R*"))
974+
975+
# Round-trip test: unpacking the packed bytes must return the original values
976+
values = [0, 1, 127, 128, 0x3fff, 0x4000, 0xffffffff, 0x100000000]
977+
assert_equal(values, values.pack("R*").unpack("R*"))
978+
end
979+
980+
def test_pack_unpack_r
981+
# SLEB128 encoding (signed): bit 0x40 of the final byte acts as the sign bit, so 127 needs a second byte ("\xff\x00") while -1 fits in one ("\x7f")
982+
assert_equal("\x00", [0].pack("r"))
983+
assert_equal("\x01", [1].pack("r"))
984+
assert_equal("\x7f", [-1].pack("r"))
985+
assert_equal("\x7e", [-2].pack("r"))
986+
assert_equal("\xff\x00", [127].pack("r"))
987+
assert_equal("\x80\x01", [128].pack("r"))
988+
assert_equal("\x81\x7f", [-127].pack("r"))
989+
assert_equal("\x80\x7f", [-128].pack("r"))
990+
991+
# Larger positive numbers (0x3fff gets a trailing 0x00 byte so the sign bit of the final byte stays clear)
992+
assert_equal("\xff\xff\x00", [0x3fff].pack("r"))
993+
assert_equal("\x80\x80\x01", [0x4000].pack("r"))
994+
995+
# Larger negative numbers (the final byte is 0x7f, i.e. sign bit set and no continuation bit)
996+
assert_equal("\x81\x80\x7f", [-0x3fff].pack("r"))
997+
assert_equal("\x80\x80\x7f", [-0x4000].pack("r"))
998+
999+
# Multiple values: "r*" encodes each element in turn and concatenates the results
1000+
assert_equal("\x00\x01\x7f", [0, 1, -1].pack("r*"))
1001+
1002+
# Unpack tests: each byte string from the pack assertions above must decode back to its integer
1003+
assert_equal([0], "\x00".unpack("r"))
1004+
assert_equal([1], "\x01".unpack("r"))
1005+
assert_equal([-1], "\x7f".unpack("r"))
1006+
assert_equal([-2], "\x7e".unpack("r"))
1007+
assert_equal([127], "\xff\x00".unpack("r"))
1008+
assert_equal([128], "\x80\x01".unpack("r"))
1009+
assert_equal([-127], "\x81\x7f".unpack("r"))
1010+
assert_equal([-128], "\x80\x7f".unpack("r"))
1011+
1012+
# Larger numbers, both positive and negative
1013+
assert_equal([0x3fff], "\xff\xff\x00".unpack("r"))
1014+
assert_equal([0x4000], "\x80\x80\x01".unpack("r"))
1015+
assert_equal([-0x3fff], "\x81\x80\x7f".unpack("r"))
1016+
assert_equal([-0x4000], "\x80\x80\x7f".unpack("r"))
1017+
1018+
# Multiple values: "r*" keeps decoding until the input is exhausted
1019+
assert_equal([0, 1, -1], "\x00\x01\x7f".unpack("r*"))
1020+
1021+
# Round-trip test: unpacking the packed bytes must return the original values, including sign
1022+
values = [0, 1, -1, 127, -127, 128, -128, 0x3fff, -0x3fff, 0x4000, -0x4000]
1023+
assert_equal(values, values.pack("r*").unpack("r*"))
1024+
end
9391025
end

0 commit comments

Comments
 (0)