From 8d13307586cc0e87b2d1164bd0b4dd2cc18f9dc2 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Mon, 5 Jan 2026 14:28:54 -0500 Subject: [PATCH] Isolate metadata serialization from device Previously, the `mgard_x::Metadata` structure had inline code that used protobuf to serialize its values into a buffer. This was problematic because the latest version of protobuf does not compile with nvcc. The issue is a feature of C++17 does not support (and it is unclear if it will ever be supported). Get around this problem by moving (the majority of) the serialization code into the MGARD library and compile it with the host compiler. To make this work, the code now has to serialize into a host buffer and then copy into a device buffer. However, the previous implementation simply copied things one at a time into the device buffer, so this should be no less performant. --- include/mgard-x/Metadata/Metadata.hpp | 976 +------------------------- src/mgard-x/CMakeLists.txt | 1 + src/mgard-x/Metadata/CMakeLists.txt | 4 + src/mgard-x/Metadata/Metadata.cpp | 739 +++++++++++++++++++ 4 files changed, 774 insertions(+), 946 deletions(-) create mode 100644 src/mgard-x/Metadata/CMakeLists.txt create mode 100644 src/mgard-x/Metadata/Metadata.cpp diff --git a/include/mgard-x/Metadata/Metadata.hpp b/include/mgard-x/Metadata/Metadata.hpp index e645d6c329..2f11d810f7 100644 --- a/include/mgard-x/Metadata/Metadata.hpp +++ b/include/mgard-x/Metadata/Metadata.hpp @@ -5,23 +5,19 @@ * Date: March 17, 2022 */ +#include "../Config/Config.h" #include "../RuntimeX/RuntimeX.h" #include "../Utilities/Types.h" #include "MGARDConfig.hpp" -#include "format.hpp" -#include "proto/mgard.pb.h" #include #include -#include - #ifndef MGARD_X_METADATA #define MGARD_X_METADATA namespace mgard_x { -template struct Metadata { - using Mem = MemoryManager; +struct MetadataBase { // about MGARD software uint8_t software_version[3]; uint8_t file_version[3]; @@ -66,7 +62,19 @@ template struct Metadata { enum processor_type ptype; -public: + void InitializeConfig(Config &config); + void PrintSummary(); + + std::vector Serialize(); + void Deserialize(const std::vector &serialized_data); + +protected: + static uint64_t SerializePreambleSize(); + uint64_t DeserializeSize(std::vector::const_iterator &iter); +}; + +template struct Metadata : MetadataBase { + using Mem = MemoryManager; template void FillForCompression(enum error_bound_type ebtype, T tol, T s, T norm, enum decomposition_type decomposition, @@ -209,950 +217,26 @@ template struct Metadata { } SERIALIZED_TYPE *Serialize(uint32_t &total_size) { - // return SerializeAll(total_size); - // PrintSummary(); - return SerializeAllWithProtobuf(total_size); - } - void Deserialize(SERIALIZED_TYPE *serialized_data) { - // DeserializeAll(serialized_data); - DeserializeAllWithProtobuf(serialized_data); - // PrintSummary(); - } - - void InitializeConfig(Config &config) { - config.domain_decomposition = ddtype; - config.decomposition = decomposition; - config.lossless = ltype; - config.huff_dict_size = huff_dict_size; - config.huff_block_size = huff_block_size; - config.reorder = reorder; - } - - void PrintSummary() { - std::cout << "=======Metadata Summary=======\n"; - std::cout << "Signature: "; - for (const char c : mgard::SIGNATURE) - std::cout << c; - std::cout << "\n"; - std::cout << "MGARD version: " << (int)software_version[0] << "." - << (int)software_version[1] << "." << (int)software_version[2] - << "\n"; - std::cout << "File format version: " << (int)file_version[0] << "." - << (int)file_version[1] << "." << (int)file_version[2] << "\n"; - std::cout << "Metadata size: " << metadata_size << "\n"; - std::cout << "Metadata crc32: " << metadata_crc32 << "\n"; - std::cout << "Endiness: "; - if (etype == endiness_type::Big_Endian) { - std::cout << "Big Endian\n"; - } else { - std::cout << "Little Endian\n"; - } - std::cout << "Data type: "; - if (dtype == data_type::Float) { - std::cout << "Float\n"; - } else if (dtype == data_type::Double) { - std::cout << "Double\n"; - } - std::cout << "Topology: "; - if (dstype == data_structure_type::Cartesian_Grid_Uniform) { - std::cout << "Uniform Grid\n"; - } else if (dstype == data_structure_type::Cartesian_Grid_Non_Uniform) { - std::cout << "Non-uniform Grid\n"; - } - std::cout << "Shape: "; - for (uint64_t &c : shape) - std::cout << c << " "; - std::cout << "\n"; - std::cout << "Function Decomposition: "; - if (decomposition == decomposition_type::MultiDim) { - std::cout << "MultiDim\n"; - } else if (decomposition == decomposition_type::SingleDim) { - std::cout << "SingleDim\n"; - } - std::cout << "Reorder: " << reorder << "\n"; - std::cout << "Domain Decomposition: "; - if (domain_decomposed) { - if (ddtype == domain_decomposition_type::MaxDim) { - std::cout << "MaxDim\n"; - } else if (ddtype == domain_decomposition_type::Variable) { - std::cout << "Variable\n"; - } else { - std::cout << "Block\n"; - } - std::cout << "Decomposed Dim: " << domain_decomposed_dim << "\n"; - std::cout << "Decomposed Size: " << domain_decomposed_size << "\n"; - } else { - std::cout << "No\n"; - } - std::cout << "Error bound mode: "; - if (ebtype == error_bound_type::REL) { - std::cout << "REL\n"; - } else if (ebtype == error_bound_type::ABS) { - std::cout << "ABS\n"; - } - std::cout << "Norm type: "; - if (ntype == norm_type::L_Inf) { - std::cout << "L_Inf\n"; - } else if (ntype == norm_type::L_2) { - std::cout << "L_2\n"; - } - std::cout << "Norm: " << norm << "\n"; - std::cout << "tol: " << tol << "\n"; - std::cout << "s: " << s << "\n"; - - std::cout << "Lossless: "; - if (ltype == mgard_x::lossless_type::Huffman) { - std::cout << "Huffman\n"; - std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; - std::cout << "Huffman block size: " << huff_block_size << "\n"; - } else if (ltype == mgard_x::lossless_type::Huffman_LZ4) { - std::cout << "Huffman_LZ4\n"; - std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; - std::cout << "Huffman block size: " << huff_block_size << "\n"; - } else if (ltype == mgard_x::lossless_type::Huffman_Zstd) { - std::cout << "Huffman_Zstd\n"; - std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; - std::cout << "Huffman block size: " << huff_block_size << "\n"; - } else if (ltype == mgard_x::lossless_type::CPU_Lossless) { - std::cout << "CPU_Lossless\n"; - } - - std::cout << "Backend: "; - if (ptype == processor_type::X_SERIAL) { - std::cout << "X_SERIAL\n"; - } else if (ptype == processor_type::X_CUDA) { - std::cout << "X_OPENMP\n"; - } else if (ptype == processor_type::X_OPENMP) { - std::cout << "X_CUDA\n"; - } else if (ptype == processor_type::X_HIP) { - std::cout << "X_HIP\n"; - } else if (ptype == processor_type::X_SYCL) { - std::cout << "X_SYCL\n"; - } - } - -private: - SERIALIZED_TYPE *SerializeAll(uint32_t &total_size) { - if (big_endian()) { - etype = endiness_type::Big_Endian; - } else { - etype = endiness_type::Little_Endian; - } - - total_size = 0; - - // about MGARD software - total_size += mgard::SIGNATURE.size(); - total_size += sizeof(software_version); - total_size += sizeof(file_version); - total_size += sizeof(metadata_size); - total_size += sizeof(metadata_crc32); - total_size += sizeof(ptype); - - // about compression - total_size += sizeof(ebtype); - if (ebtype == error_bound_type::REL) { - total_size += sizeof(norm); // norm - } - total_size += sizeof(tol); // tol - total_size += sizeof(ntype); - // if (ntype == norm_type::L_2) { - total_size += sizeof(s); // s - //} - total_size += sizeof(decomposition); - total_size += sizeof(l_target); // l_target; - total_size += sizeof(reorder); - total_size += sizeof(ltype); - if (ltype == lossless_type::Huffman || - ltype == lossless_type::Huffman_LZ4 || - ltype == lossless_type::Huffman_Zstd) { - total_size += sizeof(huff_dict_size); // dict size - total_size += sizeof(huff_block_size); // block size - } - - // about data - total_size += sizeof(dtype); - total_size += sizeof(etype); - total_size += sizeof(dstype); - total_size += sizeof(total_dims); // total_dims; - total_size += sizeof(shape[0]) * total_dims; // shape; - if (dstype == data_structure_type::Cartesian_Grid_Non_Uniform) { - size_t coord_size = 0; - for (DIM d = 0; d < total_dims; d++) { - coord_size += shape[d] * sizeof(double); - } - total_size += coord_size; - } - - total_size += sizeof(domain_decomposed); - if (domain_decomposed) { - total_size += sizeof(ddtype); - total_size += sizeof(domain_decomposed_dim); - total_size += sizeof(domain_decomposed_size); - } - - // initialize some fields - metadata_size = total_size; - - software_version[0] = MGARD_VERSION_MAJOR; - software_version[1] = MGARD_VERSION_MINOR; - software_version[2] = MGARD_VERSION_PATCH; - - file_version[0] = MGARD_FILE_VERSION_MAJOR; - file_version[1] = MGARD_FILE_VERSION_MINOR; - file_version[2] = MGARD_FILE_VERSION_PATCH; - - // to be replaced with actual CRC-32 checksum - metadata_crc32 = 0; - - // start serializing - SERIALIZED_TYPE *serialized_data; - Mem::Malloc1D(serialized_data, total_size, 0); - DeviceRuntime::SyncQueue(0); - // (SERIALIZED_TYPE *)std::malloc(total_size); - SERIALIZED_TYPE *p = serialized_data; - - SerializeSignature(p); - Serialize(software_version, p); - Serialize(file_version, p); - Serialize(metadata_size, p); - Serialize(metadata_crc32, p); - Serialize(ptype, p); - - Serialize(ebtype, p); - if (ebtype == error_bound_type::REL) { - Serialize(norm, p); - } - Serialize(tol, p); - Serialize(ntype, p); - // if (ntype == norm_type::L_2) { - Serialize(s, p); - //} - Serialize(decomposition, p); - Serialize(l_target, p); - Serialize(reorder, p); - Serialize(ltype, p); - if (ltype == lossless_type::Huffman || - ltype == lossless_type::Huffman_LZ4 || - ltype == lossless_type::Huffman_Zstd) { - Serialize(huff_dict_size, p); - Serialize(huff_block_size, p); - } - - Serialize(dtype, p); - Serialize(etype, p); - Serialize(dstype, p); - Serialize(total_dims, p); - SerializeShape(shape, p); - if (dstype == data_structure_type::Cartesian_Grid_Non_Uniform) { - SerializeCoords(coords, p); - } - - Serialize(domain_decomposed, p); - if (domain_decomposed) { - Serialize(ddtype, p); - Serialize(domain_decomposed_dim, p); - Serialize(domain_decomposed_size, p); - } - return serialized_data; - } - - void DeserializeAll(SERIALIZED_TYPE *serialized_data) { - SERIALIZED_TYPE *p = serialized_data; - - DeserializeSignature(p); - Deserialize(software_version, p); - Deserialize(file_version, p); - Deserialize(metadata_size, p); - Deserialize(metadata_crc32, p); - Deserialize(ptype, p); - - Deserialize(ebtype, p); - if (ebtype == error_bound_type::REL) { - Deserialize(norm, p); - } - Deserialize(tol, p); - Deserialize(ntype, p); - // if (ntype == norm_type::L_2) { - Deserialize(s, p); - //} - Deserialize(decomposition, p); - Deserialize(l_target, p); - Deserialize(reorder, p); - Deserialize(ltype, p); - if (ltype == lossless_type::Huffman || - ltype == lossless_type::Huffman_LZ4 || - ltype == lossless_type::Huffman_Zstd) { - Deserialize(huff_dict_size, p); - Deserialize(huff_block_size, p); - } - - Deserialize(dtype, p); - Deserialize(etype, p); - Deserialize(dstype, p); - Deserialize(total_dims, p); - DeserializeShape(shape, p); - if (dstype == data_structure_type::Cartesian_Grid_Non_Uniform) { - DeserializeCoords(coords, p); - } - - Deserialize(domain_decomposed, p); - if (domain_decomposed) { - Deserialize(ddtype, p); - Deserialize(domain_decomposed_dim, p); - Deserialize(domain_decomposed_size, p); - } - } - - SERIALIZED_TYPE *SerializeAllWithProtobuf(uint32_t &total_size) { - - mgard::pb::Header header; - - { // Version Number - software_version[0] = MGARD_VERSION_MAJOR; - software_version[1] = MGARD_VERSION_MINOR; - software_version[2] = MGARD_VERSION_PATCH; - - mgard::pb::VersionNumber &mgard_version_number = - *header.mutable_mgard_version(); - mgard_version_number.set_major_(MGARD_VERSION_MAJOR); - mgard_version_number.set_minor_(MGARD_VERSION_MINOR); - mgard_version_number.set_patch_(MGARD_VERSION_PATCH); - - file_version[0] = MGARD_FILE_VERSION_MAJOR; - file_version[1] = MGARD_FILE_VERSION_MINOR; - file_version[2] = MGARD_FILE_VERSION_PATCH; - - mgard::pb::VersionNumber &format_version_number = - *header.mutable_file_format_version(); - mgard_version_number.set_major_(MGARD_FILE_VERSION_MAJOR); - mgard_version_number.set_minor_(MGARD_FILE_VERSION_MINOR); - mgard_version_number.set_patch_(MGARD_FILE_VERSION_PATCH); - } - - { // Domain - mgard::pb::Domain &domain = *header.mutable_domain(); - domain.set_topology(mgard::pb::Domain::CARTESIAN_GRID); - mgard::pb::CartesianGridTopology &cartesian_grid_topology = - *domain.mutable_cartesian_grid_topology(); - cartesian_grid_topology.set_dimension(total_dims); - google::protobuf::RepeatedField &shape_ = - *cartesian_grid_topology.mutable_shape(); - shape_.Resize(total_dims, 0); - std::copy(shape.begin(), shape.end(), shape_.mutable_data()); - mgard::pb::Domain::Geometry geometry; - if (dstype == data_structure_type::Cartesian_Grid_Uniform) { - geometry = mgard::pb::Domain::UNIT_CUBE; - } else { - geometry = mgard::pb::Domain::EXPLICIT_CUBE; - mgard::pb::ExplicitCubeGeometry &explicit_cube_geometry = - *domain.mutable_explicit_cube_geometry(); - google::protobuf::RepeatedField &coordinates_ = - *explicit_cube_geometry.mutable_coordinates(); - - uint64_t totel_len = 0; - for (DIM d = 0; d < total_dims; d++) - totel_len += shape[d]; - coordinates_.Resize(totel_len, 0); - double *p = coordinates_.mutable_data(); - for (DIM d = 0; d < total_dims; d++) { - std::copy(coords[d].begin(), coords[d].end(), p); - p += shape[d]; - } - } - domain.set_geometry(geometry); - } - - { // Dataset - mgard::pb::Dataset &dataset = *header.mutable_dataset(); - if (dtype == data_type::Float) { - dataset.set_type(mgard::pb::Dataset::FLOAT); - } else if (dtype == data_type::Double) { - dataset.set_type(mgard::pb::Dataset::DOUBLE); - } - dataset.set_dimension(1); - } - - { // Error control - mgard::pb::ErrorControl &error = *header.mutable_error_control(); - if (ebtype == error_bound_type::ABS) { - error.set_mode(mgard::pb::ErrorControl::ABSOLUTE); - } else if (ebtype == error_bound_type::REL) { - error.set_mode(mgard::pb::ErrorControl::RELATIVE); - error.set_norm_of_original_data(norm); - } - if (ntype == norm_type::L_Inf) { - error.set_norm(mgard::pb::ErrorControl::L_INFINITY); - error.set_s(s); - } else { - error.set_norm(mgard::pb::ErrorControl::S_NORM); - error.set_s(s); - } - error.set_tolerance(tol); - } - - { // Domain Decomposition - mgard::pb::DomainDecomposition &domainDecomposition = - *header.mutable_domain_decomposition(); - if (domain_decomposed) { - if (ddtype == domain_decomposition_type::MaxDim) { - domainDecomposition.set_method( - mgard::pb::DomainDecomposition::MAX_DIMENSION); - } else if (ddtype == domain_decomposition_type::Block) { - domainDecomposition.set_method(mgard::pb::DomainDecomposition::BLOCK); - } else if (ddtype == domain_decomposition_type::Variable) { - domainDecomposition.set_method( - mgard::pb::DomainDecomposition::VARIABLE); - } - } else { - domainDecomposition.set_method( - mgard::pb::DomainDecomposition::NOOP_METHOD); - } - domainDecomposition.set_decomposition_dimension(domain_decomposed_dim); - domainDecomposition.set_decomposition_size(domain_decomposed_size); - } - - { // Function Decomposition - mgard::pb::FunctionDecomposition &function_decomposition = - *header.mutable_function_decomposition(); - function_decomposition.set_transform( - mgard::pb::FunctionDecomposition::MULTILEVEL_COEFFICIENTS); - if (decomposition == decomposition_type::MultiDim) { - function_decomposition.set_hierarchy( - mgard::pb::FunctionDecomposition::MULTIDIMENSION_WITH_GHOST_NODES); - } else if (decomposition == decomposition_type::SingleDim) { - function_decomposition.set_hierarchy( - mgard::pb::FunctionDecomposition:: - ONE_DIM_AT_A_TIME_WITH_GHOST_NODES); - } else if (decomposition == decomposition_type::Hybrid) { - function_decomposition.set_hierarchy( - mgard::pb::FunctionDecomposition::HYBRID_HIERARCHY); - } - function_decomposition.set_l_target(l_target); - } - - { // Quantization - mgard::pb::Quantization &quantization = *header.mutable_quantization(); - if (otype == operation_type::Compression) { - quantization.set_method( - mgard::pb::Quantization::COEFFICIENTWISE_LINEAR); - quantization.set_bin_widths(mgard::pb::Quantization::PER_COEFFICIENT); - quantization.set_type(mgard::pb::Quantization::INT64_T); - quantization.set_big_endian(big_endian()); - if (big_endian()) { - etype = endiness_type::Big_Endian; - } else { - etype = endiness_type::Little_Endian; - } - } else { - quantization.set_method(mgard::pb::Quantization::NOOP_QUANTIZATION); - } - } - - { // MDR - mgard::pb::BitplaneEncoding &bitplane_encoding = - *header.mutable_bitplane_encoding(); - if (otype == operation_type::MDR) { - bitplane_encoding.set_method( - mgard::pb::BitplaneEncoding::GROUPED_BITPLANE_ENCODING); - bitplane_encoding.set_type(mgard::pb::BitplaneEncoding::INT32_T); - bitplane_encoding.set_number_bitplanes(number_bitplanes); - bitplane_encoding.set_big_endian(big_endian()); - } else { - bitplane_encoding.set_method( - mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING); - } - } - - { // Encoding - mgard::pb::Encoding &encoding = *header.mutable_encoding(); - if (reorder == 0) { - encoding.set_preprocessor(mgard::pb::Encoding::NOOP_PREPROCESSOR); - } else { - encoding.set_preprocessor(mgard::pb::Encoding::SHUFFLE); - } - if (ltype == mgard_x::lossless_type::Huffman) { - encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN); - encoding.set_huffman_dictionary_size(huff_dict_size); - encoding.set_huffman_block_size(huff_block_size); - } else if (ltype == mgard_x::lossless_type::Huffman_LZ4) { - encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN_LZ4); - encoding.set_huffman_dictionary_size(huff_dict_size); - encoding.set_huffman_block_size(huff_block_size); - } else if (ltype == mgard_x::lossless_type::Huffman_Zstd) { - encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN_ZSTD); - encoding.set_huffman_dictionary_size(huff_dict_size); - encoding.set_huffman_block_size(huff_block_size); - } else if (ltype == mgard_x::lossless_type::CPU_Lossless) { - encoding.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD); - } - } - - { // Device - mgard::pb::Device &device = *header.mutable_device(); - if (ptype == processor_type::X_SERIAL) { - device.set_backend(mgard::pb::Device::X_SERIAL); - } else if (ptype == processor_type::X_OPENMP) { - device.set_backend(mgard::pb::Device::X_OPENMP); - } else if (ptype == processor_type::X_CUDA) { - device.set_backend(mgard::pb::Device::X_CUDA); - } else if (ptype == processor_type::X_HIP) { - device.set_backend(mgard::pb::Device::X_HIP); - } else if (ptype == processor_type::X_SYCL) { - device.set_backend(mgard::pb::Device::X_SYCL); - } - } - - // Serialize protobuf - std::vector header_bytes = SerializeProtoBuf(header); - uint64_t header_size = header_bytes.size(); - uint32_t header_crc32 = - ComputeCRC32(header_bytes.data(), header_bytes.size()); - - total_size = 0; - total_size += mgard::SIGNATURE.size(); - total_size += sizeof(uint64_t); // header size - total_size += sizeof(uint32_t); // crc32 size - total_size += header_size; // header size - - metadata_size = total_size; - - // start serializing - SERIALIZED_TYPE - *serialized_data; // = (SERIALIZED_TYPE *)std::malloc(total_size); - Mem::Malloc1D(serialized_data, total_size, 0); + std::vector data_h = MetadataBase::Serialize(); + total_size = data_h.size(); + SERIALIZED_TYPE *data_d; + Mem::Malloc1D(data_d, total_size, 0); + Mem::Copy1D(data_d, data_h.data(), total_size, 0); DeviceRuntime::SyncQueue(0); - SERIALIZED_TYPE *p = serialized_data; - - SerializeSignature(p); - Serialize(header_size, p); - Serialize(header_crc32, p); - SerializeBytes(header_bytes, p); - - return serialized_data; - } - - void DeserializeAllWithProtobuf(SERIALIZED_TYPE *serialized_data) { - SERIALIZED_TYPE *p = serialized_data; - uint64_t header_size = 0; - uint32_t header_crc32 = 0; - DeserializeSignature(p); - Deserialize(header_size, p); - Deserialize(header_crc32, p); - - if (header_crc32 != ComputeCRC32(p, header_size)) { - std::cout << log::log_err << "header CRC32 mismatch.\n"; - exit(-1); - } - - metadata_size = 0; - metadata_size += mgard::SIGNATURE.size(); - metadata_size += sizeof(uint64_t); // header size - metadata_size += sizeof(uint32_t); // crc32 size - metadata_size += header_size; // header size - - mgard::pb::Header header = DeserializeProtoBuf(p, header_size); - - { // Version Number - const mgard::pb::VersionNumber mgard_version_number = - header.mgard_version(); - software_version[0] = mgard_version_number.major_(); - software_version[1] = mgard_version_number.minor_(); - software_version[2] = mgard_version_number.patch_(); - if (software_version[0] > MGARD_VERSION_MAJOR) { - std::cout << log::log_err << "MGARD version mismatch.\n"; - exit(-1); - } - - const mgard::pb::VersionNumber format_version_number = - header.file_format_version(); - file_version[0] = format_version_number.major_(); - file_version[1] = format_version_number.minor_(); - file_version[2] = format_version_number.patch_(); - if (file_version[0] > MGARD_FILE_VERSION_MAJOR) { - std::cout << log::log_err << "MGARD file format version mismatch.\n"; - exit(-1); - } - } - - { // Domain - const mgard::pb::Domain &domain = header.domain(); - const mgard::pb::CartesianGridTopology cartesian_grid_topology = - domain.cartesian_grid_topology(); - total_dims = cartesian_grid_topology.dimension(); - const google::protobuf::RepeatedField shape_ = - cartesian_grid_topology.shape(); - if (total_dims != shape_.size()) { - std::cout << log::log_err - << "grid shape does not match given dimension.\n"; - exit(-1); - } - shape = std::vector(total_dims); - std::copy(shape_.begin(), shape_.end(), shape.begin()); - - const mgard::pb::Domain::Geometry geometry = domain.geometry(); - if (geometry == mgard::pb::Domain::UNIT_CUBE) { - dstype = data_structure_type::Cartesian_Grid_Uniform; - } else if (geometry == mgard::pb::Domain::EXPLICIT_CUBE) { - dstype = data_structure_type::Cartesian_Grid_Non_Uniform; - const mgard::pb::ExplicitCubeGeometry explicit_cube_geometry = - domain.explicit_cube_geometry(); - const google::protobuf::RepeatedField coordinates = - explicit_cube_geometry.coordinates(); - uint64_t totel_len = 0; - for (DIM d = 0; d < total_dims; d++) - totel_len += shape[d]; - if (totel_len != coordinates.size()) { - std::cout << log::log_err - << "mismatch between number of node coordinates and grid " - "shape.\n"; - exit(-1); - } - using It = google::protobuf::RepeatedField::const_iterator; - It p = coordinates.begin(); - coords = std::vector>(total_dims); - for (size_t d = 0; d < total_dims; d++) { - const It q = p + shape[d]; - coords[d] = std::vector(shape[d]); - std::copy(p, q, coords[d].begin()); - p = q; - } - assert(p == coordinates.end()); - } - } - - { // Dataset - const mgard::pb::Dataset dataset = header.dataset(); - if (dataset.type() == mgard::pb::Dataset::FLOAT) { - dtype = data_type::Float; - } else if (dataset.type() == mgard::pb::Dataset::DOUBLE) { - dtype = data_type::Double; - } - assert(dataset.dimension() == 1); - } - - { // Error control - const mgard::pb::ErrorControl error = header.error_control(); - if (error.mode() == mgard::pb::ErrorControl::ABSOLUTE) { - ebtype = error_bound_type::ABS; - } else if (error.mode() == mgard::pb::ErrorControl::RELATIVE) { - ebtype = error_bound_type::REL; - norm = error.norm_of_original_data(); - } - - if (error.norm() == mgard::pb::ErrorControl::L_INFINITY) { - ntype = norm_type::L_Inf; - s = std::numeric_limits::infinity(); - } else if (error.norm() == mgard::pb::ErrorControl::S_NORM) { - ntype = norm_type::L_2; - s = error.s(); - } - tol = error.tolerance(); - } - - { // Domain Decomposition - const mgard::pb::DomainDecomposition domainDecomposition = - header.domain_decomposition(); - if (domainDecomposition.method() != - mgard::pb::DomainDecomposition::NOOP_METHOD) { - domain_decomposed = true; - if (domainDecomposition.method() == - mgard::pb::DomainDecomposition::MAX_DIMENSION) { - ddtype = domain_decomposition_type::MaxDim; - } else if (domainDecomposition.method() == - mgard::pb::DomainDecomposition::BLOCK) { - ddtype = domain_decomposition_type::Block; - } else if (domainDecomposition.method() == - mgard::pb::DomainDecomposition::VARIABLE) { - ddtype = domain_decomposition_type::Variable; - } - - domain_decomposed_dim = domainDecomposition.decomposition_dimension(); - domain_decomposed_size = domainDecomposition.decomposition_size(); - } else { - domain_decomposed = false; - } - } - - { // Function Decomposition - const mgard::pb::FunctionDecomposition function_decomposition = - header.function_decomposition(); - assert(function_decomposition.transform() == - mgard::pb::FunctionDecomposition::MULTILEVEL_COEFFICIENTS); - if (function_decomposition.hierarchy() == - mgard::pb::FunctionDecomposition::MULTIDIMENSION_WITH_GHOST_NODES) { - decomposition = decomposition_type::MultiDim; - } else if (function_decomposition.hierarchy() == - mgard::pb::FunctionDecomposition:: - ONE_DIM_AT_A_TIME_WITH_GHOST_NODES) { - decomposition = decomposition_type::SingleDim; - } else if (function_decomposition.hierarchy() == - mgard::pb::FunctionDecomposition::HYBRID_HIERARCHY) { - decomposition = decomposition_type::Hybrid; - } else { - std::cout << log::log_err - << "this decomposition hierarchy mismatch the hierarchy used " - "in MGARD-X.\n"; - exit(-1); - } - l_target = function_decomposition.l_target(); - } - - { // Quantization - const mgard::pb::Quantization quantization = header.quantization(); - if (quantization.method() != mgard::pb::Quantization::NOOP_QUANTIZATION) { - assert(quantization.bin_widths() == - mgard::pb::Quantization::PER_COEFFICIENT); - assert(quantization.type() == mgard::pb::Quantization::INT64_T); - assert(quantization.big_endian() == big_endian()); - if (big_endian()) { - etype = endiness_type::Big_Endian; - } else { - etype = endiness_type::Little_Endian; - } - } - } - - { // MDR - const mgard::pb::BitplaneEncoding bitplane_encoding = - header.bitplane_encoding(); - if (bitplane_encoding.method() != - mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING) { - number_bitplanes = bitplane_encoding.number_bitplanes(); - assert(bitplane_encoding.big_endian() == big_endian()); - } - } - - { - const mgard::pb::Quantization quantization = header.quantization(); - const mgard::pb::BitplaneEncoding bitplane_encoding = - header.bitplane_encoding(); - if (quantization.method() != mgard::pb::Quantization::NOOP_QUANTIZATION && - bitplane_encoding.method() != - mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING || - quantization.method() == mgard::pb::Quantization::NOOP_QUANTIZATION && - bitplane_encoding.method() == - mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING) { - std::cout << log::log_err - << "cannot determine whether this is compressed or " - "refactored data.\n"; - exit(-1); - } - } - - if (otype == operation_type::MDR) { // MDR - mgard::pb::BitplaneEncoding &bitplane_encoding = - *header.mutable_bitplane_encoding(); - bitplane_encoding.set_method( - mgard::pb::BitplaneEncoding::GROUPED_BITPLANE_ENCODING); - bitplane_encoding.set_type(mgard::pb::BitplaneEncoding::INT32_T); - bitplane_encoding.set_number_bitplanes(number_bitplanes); - bitplane_encoding.set_big_endian(big_endian()); - } - - { // Encoding - const mgard::pb::Encoding encoding = header.encoding(); - if (encoding.preprocessor() == mgard::pb::Encoding::SHUFFLE) { - reorder = 1; - } else { - reorder = 0; - } - if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN) { - ltype = mgard_x::lossless_type::Huffman; - huff_dict_size = encoding.huffman_dictionary_size(); - huff_block_size = encoding.huffman_block_size(); - } else if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN_LZ4) { - ltype = mgard_x::lossless_type::Huffman_LZ4; - huff_dict_size = encoding.huffman_dictionary_size(); - huff_block_size = encoding.huffman_block_size(); - } else if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN_ZSTD) { - ltype = mgard_x::lossless_type::Huffman_Zstd; - huff_dict_size = encoding.huffman_dictionary_size(); - huff_block_size = encoding.huffman_block_size(); - } else if (encoding.compressor() == - mgard::pb::Encoding::CPU_HUFFMAN_ZSTD) { - ltype = mgard_x::lossless_type::CPU_Lossless; - } else { - std::cout << log::log_err << "unknown lossless compressor type.\n"; - exit(-1); - } - } - - { // Device - const mgard::pb::Device device = header.device(); - if (device.backend() == mgard::pb::Device::X_SERIAL) { - ptype = processor_type::X_SERIAL; - } else if (device.backend() == mgard::pb::Device::X_OPENMP) { - ptype = processor_type::X_OPENMP; - } else if (device.backend() == mgard::pb::Device::X_CUDA) { - ptype = processor_type::X_CUDA; - } else if (device.backend() == mgard::pb::Device::X_HIP) { - ptype = processor_type::X_HIP; - } else if (device.backend() == mgard::pb::Device::X_SYCL) { - ptype = processor_type::X_SYCL; - } else if (device.backend() == mgard::pb::Device::CPU) { - std::cout << log::log_err - << "this data was not compressed with MGARD-X.\n"; - exit(-1); - } - } - } - - template void Serialize(T &item, SERIALIZED_TYPE *&p) { - if constexpr (std::is_integral::value) { - T in = item; - for (int i = 0; i < sizeof(T); i++) { - // *(p + i) = in; - Mem::Copy1D(p + i, (SERIALIZED_TYPE *)&in, 1, 0); - DeviceRuntime::SyncQueue(0); - in = in >> 8; - } - } else { - // std::memcpy(p, &item, sizeof(item)); - Mem::Copy1D((T *)p, &item, 1, 0); - DeviceRuntime::SyncQueue(0); - } - p += sizeof(item); - } - - template void Deserialize(T &item, SERIALIZED_TYPE *&p) { - if constexpr (std::is_integral::value) { - T out = 0; - for (int i = sizeof(T) - 1; i >= 0; i--) { - out = out << 8; - // out = out + *(p + i); - SERIALIZED_TYPE t; - Mem::Copy1D(&t, p + i, 1, 0); - DeviceRuntime::SyncQueue(0); - out = out + t; - } - item = out; - } else { - // std::memcpy(&item, p, sizeof(item)); - Mem::Copy1D(&item, (T *)p, 1, 0); - DeviceRuntime::SyncQueue(0); - } - p += sizeof(item); - } - - void SerializeSignature(SERIALIZED_TYPE *&p) { - for (char c : mgard::SIGNATURE) { - Serialize(c, p); - } + return data_d; } - - // This function does not assign to a signature data member. Instead, it just - // checks that the deserialized signature matches `mgard::SIGNATURE`. - void DeserializeSignature(SERIALIZED_TYPE *&p) { - for (const char c : mgard::SIGNATURE) { - char c_; - Deserialize(c_, p); - if (c_ != c) { - std::cout << log::log_err << "signature mismatch.\n"; - exit(-1); - } - } - } - - void SerializeShape(std::vector &shape, SERIALIZED_TYPE *&p) { - for (size_t d = 0; d < shape.size(); d++) { - Serialize(shape[d], p); - } - } - - void DeserializeShape(std::vector &shape, SERIALIZED_TYPE *&p) { - shape = std::vector(total_dims); - for (size_t d = 0; d < shape.size(); d++) { - Deserialize(shape[d], p); - } - } - - void SerializeCoords(std::vector> &coords, - SERIALIZED_TYPE *&p) { - for (size_t d = 0; d < coords.size(); d++) { - for (size_t i = 0; i < shape[d]; i++) { - Serialize(coords[d][i], p); - } - } - } - - void DeserializeCoords(std::vector> &coords, - SERIALIZED_TYPE *&p) { - coords = std::vector>(total_dims); - for (size_t d = 0; d < total_dims; d++) { - coords[d] = std::vector(shape[d]); - for (size_t i = 0; i < shape[d]; i++) { - Deserialize(coords[d][i], p); - } - } - } - - void SerializeBytes(std::vector data, SERIALIZED_TYPE *&p) { - for (size_t i = 0; i < data.size(); i++) { - Serialize(data[i], p); - } - } - - void DeserializeBytes(std::vector &data, size_t size, - SERIALIZED_TYPE *&p) { - data = std::vector(size); - for (size_t i = 0; i < data.size(); i++) { - Deserialize(data[i], p); - } - } - - uint32_t ComputeCRC32(SERIALIZED_TYPE *data, size_t size) { - // `crc32_z` takes a `z_size_t`. - if (size > std::numeric_limits::max()) { - std::cout << log::log_err - << "buffer is too large (size would overflow.\n"; - } - SERIALIZED_TYPE *data_h = (SERIALIZED_TYPE *)malloc(size); - Mem::Copy1D(data_h, data, size, 0); + void Deserialize(const SERIALIZED_TYPE *serialized_data) { + // Do a partial deserialize to get the size of the buffer. + std::vector data_h(MetadataBase::SerializePreambleSize()); + Mem::Copy1D(data_h.data(), serialized_data, data_h.size(), 0); DeviceRuntime::SyncQueue(0); - uLong crc32_ = crc32_z(0, Z_NULL, 0); - crc32_ = crc32_z(crc32_, static_cast(data_h), size); - free(data_h); - return crc32_; - } - - std::vector SerializeProtoBuf(mgard::pb::Header &header) { - size_t header_size = header.ByteSize(); - std::vector header_bytes(header_size); - header.SerializeToArray(header_bytes.data(), header_size); - return header_bytes; - } + auto data_h_iter = data_h.cbegin(); + std::uint64_t total_size = MetadataBase::DeserializeSize(data_h_iter); - mgard::pb::Header DeserializeProtoBuf(SERIALIZED_TYPE *header_bytes, - uint64_t header_size) { - // The `CodedInputStream` constructor takes an `int`. - if (header_size > std::numeric_limits::max()) { - std::cout << log::log_err - << "header is too large (size would overflow).\n"; - } - SERIALIZED_TYPE *header_bytes_h = (SERIALIZED_TYPE *)malloc(header_size); - Mem::Copy1D(header_bytes_h, header_bytes, header_size, 0); + data_h.resize(total_size); + Mem::Copy1D(data_h.data(), serialized_data, total_size, 0); DeviceRuntime::SyncQueue(0); - mgard::pb::Header header; - google::protobuf::io::CodedInputStream stream( - static_cast(header_bytes_h), - header_size); - if (not header.ParseFromCodedStream(&stream)) { - throw std::runtime_error( - "header parsing encountered read or format error"); - } - if (not stream.ConsumedEntireMessage()) { - throw std::runtime_error("part of header left unparsed"); - } - free(header_bytes_h); - return header; - } - - template bool big_endian() { - static_assert(std::is_integral::value, - "can only check endianness of integral types"); - const Int n = 1; - return not *reinterpret_cast(&n); + MetadataBase::Deserialize(data_h); } }; diff --git a/src/mgard-x/CMakeLists.txt b/src/mgard-x/CMakeLists.txt index c3a8df5359..488d5501e4 100644 --- a/src/mgard-x/CMakeLists.txt +++ b/src/mgard-x/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory (CompressionHighLevel) if (MGARD_ENABLE_MDR) add_subdirectory (MDRHighLevel) endif() +add_subdirectory (Metadata) add_subdirectory (RuntimeX) set(MGARD_X_SERIAL_SRC ${MGARD_X_SERIAL_SRC} PARENT_SCOPE) set(MGARD_X_OPENMP_SRC ${MGARD_X_OPENMP_SRC} PARENT_SCOPE) diff --git a/src/mgard-x/Metadata/CMakeLists.txt b/src/mgard-x/Metadata/CMakeLists.txt new file mode 100644 index 0000000000..1ef19c88c2 --- /dev/null +++ b/src/mgard-x/Metadata/CMakeLists.txt @@ -0,0 +1,4 @@ +list(APPEND MGARD_X_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/Metadata.cpp) + +set(MGARD_X_SRC ${MGARD_X_SRC} PARENT_SCOPE) diff --git a/src/mgard-x/Metadata/Metadata.cpp b/src/mgard-x/Metadata/Metadata.cpp new file mode 100644 index 0000000000..4ffaae3de0 --- /dev/null +++ b/src/mgard-x/Metadata/Metadata.cpp @@ -0,0 +1,739 @@ +/* + * Copyright 2026, Oak Ridge National Laboratory. + * MGARD-X: MultiGrid Adaptive Reduction of Data Portable across GPUs and CPUs + * Author: Kenneth Moreland (morelandkd@ornl.gov) + * Date: January 5, 2026 + */ + +#include "mgard-x/Metadata/Metadata.hpp" +#include "format.hpp" +#include "proto/mgard.pb.h" + +#include + +using mgard_x::SERIALIZED_TYPE; + +using SerializedIter = std::vector::const_iterator; + +namespace { + +template bool big_endian() { + static_assert(std::is_integral::value, + "can only check endianness of integral types"); + const Int n = 1; + return not *reinterpret_cast(&n); +} + +uint32_t ComputeCRC32(const std::vector &data, + std::size_t start = 0) { + // `crc32_z` takes a `z_size_t`. + if (data.size() - start > std::numeric_limits::max()) { + std::cout << mgard_x::log::log_err + << "buffer is too large (size would overflow.\n"; + } + uLong crc32_ = crc32_z(0, Z_NULL, 0); + crc32_ = crc32_z(crc32_, static_cast(data.data() + start), + data.size() - start); + return crc32_; +} + +std::vector SerializeProtoBuf(mgard::pb::Header &header) { + size_t header_size = header.ByteSize(); + std::vector header_bytes(header_size); + header.SerializeToArray(header_bytes.data(), header_size); + return header_bytes; +} + +mgard::pb::Header +DeserializeProtoBuf(const std::vector header_bytes, + uint64_t offset) { + uint64_t header_size = header_bytes.size() - offset; + + // The `CodedInputStream` constructor takes an `int`. + if (header_size > std::numeric_limits::max()) { + std::cout << mgard_x::log::log_err + << "header is too large (size would overflow).\n"; + } + mgard::pb::Header header; + google::protobuf::io::CodedInputStream stream( + static_cast(header_bytes.data() + + offset), + header_size); + if (not header.ParseFromCodedStream(&stream)) { + throw std::runtime_error("header parsing encountered read or format error"); + } + if (not stream.ConsumedEntireMessage()) { + throw std::runtime_error("part of header left unparsed"); + } + return header; +} + +template +void Serialize(const T &item, std::vector &vec) { + if constexpr (std::is_integral::value) { + T in = item; + for (int i = 0; i < sizeof(T); i++) { + vec.push_back(in); + in = in >> 8; + } + } else { + const SERIALIZED_TYPE *in = + reinterpret_cast(&item); + vec.insert(vec.end(), in, in + sizeof(item)); + } +} + +template void Deserialize(T &item, SerializedIter &iter) { + if constexpr (std::is_integral::value) { + T out = 0; + for (int i = sizeof(T) - 1; i >= 0; i--) { + out = out << 8; + out = out + *(iter + i); + } + item = out; + } else { + SERIALIZED_TYPE *out = reinterpret_cast(&item); + std::copy(iter, iter + sizeof(T), out); + } + iter += sizeof(T); +} + +void SerializeSignature(std::vector &vec) { + for (char c : mgard::SIGNATURE) { + Serialize(c, vec); + } +} + +// This function does not assign to a signature data member. Instead, it just +// checks that the deserialized signature matches `mgard::SIGNATURE`. +void DeserializeSignature(SerializedIter &iter) { + for (const char c : mgard::SIGNATURE) { + char c_; + Deserialize(c_, iter); + if (c_ != c) { + std::cout << mgard_x::log::log_err << "signature mismatch.\n"; + exit(-1); + } + } +} + +void SerializeBytes(const std::vector &data, + std::vector &vec) { + vec.insert(vec.end(), data.begin(), data.end()); +} + +} // anonymous namespace + +namespace mgard_x { + +void MetadataBase::InitializeConfig(Config &config) { + config.domain_decomposition = ddtype; + config.decomposition = decomposition; + config.lossless = ltype; + config.huff_dict_size = huff_dict_size; + config.huff_block_size = huff_block_size; + config.reorder = reorder; +} + +void MetadataBase::PrintSummary() { + std::cout << "=======Metadata Summary=======\n"; + std::cout << "Signature: "; + for (const char c : mgard::SIGNATURE) + std::cout << c; + std::cout << "\n"; + std::cout << "MGARD version: " << (int)software_version[0] << "." + << (int)software_version[1] << "." << (int)software_version[2] + << "\n"; + std::cout << "File format version: " << (int)file_version[0] << "." + << (int)file_version[1] << "." << (int)file_version[2] << "\n"; + std::cout << "Metadata size: " << metadata_size << "\n"; + std::cout << "Metadata crc32: " << metadata_crc32 << "\n"; + std::cout << "Endiness: "; + if (etype == endiness_type::Big_Endian) { + std::cout << "Big Endian\n"; + } else { + std::cout << "Little Endian\n"; + } + std::cout << "Data type: "; + if (dtype == data_type::Float) { + std::cout << "Float\n"; + } else if (dtype == data_type::Double) { + std::cout << "Double\n"; + } + std::cout << "Topology: "; + if (dstype == data_structure_type::Cartesian_Grid_Uniform) { + std::cout << "Uniform Grid\n"; + } else if (dstype == data_structure_type::Cartesian_Grid_Non_Uniform) { + std::cout << "Non-uniform Grid\n"; + } + std::cout << "Shape: "; + for (uint64_t &c : shape) + std::cout << c << " "; + std::cout << "\n"; + std::cout << "Function Decomposition: "; + if (decomposition == decomposition_type::MultiDim) { + std::cout << "MultiDim\n"; + } else if (decomposition == decomposition_type::SingleDim) { + std::cout << "SingleDim\n"; + } + std::cout << "Reorder: " << reorder << "\n"; + std::cout << "Domain Decomposition: "; + if (domain_decomposed) { + if (ddtype == domain_decomposition_type::MaxDim) { + std::cout << "MaxDim\n"; + } else if (ddtype == domain_decomposition_type::Variable) { + std::cout << "Variable\n"; + } else { + std::cout << "Block\n"; + } + std::cout << "Decomposed Dim: " << domain_decomposed_dim << "\n"; + std::cout << "Decomposed Size: " << domain_decomposed_size << "\n"; + } else { + std::cout << "No\n"; + } + std::cout << "Error bound mode: "; + if (ebtype == error_bound_type::REL) { + std::cout << "REL\n"; + } else if (ebtype == error_bound_type::ABS) { + std::cout << "ABS\n"; + } + std::cout << "Norm type: "; + if (ntype == norm_type::L_Inf) { + std::cout << "L_Inf\n"; + } else if (ntype == norm_type::L_2) { + std::cout << "L_2\n"; + } + std::cout << "Norm: " << norm << "\n"; + std::cout << "tol: " << tol << "\n"; + std::cout << "s: " << s << "\n"; + + std::cout << "Lossless: "; + if (ltype == mgard_x::lossless_type::Huffman) { + std::cout << "Huffman\n"; + std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; + std::cout << "Huffman block size: " << huff_block_size << "\n"; + } else if (ltype == mgard_x::lossless_type::Huffman_LZ4) { + std::cout << "Huffman_LZ4\n"; + std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; + std::cout << "Huffman block size: " << huff_block_size << "\n"; + } else if (ltype == mgard_x::lossless_type::Huffman_Zstd) { + std::cout << "Huffman_Zstd\n"; + std::cout << "Huffman dictionary size: " << huff_dict_size << "\n"; + std::cout << "Huffman block size: " << huff_block_size << "\n"; + } else if (ltype == mgard_x::lossless_type::CPU_Lossless) { + std::cout << "CPU_Lossless\n"; + } + + std::cout << "Backend: "; + if (ptype == processor_type::X_SERIAL) { + std::cout << "X_SERIAL\n"; + } else if (ptype == processor_type::X_CUDA) { + std::cout << "X_OPENMP\n"; + } else if (ptype == processor_type::X_OPENMP) { + std::cout << "X_CUDA\n"; + } else if (ptype == processor_type::X_HIP) { + std::cout << "X_HIP\n"; + } else if (ptype == processor_type::X_SYCL) { + std::cout << "X_SYCL\n"; + } +} + +std::uint64_t MetadataBase::SerializePreambleSize() { + std::uint64_t preamble_size = 0; + preamble_size += mgard::SIGNATURE.size(); + preamble_size += sizeof(uint64_t); // header size + preamble_size += sizeof(uint32_t); // crc32 size + return preamble_size; +} + +std::vector MetadataBase::Serialize() { + mgard::pb::Header header; + + { // Version Number + software_version[0] = MGARD_VERSION_MAJOR; + software_version[1] = MGARD_VERSION_MINOR; + software_version[2] = MGARD_VERSION_PATCH; + + mgard::pb::VersionNumber &mgard_version_number = + *header.mutable_mgard_version(); + mgard_version_number.set_major_(MGARD_VERSION_MAJOR); + mgard_version_number.set_minor_(MGARD_VERSION_MINOR); + mgard_version_number.set_patch_(MGARD_VERSION_PATCH); + + file_version[0] = MGARD_FILE_VERSION_MAJOR; + file_version[1] = MGARD_FILE_VERSION_MINOR; + file_version[2] = MGARD_FILE_VERSION_PATCH; + + mgard::pb::VersionNumber &format_version_number = + *header.mutable_file_format_version(); + mgard_version_number.set_major_(MGARD_FILE_VERSION_MAJOR); + mgard_version_number.set_minor_(MGARD_FILE_VERSION_MINOR); + mgard_version_number.set_patch_(MGARD_FILE_VERSION_PATCH); + } + + { // Domain + mgard::pb::Domain &domain = *header.mutable_domain(); + domain.set_topology(mgard::pb::Domain::CARTESIAN_GRID); + mgard::pb::CartesianGridTopology &cartesian_grid_topology = + *domain.mutable_cartesian_grid_topology(); + cartesian_grid_topology.set_dimension(total_dims); + google::protobuf::RepeatedField &shape_ = + *cartesian_grid_topology.mutable_shape(); + shape_.Resize(total_dims, 0); + std::copy(shape.begin(), shape.end(), shape_.mutable_data()); + mgard::pb::Domain::Geometry geometry; + if (dstype == data_structure_type::Cartesian_Grid_Uniform) { + geometry = mgard::pb::Domain::UNIT_CUBE; + } else { + geometry = mgard::pb::Domain::EXPLICIT_CUBE; + mgard::pb::ExplicitCubeGeometry &explicit_cube_geometry = + *domain.mutable_explicit_cube_geometry(); + google::protobuf::RepeatedField &coordinates_ = + *explicit_cube_geometry.mutable_coordinates(); + + uint64_t totel_len = 0; + for (DIM d = 0; d < total_dims; d++) + totel_len += shape[d]; + coordinates_.Resize(totel_len, 0); + double *p = coordinates_.mutable_data(); + for (DIM d = 0; d < total_dims; d++) { + std::copy(coords[d].begin(), coords[d].end(), p); + p += shape[d]; + } + } + domain.set_geometry(geometry); + } + + { // Dataset + mgard::pb::Dataset &dataset = *header.mutable_dataset(); + if (dtype == data_type::Float) { + dataset.set_type(mgard::pb::Dataset::FLOAT); + } else if (dtype == data_type::Double) { + dataset.set_type(mgard::pb::Dataset::DOUBLE); + } + dataset.set_dimension(1); + } + + { // Error control + mgard::pb::ErrorControl &error = *header.mutable_error_control(); + if (ebtype == error_bound_type::ABS) { + error.set_mode(mgard::pb::ErrorControl::ABSOLUTE); + } else if (ebtype == error_bound_type::REL) { + error.set_mode(mgard::pb::ErrorControl::RELATIVE); + error.set_norm_of_original_data(norm); + } + if (ntype == norm_type::L_Inf) { + error.set_norm(mgard::pb::ErrorControl::L_INFINITY); + error.set_s(s); + } else { + error.set_norm(mgard::pb::ErrorControl::S_NORM); + error.set_s(s); + } + error.set_tolerance(tol); + } + + { // Domain Decomposition + mgard::pb::DomainDecomposition &domainDecomposition = + *header.mutable_domain_decomposition(); + if (domain_decomposed) { + if (ddtype == domain_decomposition_type::MaxDim) { + domainDecomposition.set_method( + mgard::pb::DomainDecomposition::MAX_DIMENSION); + } else if (ddtype == domain_decomposition_type::Block) { + domainDecomposition.set_method(mgard::pb::DomainDecomposition::BLOCK); + } else if (ddtype == domain_decomposition_type::Variable) { + domainDecomposition.set_method( + mgard::pb::DomainDecomposition::VARIABLE); + } + } else { + domainDecomposition.set_method( + mgard::pb::DomainDecomposition::NOOP_METHOD); + } + domainDecomposition.set_decomposition_dimension(domain_decomposed_dim); + domainDecomposition.set_decomposition_size(domain_decomposed_size); + } + + { // Function Decomposition + mgard::pb::FunctionDecomposition &function_decomposition = + *header.mutable_function_decomposition(); + function_decomposition.set_transform( + mgard::pb::FunctionDecomposition::MULTILEVEL_COEFFICIENTS); + if (decomposition == decomposition_type::MultiDim) { + function_decomposition.set_hierarchy( + mgard::pb::FunctionDecomposition::MULTIDIMENSION_WITH_GHOST_NODES); + } else if (decomposition == decomposition_type::SingleDim) { + function_decomposition.set_hierarchy( + mgard::pb::FunctionDecomposition::ONE_DIM_AT_A_TIME_WITH_GHOST_NODES); + } else if (decomposition == decomposition_type::Hybrid) { + function_decomposition.set_hierarchy( + mgard::pb::FunctionDecomposition::HYBRID_HIERARCHY); + } + function_decomposition.set_l_target(l_target); + } + + { // Quantization + mgard::pb::Quantization &quantization = *header.mutable_quantization(); + if (otype == operation_type::Compression) { + quantization.set_method(mgard::pb::Quantization::COEFFICIENTWISE_LINEAR); + quantization.set_bin_widths(mgard::pb::Quantization::PER_COEFFICIENT); + quantization.set_type(mgard::pb::Quantization::INT64_T); + quantization.set_big_endian(big_endian()); + if (big_endian()) { + etype = endiness_type::Big_Endian; + } else { + etype = endiness_type::Little_Endian; + } + } else { + quantization.set_method(mgard::pb::Quantization::NOOP_QUANTIZATION); + } + } + + { // MDR + mgard::pb::BitplaneEncoding &bitplane_encoding = + *header.mutable_bitplane_encoding(); + if (otype == operation_type::MDR) { + bitplane_encoding.set_method( + mgard::pb::BitplaneEncoding::GROUPED_BITPLANE_ENCODING); + bitplane_encoding.set_type(mgard::pb::BitplaneEncoding::INT32_T); + bitplane_encoding.set_number_bitplanes(number_bitplanes); + bitplane_encoding.set_big_endian(big_endian()); + } else { + bitplane_encoding.set_method( + mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING); + } + } + + { // Encoding + mgard::pb::Encoding &encoding = *header.mutable_encoding(); + if (reorder == 0) { + encoding.set_preprocessor(mgard::pb::Encoding::NOOP_PREPROCESSOR); + } else { + encoding.set_preprocessor(mgard::pb::Encoding::SHUFFLE); + } + if (ltype == mgard_x::lossless_type::Huffman) { + encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN); + encoding.set_huffman_dictionary_size(huff_dict_size); + encoding.set_huffman_block_size(huff_block_size); + } else if (ltype == mgard_x::lossless_type::Huffman_LZ4) { + encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN_LZ4); + encoding.set_huffman_dictionary_size(huff_dict_size); + encoding.set_huffman_block_size(huff_block_size); + } else if (ltype == mgard_x::lossless_type::Huffman_Zstd) { + encoding.set_compressor(mgard::pb::Encoding::X_HUFFMAN_ZSTD); + encoding.set_huffman_dictionary_size(huff_dict_size); + encoding.set_huffman_block_size(huff_block_size); + } else if (ltype == mgard_x::lossless_type::CPU_Lossless) { + encoding.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD); + } + } + + { // Device + mgard::pb::Device &device = *header.mutable_device(); + if (ptype == processor_type::X_SERIAL) { + device.set_backend(mgard::pb::Device::X_SERIAL); + } else if (ptype == processor_type::X_OPENMP) { + device.set_backend(mgard::pb::Device::X_OPENMP); + } else if (ptype == processor_type::X_CUDA) { + device.set_backend(mgard::pb::Device::X_CUDA); + } else if (ptype == processor_type::X_HIP) { + device.set_backend(mgard::pb::Device::X_HIP); + } else if (ptype == processor_type::X_SYCL) { + device.set_backend(mgard::pb::Device::X_SYCL); + } + } + + // Serialize protobuf + std::vector header_bytes = SerializeProtoBuf(header); + uint64_t header_size = header_bytes.size(); + uint32_t header_crc32 = ComputeCRC32(header_bytes); + + metadata_size = SerializePreambleSize() + header_size; + + // start serializing + std::vector serialized_data; + serialized_data.reserve(metadata_size); + + ::SerializeSignature(serialized_data); + ::Serialize(header_size, serialized_data); + ::Serialize(header_crc32, serialized_data); + ::SerializeBytes(header_bytes, serialized_data); + + return serialized_data; +} + +uint64_t MetadataBase::DeserializeSize( + std::vector::const_iterator &iter) { + uint64_t header_size = 0; + ::DeserializeSignature(iter); + ::Deserialize(header_size, iter); + + header_size += SerializePreambleSize(); + + return header_size; +} + +void MetadataBase::Deserialize( + const std::vector &serialized_data) { + metadata_size = serialized_data.size(); + + SerializedIter iter = serialized_data.cbegin(); + DeserializeSize(iter); + + uint32_t header_crc32 = 0; + ::Deserialize(header_crc32, iter); + + // Actual serialized data is after header starting at index offset. + uint64_t offset = std::distance(serialized_data.begin(), iter); + + if (header_crc32 != ComputeCRC32(serialized_data, offset)) { + std::cout << log::log_err << "header CRC32 mismatch.\n"; + exit(-1); + } + + mgard::pb::Header header = DeserializeProtoBuf(serialized_data, offset); + + { // Version Number + const mgard::pb::VersionNumber mgard_version_number = + header.mgard_version(); + software_version[0] = mgard_version_number.major_(); + software_version[1] = mgard_version_number.minor_(); + software_version[2] = mgard_version_number.patch_(); + if (software_version[0] > MGARD_VERSION_MAJOR) { + std::cout << log::log_err << "MGARD version mismatch.\n"; + exit(-1); + } + + const mgard::pb::VersionNumber format_version_number = + header.file_format_version(); + file_version[0] = format_version_number.major_(); + file_version[1] = format_version_number.minor_(); + file_version[2] = format_version_number.patch_(); + if (file_version[0] > MGARD_FILE_VERSION_MAJOR) { + std::cout << log::log_err << "MGARD file format version mismatch.\n"; + exit(-1); + } + } + + { // Domain + const mgard::pb::Domain &domain = header.domain(); + const mgard::pb::CartesianGridTopology cartesian_grid_topology = + domain.cartesian_grid_topology(); + total_dims = cartesian_grid_topology.dimension(); + const google::protobuf::RepeatedField shape_ = + cartesian_grid_topology.shape(); + if (total_dims != shape_.size()) { + std::cout << log::log_err + << "grid shape does not match given dimension.\n"; + exit(-1); + } + shape = std::vector(total_dims); + std::copy(shape_.begin(), shape_.end(), shape.begin()); + + const mgard::pb::Domain::Geometry geometry = domain.geometry(); + if (geometry == mgard::pb::Domain::UNIT_CUBE) { + dstype = data_structure_type::Cartesian_Grid_Uniform; + } else if (geometry == mgard::pb::Domain::EXPLICIT_CUBE) { + dstype = data_structure_type::Cartesian_Grid_Non_Uniform; + const mgard::pb::ExplicitCubeGeometry explicit_cube_geometry = + domain.explicit_cube_geometry(); + const google::protobuf::RepeatedField coordinates = + explicit_cube_geometry.coordinates(); + uint64_t totel_len = 0; + for (DIM d = 0; d < total_dims; d++) + totel_len += shape[d]; + if (totel_len != coordinates.size()) { + std::cout << log::log_err + << "mismatch between number of node coordinates and grid " + "shape.\n"; + exit(-1); + } + using It = google::protobuf::RepeatedField::const_iterator; + It p = coordinates.begin(); + coords = std::vector>(total_dims); + for (size_t d = 0; d < total_dims; d++) { + const It q = p + shape[d]; + coords[d] = std::vector(shape[d]); + std::copy(p, q, coords[d].begin()); + p = q; + } + assert(p == coordinates.end()); + } + } + + { // Dataset + const mgard::pb::Dataset dataset = header.dataset(); + if (dataset.type() == mgard::pb::Dataset::FLOAT) { + dtype = data_type::Float; + } else if (dataset.type() == mgard::pb::Dataset::DOUBLE) { + dtype = data_type::Double; + } + assert(dataset.dimension() == 1); + } + + { // Error control + const mgard::pb::ErrorControl error = header.error_control(); + if (error.mode() == mgard::pb::ErrorControl::ABSOLUTE) { + ebtype = error_bound_type::ABS; + } else if (error.mode() == mgard::pb::ErrorControl::RELATIVE) { + ebtype = error_bound_type::REL; + norm = error.norm_of_original_data(); + } + + if (error.norm() == mgard::pb::ErrorControl::L_INFINITY) { + ntype = norm_type::L_Inf; + s = std::numeric_limits::infinity(); + } else if (error.norm() == mgard::pb::ErrorControl::S_NORM) { + ntype = norm_type::L_2; + s = error.s(); + } + tol = error.tolerance(); + } + + { // Domain Decomposition + const mgard::pb::DomainDecomposition domainDecomposition = + header.domain_decomposition(); + if (domainDecomposition.method() != + mgard::pb::DomainDecomposition::NOOP_METHOD) { + domain_decomposed = true; + if (domainDecomposition.method() == + mgard::pb::DomainDecomposition::MAX_DIMENSION) { + ddtype = domain_decomposition_type::MaxDim; + } else if (domainDecomposition.method() == + mgard::pb::DomainDecomposition::BLOCK) { + ddtype = domain_decomposition_type::Block; + } else if (domainDecomposition.method() == + mgard::pb::DomainDecomposition::VARIABLE) { + ddtype = domain_decomposition_type::Variable; + } + + domain_decomposed_dim = domainDecomposition.decomposition_dimension(); + domain_decomposed_size = domainDecomposition.decomposition_size(); + } else { + domain_decomposed = false; + } + } + + { // Function Decomposition + const mgard::pb::FunctionDecomposition function_decomposition = + header.function_decomposition(); + assert(function_decomposition.transform() == + mgard::pb::FunctionDecomposition::MULTILEVEL_COEFFICIENTS); + if (function_decomposition.hierarchy() == + mgard::pb::FunctionDecomposition::MULTIDIMENSION_WITH_GHOST_NODES) { + decomposition = decomposition_type::MultiDim; + } else if (function_decomposition.hierarchy() == + mgard::pb::FunctionDecomposition:: + ONE_DIM_AT_A_TIME_WITH_GHOST_NODES) { + decomposition = decomposition_type::SingleDim; + } else if (function_decomposition.hierarchy() == + mgard::pb::FunctionDecomposition::HYBRID_HIERARCHY) { + decomposition = decomposition_type::Hybrid; + } else { + std::cout << log::log_err + << "this decomposition hierarchy mismatch the hierarchy used " + "in MGARD-X.\n"; + exit(-1); + } + l_target = function_decomposition.l_target(); + } + + { // Quantization + const mgard::pb::Quantization quantization = header.quantization(); + if (quantization.method() != mgard::pb::Quantization::NOOP_QUANTIZATION) { + assert(quantization.bin_widths() == + mgard::pb::Quantization::PER_COEFFICIENT); + assert(quantization.type() == mgard::pb::Quantization::INT64_T); + assert(quantization.big_endian() == big_endian()); + if (big_endian()) { + etype = endiness_type::Big_Endian; + } else { + etype = endiness_type::Little_Endian; + } + } + } + + { // MDR + const mgard::pb::BitplaneEncoding bitplane_encoding = + header.bitplane_encoding(); + if (bitplane_encoding.method() != + mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING) { + number_bitplanes = bitplane_encoding.number_bitplanes(); + assert(bitplane_encoding.big_endian() == big_endian()); + } + } + + { + const mgard::pb::Quantization quantization = header.quantization(); + const mgard::pb::BitplaneEncoding bitplane_encoding = + header.bitplane_encoding(); + if (quantization.method() != mgard::pb::Quantization::NOOP_QUANTIZATION && + bitplane_encoding.method() != + mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING || + quantization.method() == mgard::pb::Quantization::NOOP_QUANTIZATION && + bitplane_encoding.method() == + mgard::pb::BitplaneEncoding::NOOP_BITPLANE_ENCODING) { + std::cout << log::log_err + << "cannot determine whether this is compressed or " + "refactored data.\n"; + exit(-1); + } + } + + if (otype == operation_type::MDR) { // MDR + mgard::pb::BitplaneEncoding &bitplane_encoding = + *header.mutable_bitplane_encoding(); + bitplane_encoding.set_method( + mgard::pb::BitplaneEncoding::GROUPED_BITPLANE_ENCODING); + bitplane_encoding.set_type(mgard::pb::BitplaneEncoding::INT32_T); + bitplane_encoding.set_number_bitplanes(number_bitplanes); + bitplane_encoding.set_big_endian(big_endian()); + } + + { // Encoding + const mgard::pb::Encoding encoding = header.encoding(); + if (encoding.preprocessor() == mgard::pb::Encoding::SHUFFLE) { + reorder = 1; + } else { + reorder = 0; + } + if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN) { + ltype = mgard_x::lossless_type::Huffman; + huff_dict_size = encoding.huffman_dictionary_size(); + huff_block_size = encoding.huffman_block_size(); + } else if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN_LZ4) { + ltype = mgard_x::lossless_type::Huffman_LZ4; + huff_dict_size = encoding.huffman_dictionary_size(); + huff_block_size = encoding.huffman_block_size(); + } else if (encoding.compressor() == mgard::pb::Encoding::X_HUFFMAN_ZSTD) { + ltype = mgard_x::lossless_type::Huffman_Zstd; + huff_dict_size = encoding.huffman_dictionary_size(); + huff_block_size = encoding.huffman_block_size(); + } else if (encoding.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZSTD) { + ltype = mgard_x::lossless_type::CPU_Lossless; + } else { + std::cout << log::log_err << "unknown lossless compressor type.\n"; + exit(-1); + } + } + + { // Device + const mgard::pb::Device device = header.device(); + if (device.backend() == mgard::pb::Device::X_SERIAL) { + ptype = processor_type::X_SERIAL; + } else if (device.backend() == mgard::pb::Device::X_OPENMP) { + ptype = processor_type::X_OPENMP; + } else if (device.backend() == mgard::pb::Device::X_CUDA) { + ptype = processor_type::X_CUDA; + } else if (device.backend() == mgard::pb::Device::X_HIP) { + ptype = processor_type::X_HIP; + } else if (device.backend() == mgard::pb::Device::X_SYCL) { + ptype = processor_type::X_SYCL; + } else if (device.backend() == mgard::pb::Device::CPU) { + std::cout << log::log_err + << "this data was not compressed with MGARD-X.\n"; + exit(-1); + } + } +} + +} // namespace mgard_x