-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[df] Fix casting of projected cardinality fields to int #21738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
77edd8c
c5860c1
b561fe8
996699f
3100fea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |||||||||||||||||
| #include <TSystem.h> | ||||||||||||||||||
|
|
||||||||||||||||||
| #include <cassert> | ||||||||||||||||||
| #include <limits> | ||||||||||||||||||
| #include <memory> | ||||||||||||||||||
| #include <mutex> | ||||||||||||||||||
| #include <string> | ||||||||||||||||||
|
|
@@ -49,32 +50,27 @@ | |||||||||||||||||
| // clang-format on | ||||||||||||||||||
|
|
||||||||||||||||||
| namespace ROOT::Internal::RDF { | ||||||||||||||||||
| /// An artificial field that transforms an RNTuple column that contains the offset of collections into | ||||||||||||||||||
| /// collection sizes. It is used to provide the "number of" RDF columns for collections, e.g. | ||||||||||||||||||
| /// `R_rdf_sizeof_jets` for a collection named `jets`. | ||||||||||||||||||
| /// | ||||||||||||||||||
| /// This field owns the collection offset field but instead of exposing the collection offsets it exposes | ||||||||||||||||||
| /// the collection sizes (offset(N+1) - offset(N)). For the time being, we offer this functionality only in RDataFrame. | ||||||||||||||||||
| /// TODO(jblomer): consider providing a general set of useful virtual fields as part of RNTuple. | ||||||||||||||||||
| class RRDFCardinalityField final : public ROOT::RFieldBase { | ||||||||||||||||||
| class RRDFCardinalityFieldBase : public ROOT::RFieldBase { | ||||||||||||||||||
| protected: | ||||||||||||||||||
| std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view newName) const final | ||||||||||||||||||
| { | ||||||||||||||||||
| return std::make_unique<RRDFCardinalityField>(newName); | ||||||||||||||||||
| } | ||||||||||||||||||
| void ConstructValue(void *where) const final { *static_cast<std::size_t *>(where) = 0; } | ||||||||||||||||||
|
|
||||||||||||||||||
| // We construct these fields and know that they match the page source | ||||||||||||||||||
| void ReconcileOnDiskField(const RNTupleDescriptor &) final {} | ||||||||||||||||||
|
|
||||||||||||||||||
| public: | ||||||||||||||||||
| RRDFCardinalityField(std::string_view name) | ||||||||||||||||||
| : ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */) | ||||||||||||||||||
| RRDFCardinalityFieldBase(std::string_view name, std::string_view type) | ||||||||||||||||||
| : ROOT::RFieldBase(name, type, ROOT::ENTupleStructure::kPlain, false /* isSimple */) | ||||||||||||||||||
| { | ||||||||||||||||||
| } | ||||||||||||||||||
| RRDFCardinalityField(RRDFCardinalityField &&other) = default; | ||||||||||||||||||
| RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default; | ||||||||||||||||||
| ~RRDFCardinalityField() override = default; | ||||||||||||||||||
|
|
||||||||||||||||||
| // Field is only used for reading | ||||||||||||||||||
| void GenerateColumns() final { throw RException(R__FAIL("Cardinality fields must only be used for reading")); } | ||||||||||||||||||
| void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final | ||||||||||||||||||
| { | ||||||||||||||||||
| GenerateColumnsImpl<ROOT::Internal::RColumnIndex>(desc); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| public: | ||||||||||||||||||
| RRDFCardinalityFieldBase(RRDFCardinalityFieldBase &&other) = default; | ||||||||||||||||||
| RRDFCardinalityFieldBase &operator=(RRDFCardinalityFieldBase &&other) = default; | ||||||||||||||||||
| ~RRDFCardinalityFieldBase() override = default; | ||||||||||||||||||
|
Comment on lines
+71
to
+73
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For completeness
Suggested change
|
||||||||||||||||||
|
|
||||||||||||||||||
| const RColumnRepresentations &GetColumnRepresentations() const final | ||||||||||||||||||
| { | ||||||||||||||||||
|
|
@@ -85,23 +81,52 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { | |||||||||||||||||
| {}); | ||||||||||||||||||
| return representations; | ||||||||||||||||||
| } | ||||||||||||||||||
| // Field is only used for reading | ||||||||||||||||||
| void GenerateColumns() final { throw RException(R__FAIL("Cardinality fields must only be used for reading")); } | ||||||||||||||||||
| void GenerateColumns(const ROOT::RNTupleDescriptor &desc) final | ||||||||||||||||||
| }; | ||||||||||||||||||
|
|
||||||||||||||||||
| /// An artificial field that transforms an RNTuple column that contains the offset of collections into | ||||||||||||||||||
| /// collection sizes. It is used to provide the "number of" RDF columns for collections, e.g. | ||||||||||||||||||
| /// `R_rdf_sizeof_jets` for a collection named `jets`. | ||||||||||||||||||
| /// | ||||||||||||||||||
| /// This is similar to the RCardinalityField but it presents itself as an integer type. | ||||||||||||||||||
| /// The template argument T must be an integral type. | ||||||||||||||||||
| template <typename T> | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we could already SFINAE-out the non-integral types |
||||||||||||||||||
| class RRDFCardinalityField final : public RRDFCardinalityFieldBase { | ||||||||||||||||||
| inline void CheckSize(ROOT::NTupleSize_t size) const | ||||||||||||||||||
| { | ||||||||||||||||||
| GenerateColumnsImpl<ROOT::Internal::RColumnIndex>(desc); | ||||||||||||||||||
| if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, std::uint64_t>) | ||||||||||||||||||
| return; | ||||||||||||||||||
| if (size > std::numeric_limits<T>::max()) { | ||||||||||||||||||
| throw RException(R__FAIL(std::string("integer overflow in field ") + GetFieldName())); | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| size_t GetValueSize() const final { return sizeof(std::size_t); } | ||||||||||||||||||
| size_t GetAlignment() const final { return alignof(std::size_t); } | ||||||||||||||||||
| protected: | ||||||||||||||||||
| std::unique_ptr<ROOT::RFieldBase> CloneImpl(std::string_view newName) const final | ||||||||||||||||||
| { | ||||||||||||||||||
| return std::make_unique<RRDFCardinalityField>(newName); | ||||||||||||||||||
| } | ||||||||||||||||||
| void ConstructValue(void *where) const final { *static_cast<T *>(where) = 0; } | ||||||||||||||||||
|
|
||||||||||||||||||
| public: | ||||||||||||||||||
| RRDFCardinalityField(std::string_view name) | ||||||||||||||||||
| : RRDFCardinalityFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(T))) | ||||||||||||||||||
| { | ||||||||||||||||||
| } | ||||||||||||||||||
| RRDFCardinalityField(RRDFCardinalityField &&other) = default; | ||||||||||||||||||
| RRDFCardinalityField &operator=(RRDFCardinalityField &&other) = default; | ||||||||||||||||||
| ~RRDFCardinalityField() override = default; | ||||||||||||||||||
|
Comment on lines
+115
to
+117
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly here
Suggested change
|
||||||||||||||||||
|
|
||||||||||||||||||
| size_t GetValueSize() const final { return sizeof(T); } | ||||||||||||||||||
| size_t GetAlignment() const final { return alignof(T); } | ||||||||||||||||||
|
Comment on lines
+119
to
+120
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
|
|
||||||||||||||||||
| /// Get the number of elements of the collection identified by globalIndex | ||||||||||||||||||
| void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final | ||||||||||||||||||
| { | ||||||||||||||||||
| RNTupleLocalIndex collectionStart; | ||||||||||||||||||
| ROOT::NTupleSize_t size; | ||||||||||||||||||
| fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &size); | ||||||||||||||||||
| *static_cast<std::size_t *>(to) = size; | ||||||||||||||||||
| CheckSize(size); | ||||||||||||||||||
| *static_cast<T *>(to) = size; | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Get the number of elements of the collection identified by clusterIndex | ||||||||||||||||||
|
|
@@ -110,7 +135,8 @@ class RRDFCardinalityField final : public ROOT::RFieldBase { | |||||||||||||||||
| RNTupleLocalIndex collectionStart; | ||||||||||||||||||
| ROOT::NTupleSize_t size; | ||||||||||||||||||
| fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &size); | ||||||||||||||||||
| *static_cast<std::size_t *>(to) = size; | ||||||||||||||||||
| CheckSize(size); | ||||||||||||||||||
| *static_cast<T *>(to) = size; | ||||||||||||||||||
| } | ||||||||||||||||||
| }; | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
@@ -144,7 +170,8 @@ class RArraySizeField final : public ROOT::RFieldBase { | |||||||||||||||||
|
|
||||||||||||||||||
| public: | ||||||||||||||||||
| RArraySizeField(std::string_view name, std::size_t arrayLength) | ||||||||||||||||||
| : ROOT::RFieldBase(name, "std::size_t", ROOT::ENTupleStructure::kPlain, false /* isSimple */), | ||||||||||||||||||
| : ROOT::RFieldBase(name, ROOT::Internal::GetRenormalizedTypeName(typeid(std::size_t)), | ||||||||||||||||||
| ROOT::ENTupleStructure::kPlain, false /* isSimple */), | ||||||||||||||||||
| fArrayLength(arrayLength) | ||||||||||||||||||
| { | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
@@ -325,6 +352,18 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st | |||||||||||||||||
| if (!fieldOrException) | ||||||||||||||||||
| return; | ||||||||||||||||||
| auto valueField = fieldOrException.Unwrap(); | ||||||||||||||||||
| if (const auto cardinalityField = dynamic_cast<const ROOT::RCardinalityField *>(valueField.get())) { | ||||||||||||||||||
| // Cardinality fields in RDataFrame are presented as integers | ||||||||||||||||||
| if (cardinalityField->As32Bit()) { | ||||||||||||||||||
| valueField = | ||||||||||||||||||
| std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint32_t>>(fieldDesc.GetFieldName()); | ||||||||||||||||||
| } else if (cardinalityField->As64Bit()) { | ||||||||||||||||||
| valueField = | ||||||||||||||||||
| std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint64_t>>(fieldDesc.GetFieldName()); | ||||||||||||||||||
| } else { | ||||||||||||||||||
| R__ASSERT(false); | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would a message like the following make sense?
Suggested change
|
||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
| valueField->SetOnDiskId(fieldId); | ||||||||||||||||||
| for (auto &f : *valueField) { | ||||||||||||||||||
| f.SetOnDiskId(desc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId())); | ||||||||||||||||||
|
|
@@ -337,7 +376,7 @@ void ROOT::RDF::RNTupleDS::AddField(const ROOT::RNTupleDescriptor &desc, std::st | |||||||||||||||||
| if (info.fNRepetitions > 0) { | ||||||||||||||||||
| cardinalityField = std::make_unique<ROOT::Internal::RDF::RArraySizeField>(name, info.fNRepetitions); | ||||||||||||||||||
| } else { | ||||||||||||||||||
| cardinalityField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField>(name); | ||||||||||||||||||
| cardinalityField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::size_t>>(name); | ||||||||||||||||||
| } | ||||||||||||||||||
| cardinalityField->SetOnDiskId(info.fFieldId); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
@@ -475,7 +514,7 @@ ROOT::RFieldBase *ROOT::RDF::RNTupleDS::GetFieldWithTypeChecks(std::string_view | |||||||||||||||||
| // If the field corresponding to the provided name is not a cardinality column and the requested type is different | ||||||||||||||||||
| // from the proto field that was created when the data source was constructed, we first have to create an | ||||||||||||||||||
| // alternative proto field for the column reader. Otherwise, we can directly use the existing proto field. | ||||||||||||||||||
| if (fieldName.substr(0, 13) != "R_rdf_sizeof_" && requestedType != fColumnTypes[index]) { | ||||||||||||||||||
| if (requestedType != fColumnTypes[index]) { | ||||||||||||||||||
| auto &altProtoFields = fAlternativeProtoFields[index]; | ||||||||||||||||||
|
|
||||||||||||||||||
| // If we can find the requested type in the registered alternative protofields, return the corresponding field | ||||||||||||||||||
|
|
@@ -488,12 +527,41 @@ ROOT::RFieldBase *ROOT::RDF::RNTupleDS::GetFieldWithTypeChecks(std::string_view | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Otherwise, create a new protofield and register it in the alternatives before returning | ||||||||||||||||||
| auto newAltProtoFieldOrException = ROOT::RFieldBase::Create(std::string(fieldName), requestedType); | ||||||||||||||||||
| if (!newAltProtoFieldOrException) { | ||||||||||||||||||
| throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + | ||||||||||||||||||
| "\" for column \"" + std::string(fieldName) + "\""); | ||||||||||||||||||
| std::unique_ptr<RFieldBase> newAltProtoField; | ||||||||||||||||||
| const std::string strName = std::string(fieldName); | ||||||||||||||||||
| if (dynamic_cast<ROOT::Internal::RDF::RRDFCardinalityFieldBase *>(fProtoFields[index].get())) { | ||||||||||||||||||
| if (requestedType == "bool") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<bool>>(strName); | ||||||||||||||||||
| } else if (requestedType == "char") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<char>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::int8_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::int8_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::uint8_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint8_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::int16_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::int16_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::uint16_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint16_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::int32_t") { | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make sure, when we get to this point we have already normalized the requested type, right? e.g. |
||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::int32_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::uint32_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint32_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::int64_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::int64_t>>(strName); | ||||||||||||||||||
| } else if (requestedType == "std::uint64_t") { | ||||||||||||||||||
| newAltProtoField = std::make_unique<ROOT::Internal::RDF::RRDFCardinalityField<std::uint64_t>>(strName); | ||||||||||||||||||
| } else { | ||||||||||||||||||
| throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + | ||||||||||||||||||
| "\" for column \"" + std::string(fieldName) + "\""); | ||||||||||||||||||
| } | ||||||||||||||||||
| } else { | ||||||||||||||||||
| auto newAltProtoFieldOrException = ROOT::RFieldBase::Create(strName, requestedType); | ||||||||||||||||||
| if (!newAltProtoFieldOrException) { | ||||||||||||||||||
| throw std::runtime_error("RNTupleDS: Could not create field with type \"" + requestedType + | ||||||||||||||||||
| "\" for column \"" + std::string(fieldName) + "\""); | ||||||||||||||||||
| } | ||||||||||||||||||
| newAltProtoField = newAltProtoFieldOrException.Unwrap(); | ||||||||||||||||||
| } | ||||||||||||||||||
| auto newAltProtoField = newAltProtoFieldOrException.Unwrap(); | ||||||||||||||||||
| newAltProtoField->SetOnDiskId(fProtoFields[index]->GetOnDiskId()); | ||||||||||||||||||
| auto *newField = newAltProtoField.get(); | ||||||||||||||||||
| altProtoFields.emplace_back(std::move(newAltProtoField)); | ||||||||||||||||||
|
|
||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For my understanding, by using the
typename, the field is exposed and readable by the user as the integer type and not just asRNTupleCardinality, correct?