apache · tlopex · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026 · gemini-code-assist
diff --git a/include/tvm/relax/attrs/vision.h b/include/tvm/relax/attrs/vision.h
@@ -73,6 +73,23 @@ struct ROIAlignAttrs : public AttrsNodeReflAdapter<ROIAlignAttrs> {
   TVM_FFI_DECLARE_OBJECT_INFO_FINAL("relax.attrs.ROIAlignAttrs", ROIAlignAttrs, BaseAttrsNode);
 };  // struct ROIAlignAttrs
 
+/*! \brief Attributes used in the ROIPool operator (declared as "relax.attrs.ROIPoolAttrs"). */
+struct ROIPoolAttrs : public AttrsNodeReflAdapter<ROIPoolAttrs> {
+  ffi::Array<int64_t> pooled_size;  //!< Output size of roi pool.
+  double spatial_scale;             //!< Feature-map to raw-image size ratio (height or width).
+  ffi::String layout;               //!< Dimension ordering of the input data.
+
+  static void RegisterReflection() {  // registers the three fields above through def_ro
+    namespace refl = tvm::ffi::reflection;
+    refl::ObjectDef<ROIPoolAttrs>()
+        .def_ro("pooled_size", &ROIPoolAttrs::pooled_size, "Output size of roi pool.")
+        .def_ro("spatial_scale", &ROIPoolAttrs::spatial_scale,
+                "Ratio of input feature map height (or width) to raw image height (or width).")
+        .def_ro("layout", &ROIPoolAttrs::layout, "Dimension ordering of the input data.");
+  }
+  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("relax.attrs.ROIPoolAttrs", ROIPoolAttrs, BaseAttrsNode);
+};  // struct ROIPoolAttrs
+
 /*! \brief Attributes used in GetValidCounts operator */
 struct GetValidCountsAttrs : public AttrsNodeReflAdapter<GetValidCountsAttrs> {
   double score_threshold;
@@ -132,7 +149,6 @@ struct NonMaximumSuppressionAttrs
                                     NonMaximumSuppressionAttrs, BaseAttrsNode);
 };  // struct NonMaximumSuppressionAttrs
 
-
 /*! \brief Attributes for multibox_transform_loc (SSD / TFLite-style box decode). */
 struct MultiboxTransformLocAttrs : public AttrsNodeReflAdapter<MultiboxTransformLocAttrs> {
   bool clip;

diff --git a/python/tvm/relax/frontend/onnx/onnx_frontend.py b/python/tvm/relax/frontend/onnx/onnx_frontend.py
@@ -2517,6 +2517,28 @@ def _impl_v16(cls, bb, inputs, attr, params):
         return cls._impl(bb, inputs, attr, params, b"half_pixel")
 
 
+class MaxRoiPool(OnnxOpConverter):
+    """Map an ONNX MaxRoiPool node onto `relax.op.vision.roi_pool` using the NCHW layout."""
+
+    @classmethod
+    def _impl_v1(cls, bb, inputs, attr, params):
+        # Exactly two operands are accepted: the feature map followed by the ROI tensor.
+        if len(inputs) != 2:
+            raise ValueError("MaxRoiPool expects exactly 2 inputs")
+        feature_map, roi_boxes = inputs[0], inputs[1]
+
+        # `pooled_shape` has no fallback and must be supplied by the node.
+        out_hw = attr.get("pooled_shape")
+        if out_hw is None:
+            raise ValueError("MaxRoiPool requires pooled_shape attribute")
+
+        # `spatial_scale` falls back to 1.0 when the attribute is absent.
+        scale = attr.get("spatial_scale", 1.0)
+        return relax.op.vision.roi_pool(
+            feature_map, roi_boxes, pooled_size=tuple(out_hw), spatial_scale=scale, layout="NCHW"
+        )
+
+
 class Range(OnnxOpConverter):
     """Converts an onnx Range node into an equivalent Relax expression."""
 
@@ -4177,7 +4199,7 @@ def _get_convert_map():
         "OneHot": OneHot,
         "Unique": Unique,
         "NonZero": NonZero,
-        # "MaxRoiPool": MaxRoiPool,
+        "MaxRoiPool": MaxRoiPool,
         "RoiAlign": RoiAlign,
         "NonMaxSuppression": NonMaxSuppression,
         "AllClassNMS": AllClassNMS,

diff --git a/python/tvm/relax/op/__init__.py b/python/tvm/relax/op/__init__.py
@@ -163,6 +163,7 @@
     multibox_transform_loc,
     non_max_suppression,
     roi_align,
+    roi_pool,
 )
 
 

diff --git a/python/tvm/relax/op/op_attrs.py b/python/tvm/relax/op/op_attrs.py
@@ -261,6 +261,11 @@ class ROIAlignAttrs(Attrs):
     """Attributes for vision.roi_align"""
 
 
+@tvm_ffi.register_object("relax.attrs.ROIPoolAttrs")
+class ROIPoolAttrs(Attrs):
+    """Attributes for vision.roi_pool, registered under "relax.attrs.ROIPoolAttrs"."""
+
+
 @tvm_ffi.register_object("relax.attrs.MultiboxTransformLocAttrs")
 class MultiboxTransformLocAttrs(Attrs):
     """Attributes for vision.multibox_transform_loc"""

diff --git a/python/tvm/relax/op/vision/__init__.py b/python/tvm/relax/op/vision/__init__.py
@@ -20,3 +20,4 @@
 from .multibox_transform_loc import *
 from .nms import *
 from .roi_align import *
+from .roi_pool import *
diff --git a/python/tvm/relax/op/vision/roi_pool.py b/python/tvm/relax/op/vision/roi_pool.py
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""ROI Pool operator"""
+
+from ..base import Expr
+from . import _ffi_api
+
+
+def roi_pool(
+    data: Expr,
+    rois: Expr,
+    pooled_size: int | tuple[int, int] | list[int],
+    spatial_scale: float,
+    layout: str = "NCHW",
+):
+    """Build a call to the ROI pooling operator.
+
+    Parameters
+    ----------
+    data : relax.Expr
+        The 4-D feature map to pool from.
+
+    rois : relax.Expr
+        A 2-D tensor of shape `(num_roi, 5)`; every row is
+        `[batch_idx, x1, y1, x2, y2]`.
+
+    pooled_size : Union[int, Tuple[int, int], List[int]]
+        Pooled output size. A single int is expanded to the pair `(pooled_size, pooled_size)`.
+
+    spatial_scale : float
+        Ratio of input feature map height (or width) to raw image height (or width).
+
+    layout : str, optional
+        Layout of `data`. Defaults to `NCHW`, currently the only supported layout.
+
+    Returns
+    -------
+    result : relax.Expr
+        The expression returned by the `roi_pool` FFI entry point.
+    """
+    # A scalar size is duplicated for both pooled dimensions; sequences pass through as given.
+    size_pair = (pooled_size, pooled_size) if isinstance(pooled_size, int) else pooled_size
+    return _ffi_api.roi_pool(data, rois, size_pair, spatial_scale, layout)
diff --git a/python/tvm/relax/transform/legalize_ops/vision.py b/python/tvm/relax/transform/legalize_ops/vision.py
@@ -150,6 +150,18 @@ def _non_max_suppression(block_builder: BlockBuilder, call: Call) -> Expr:
     )
 
 
+@register_legalize("relax.vision.roi_pool")
+def _roi_pool(bb: BlockBuilder, call: Call) -> Expr:
+    """Legalize `relax.vision.roi_pool` by emitting a TE call to `topi.vision.roi_pool`."""
+    op_attrs = call.attrs
+    te_kwargs = {
+        "pooled_size": op_attrs.pooled_size,
+        "spatial_scale": op_attrs.spatial_scale,
+        "layout": op_attrs.layout,
+    }
+    return bb.call_te(topi.vision.roi_pool, call.args[0], call.args[1], **te_kwargs)
+
+
 @register_legalize("relax.vision.multibox_transform_loc")
 def _multibox_transform_loc(bb: BlockBuilder, call: Call) -> Expr:
     variances = tuple(float(x) for x in call.attrs.variances)

diff --git a/python/tvm/runtime/support.py b/python/tvm/runtime/support.py
@@ -146,10 +146,17 @@ def method(*args, **kwargs):
     fields = metadata.get("fields", [])
     methods = metadata.get("methods", [])
 
-    class TVMDerivedObject(metadata["cls"]):  # type: ignore
+    base_cls = metadata["cls"]
+    slots = []
+    if getattr(base_cls, "__dictoffset__", 0) == 0:
+        slots.append("__dict__")
+    if getattr(base_cls, "__weakrefoffset__", 0) == 0:
+        slots.append("__weakref__")
+
+    class TVMDerivedObject(base_cls):  # type: ignore
         """The derived object to avoid cyclic dependency."""
 
-        __slots__ = ("__dict__", "__weakref__",)
+        __slots__ = tuple(slots)
 
         _cls = cls
         _type = "TVMDerivedObject"

diff --git a/python/tvm/s_tir/meta_schedule/utils.py b/python/tvm/s_tir/meta_schedule/utils.py
@@ -106,10 +106,17 @@ def method(*args, **kwargs):
     fields = metadata.get("fields", [])
     methods = metadata.get("methods", [])
 
-    class TVMDerivedObject(metadata["cls"]):  # type: ignore
+    base_cls = metadata["cls"]
+    slots = []
+    if getattr(base_cls, "__dictoffset__", 0) == 0:
+        slots.append("__dict__")
+    if getattr(base_cls, "__weakrefoffset__", 0) == 0:
+        slots.append("__weakref__")
+
+    class TVMDerivedObject(base_cls):  # type: ignore
         """The derived object to avoid cyclic dependency."""
 
-        __slots__ = ("__dict__", "__weakref__",)
+        __slots__ = tuple(slots)
 
         _cls = cls
         _type = "TVMDerivedObject"

diff --git a/python/tvm/topi/vision/__init__.py b/python/tvm/topi/vision/__init__.py
@@ -20,3 +20,4 @@
 from .multibox_transform_loc import *
 from .nms import *
 from .roi_align import *
+from .roi_pool import *
diff --git a/python/tvm/topi/vision/roi_pool.py b/python/tvm/topi/vision/roi_pool.py
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name
+"""ROI Pool operator"""
+
+import tvm
+from tvm import te
+
+
+def roi_pool_nchw(data, rois, pooled_size, spatial_scale):
+    """Max-pool every region of interest of an NCHW tensor onto a fixed-size grid.
+
+    `data` is unpacked as (batch, channel, height, width); each row of `rois` is read as
+    [batch_index, x1, y1, x2, y2], with the corners multiplied by `spatial_scale` and rounded.
+    `pooled_size` is an int or an (h, w) pair. Returns a (num_roi, channel, h, w) tensor of
+    per-bin maxima in which bins that are empty after clipping to the feature map hold 0.
+    """
+    _, channel, height, width = data.shape
+    num_roi, _ = rois.shape
+    if isinstance(pooled_size, int):
+        pooled_size = (pooled_size, pooled_size)
+    pooled_size_h, pooled_size_w = pooled_size
+    roi_dtype = rois.dtype
+    zero = tvm.tirx.const(0.0, data.dtype)
+    neg_inf = tvm.tirx.const(float("-inf"), data.dtype)
+
+    def _bin_bounds(i, ph, pw):  # clipped [start, end) rows and columns of bin (ph, pw) in ROI i
+        roi = rois[i]
+        roi_start_w = te.round(roi[1] * spatial_scale).astype("int32")
+        roi_start_h = te.round(roi[2] * spatial_scale).astype("int32")
+        roi_end_w = te.round(roi[3] * spatial_scale).astype("int32")
+        roi_end_h = te.round(roi[4] * spatial_scale).astype("int32")
+        # A degenerate or inverted box is still treated as at least one pixel tall and wide.
+        roi_h = te.max(roi_end_h - roi_start_h + 1, tvm.tirx.const(1, "int32"))
+        roi_w = te.max(roi_end_w - roi_start_w + 1, tvm.tirx.const(1, "int32"))
+        bin_h = tvm.tirx.Cast(roi_dtype, roi_h) / tvm.tirx.const(float(pooled_size_h), roi_dtype)
+        bin_w = tvm.tirx.Cast(roi_dtype, roi_w) / tvm.tirx.const(float(pooled_size_w), roi_dtype)
+        hstart = te.floor(tvm.tirx.Cast(roi_dtype, ph) * bin_h).astype("int32")
+        wstart = te.floor(tvm.tirx.Cast(roi_dtype, pw) * bin_w).astype("int32")
+        hend = te.ceil(tvm.tirx.Cast(roi_dtype, ph + 1) * bin_h).astype("int32")
+        wend = te.ceil(tvm.tirx.Cast(roi_dtype, pw + 1) * bin_w).astype("int32")
+        # Move the bin to absolute coordinates and clip it to the feature map.
+        hstart = te.min(te.max(hstart + roi_start_h, 0), height)
+        hend = te.min(te.max(hend + roi_start_h, 0), height)
+        wstart = te.min(te.max(wstart + roi_start_w, 0), width)
+        wend = te.min(te.max(wend + roi_start_w, 0), width)
+        return hstart, hend, wstart, wend
+
+    def _sample(i, c, ph, pw):  # in-bin data value; -inf elsewhere, which cannot raise the max
+        roi = rois[i]
+        batch_index = roi[0].astype("int32")
+        hstart, hend, wstart, wend = _bin_bounds(i, ph, pw)
+        valid = tvm.tirx.all(hstart <= rh, rh < hend, wstart <= rw, rw < wend)
+        return tvm.tirx.if_then_else(valid, data[batch_index, c, rh, rw], neg_inf)
+
+    def _is_empty(i, ph, pw):  # true when the clipped bin covers no pixel at all
+        hstart, hend, wstart, wend = _bin_bounds(i, ph, pw)
+        return tvm.tirx.any(hend <= hstart, wend <= wstart)
+
+    rh = te.reduce_axis((0, height), name="rh")
+    rw = te.reduce_axis((0, width), name="rw")
+    pooled = te.compute(
+        (num_roi, channel, pooled_size_h, pooled_size_w),
+        lambda i, c, ph, pw: te.max(_sample(i, c, ph, pw), axis=[rh, rw]),
+        tag="pool,roi_pool_nchw",
+    )
+    # Empty bins are recognised from their bounds rather than by comparing `pooled` with -inf.
+    return te.compute(
+        (num_roi, channel, pooled_size_h, pooled_size_w),
+        lambda i, c, ph, pw: tvm.tirx.if_then_else(
+            _is_empty(i, ph, pw), zero, pooled[i, c, ph, pw]
+        ),
+    )
+
+
+def roi_pool(data, rois, pooled_size, spatial_scale, layout="NCHW"):
+    """Dispatch ROI pooling to the implementation for `layout`; only "NCHW" is available."""
+    if layout != "NCHW":
+        raise ValueError(f"Unsupported layout for roi_pool: {layout}")
+    return roi_pool_nchw(data, rois, pooled_size, spatial_scale)