From c3194ba0ffb574f23a667de5626dc2e4526c18cc Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Wed, 11 Mar 2026 14:34:40 -0700 Subject: [PATCH 1/8] Lazy update fabric xforms. --- .../isaaclab_newton/physics/newton_manager.py | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py index a8f7d1ba786c..324a16db627e 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py @@ -89,6 +89,7 @@ class NewtonManager(PhysicsManager): _usdrt_stage = None _newton_index_attr = "newton:index" _clone_physics_only = False + _transforms_dirty: bool = False # Model changes (callbacks use unified system from PhysicsManager) _model_changes: set[int] = set() @@ -143,10 +144,14 @@ def forward(cls) -> None: def sync_transforms_to_usd(cls) -> None: """Write Newton body_q to USD Fabric world matrices for Kit viewport / RTX rendering. - No-op when ``_usdrt_stage`` is None (i.e. Kit visualizer is not active). - Called by :class:`~isaaclab.sim.scene_data_providers.NewtonSceneDataProvider` at render - cadence (Kit), and after each physics step when using Newton+RTX so the renderer sees - updated poses. + No-op when ``_usdrt_stage`` is None (i.e. Kit visualizer is not active) + or when transforms have not changed since the last sync. + + This method is called at render cadence by + :class:`~isaaclab.sim.scene_data_providers.NewtonSceneDataProvider`. + Physics stepping marks transforms dirty via :meth:`_mark_transforms_dirty` + so that the expensive Fabric hierarchy update only runs once per render + frame rather than after every physics step. Uses ``wp.fabricarray`` directly (no ``isaacsim.physics.newton`` extension needed). The Warp kernel reads ``state_0.body_q[newton_index[i]]`` and writes the @@ -154,6 +159,8 @@ def sync_transforms_to_usd(cls) -> None: """ if cls._usdrt_stage is None or cls._model is None or cls._state_0 is None: return + if not cls._transforms_dirty: + return try: import usdrt @@ -180,9 +187,19 @@ def sync_transforms_to_usd(cls) -> None: cls._usdrt_stage.GetFabricId(), cls._usdrt_stage.GetStageIdAsStageId() ) fabric_hierarchy.update_world_xforms() + cls._transforms_dirty = False except Exception as exc: logger.debug("[NewtonManager] sync_transforms_to_usd: %s", exc) + @classmethod + def _mark_transforms_dirty(cls) -> None: + """Flag that physics state has changed and Fabric needs re-sync. + + Called by :meth:`_simulate` after stepping. The actual sync is deferred + to :meth:`sync_transforms_to_usd`, which runs at render cadence. + """ + cls._transforms_dirty = True + @classmethod def step(cls) -> None: """Step the physics simulation.""" @@ -254,6 +271,7 @@ def clear(cls): cls._graph = None cls._newton_stage_path = None cls._usdrt_stage = None + cls._transforms_dirty = False cls._up_axis = "Z" cls._model_changes = set() cls._views = [] @@ -318,6 +336,7 @@ def start_simulation(cls) -> None: if not xformable_prim.HasWorldXform(): xformable_prim.SetWorldXformFromUsd() + cls._mark_transforms_dirty() cls.sync_transforms_to_usd() @classmethod @@ -530,9 +549,9 @@ def step_fn(state_0, state_1): for sensor in cls._newton_contact_sensors.values(): sensor.update(cls._state_0, eval_contacts) - # Sync Newton state to USD/Fabric for RTX rendering (e.g., Newton Physics + RTX Renderer preset) + # Mark transforms dirty so the next render-cadence sync picks them up. if cls._usdrt_stage is not None: - cls.sync_transforms_to_usd() + cls._mark_transforms_dirty() @classmethod def get_solver_convergence_steps(cls) -> dict[str, float | int]: From 181be179811195037b47f24d7822e053a3e330fa Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Wed, 11 Mar 2026 16:10:56 -0700 Subject: [PATCH 2/8] cuda graph fix --- .../isaaclab_newton/physics/newton_manager.py | 215 +++++++++++++++++- 1 file changed, 205 insertions(+), 10 deletions(-) diff --git a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py index 324a16db627e..fecb8d125fd6 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py @@ -7,12 +7,24 @@ from __future__ import annotations +import ctypes import inspect import logging from typing import TYPE_CHECKING import numpy as np import warp as wp + +# Load CUDA runtime for relaxed-mode graph capture (RTX-compatible). +# cudaStreamCaptureModeRelaxed (2) allows the RTX compositor's background +# CUDA stream to keep running during capture without invalidating it. +try: + _cudart = ctypes.CDLL("libcudart.so.12") +except OSError: + try: + _cudart = ctypes.CDLL("libcudart.so") + except OSError: + _cudart = None from newton import Axis, CollisionPipeline, Contacts, Control, Model, ModelBuilder, State, eval_fk from newton._src.usd.schemas import SchemaResolverNewton, SchemaResolverPhysx from newton.sensors import SensorContact as NewtonContactSensor @@ -83,6 +95,7 @@ class NewtonManager(PhysicsManager): # CUDA graphing _graph = None + _graph_capture_pending: bool = False # USD/Fabric sync _newton_stage_path = None @@ -214,12 +227,28 @@ def step(cls) -> None: cls._solver.notify_model_changed(change) cls._model_changes = set() - # Step simulation (graphed or not; _graph is None when RTX/Fabric sync is active or on CPU) + # Lazy CUDA graph capture: deferred from initialize_solver() when RTX was active. + # By the time step() is first called, RTX has fully initialized (all cudaImportExternalMemory + # calls are done) and is idle between render frames — giving us a clean capture window. cfg = PhysicsManager._cfg - if cfg is not None and cfg.use_cuda_graph and cls._graph is not None and "cuda" in PhysicsManager._device: # type: ignore[union-attr] + device = PhysicsManager._device + if cls._graph_capture_pending and cfg is not None and cfg.use_cuda_graph and "cuda" in device: # type: ignore[union-attr] + cls._graph_capture_pending = False + cls._graph = cls._capture_relaxed_graph(device) + if cls._graph is not None: + logger.info("Newton CUDA graph captured (deferred relaxed mode, RTX-compatible)") + else: + logger.warning("Newton deferred CUDA graph capture failed; using eager execution") + + # Step simulation (graphed or not; _graph is None when capture is disabled or failed) + if cfg is not None and cfg.use_cuda_graph and cls._graph is not None and "cuda" in device: # type: ignore[union-attr] wp.capture_launch(cls._graph) + # Fabric sync is excluded from the graph (wp.synchronize_device is non-capturable), + # so run it eagerly here after the graph replay. + if cls._usdrt_stage is not None: + cls.sync_transforms_to_usd() else: - with wp.ScopedDevice(PhysicsManager._device): + with wp.ScopedDevice(device): cls._simulate() # Debug convergence info @@ -269,6 +298,7 @@ def clear(cls): cls._newton_contact_sensors = {} cls._report_contacts = False cls._graph = None + cls._graph_capture_pending = False cls._newton_stage_path = None cls._usdrt_stage = None cls._transforms_dirty = False @@ -498,18 +528,183 @@ def initialize_solver(cls) -> None: device = PhysicsManager._device - # Skip CUDA graph when syncing to USD/Fabric for RTX: capture conflicts with RTX/Replicator - # using the legacy stream (cudaErrorStreamCaptureImplicit). - use_cuda_graph = cfg.use_cuda_graph and (cls._usdrt_stage is None) # type: ignore[union-attr] + use_cuda_graph = cfg.use_cuda_graph and "cuda" in device # type: ignore[union-attr] with Timer(name="newton_cuda_graph", msg="CUDA graph took:"): - if use_cuda_graph and "cuda" in device: - with wp.ScopedCapture() as capture: - cls._simulate() - cls._graph = capture.graph + if use_cuda_graph: + if cls._usdrt_stage is None: + # No RTX active — use standard Warp capture (cudaStreamCaptureModeGlobal). + with wp.ScopedCapture() as capture: + cls._simulate() + cls._graph = capture.graph + logger.info("Newton CUDA graph captured (standard Warp mode)") + else: + # RTX is active during initialization — cudaImportExternalMemory and other + # non-capturable RTX ops run on background CUDA streams right now. + # Defer capture to the first step() call, after RTX is fully initialized + # and idle between render frames (clean capture window). + cls._graph = None + cls._graph_capture_pending = True + logger.info("Newton CUDA graph capture deferred until first step() (RTX active)") else: cls._graph = None + @classmethod + def _capture_relaxed_graph(cls, device: str): + """Capture Newton physics (only) as a CUDA graph, RTX-compatible. + + Uses a hybrid approach to work around two conflicting requirements: + + 1. RTX background threads use CUDA's legacy stream (stream 0) for async operations + like ``cudaImportExternalMemory``. A standard ``wp.ScopedCapture()`` uses + ``cudaStreamCaptureModeThreadLocal`` on Warp's default stream (a blocking stream). + A blocking stream synchronises implicitly with legacy stream 0, so RTX ops inside + the capture window fail with error 906. + + 2. ``mujoco_warp`` calls ``wp.capture_while`` inside ``solver.solve()``. + ``wp.capture_while`` checks ``device.captures`` (populated by ``wp.capture_begin``) + to decide whether to insert a conditional graph node (graph-capture path) or to run + eagerly with ``wp.synchronize_stream`` (non-capture path). Without an entry in + ``device.captures``, it synchronises the capturing stream — which raises "Cannot + synchronize stream while graph capture is active". + + Solution: + + - Create a **non-blocking** stream (``cudaStreamNonBlocking = 0x01``): no implicit sync + with legacy stream 0, so RTX background threads are unaffected (avoids error 906). + - Start the capture externally via ``cudaStreamBeginCapture`` with + ``cudaStreamCaptureModeRelaxed`` so no other CUDA activity is disrupted. + - Call ``wp.capture_begin(external=True, stream=fresh_stream)``: + this registers the capture in Warp's ``device.captures`` *without* calling + ``cudaStreamBeginCapture`` (already done) and *without* changing device-wide memory + pool attributes (avoids error 900 in RTX's ``cudaMallocAsync``). + - Run ``_simulate_physics_only()`` inside ``ScopedStream(fresh_stream)``: + kernels dispatch to ``fresh_stream`` and are captured; ``wp.capture_while`` finds the + active capture and inserts a conditional graph node instead of synchronising. + - Call ``wp.capture_end(stream=fresh_stream)`` to finalise the Warp-level capture. + - Call ``cudaStreamEndCapture`` to close the CUDA stream capture and get the graph. + + Warmup run pre-allocates all MuJoCo-Warp scratch buffers so no ``cudaMalloc`` occurs during + capture. ``sync_transforms_to_usd`` (which calls ``wp.synchronize_device``) is + excluded from the capture and runs eagerly in ``step()`` after ``wp.capture_launch``. + + Returns a ``wp.Graph`` on success, or ``None`` on failure. + """ + if _cudart is None: + logger.warning("libcudart not available; cannot use relaxed graph capture") + return None + + # Warmup: pre-allocate all MuJoCo-Warp scratch buffers so the capture window has + # no new cudaMalloc calls (which are forbidden inside graph capture). + with wp.ScopedDevice(device): + cls._simulate_physics_only() + wp.synchronize_stream(wp.get_stream(device)) + + # Create a non-blocking stream (cudaStreamNonBlocking = 0x01). + raw_handle = ctypes.c_void_p() + ret = _cudart.cudaStreamCreateWithFlags(ctypes.byref(raw_handle), ctypes.c_uint(0x01)) + if ret != 0: + logger.warning("cudaStreamCreateWithFlags(NonBlocking) failed (code %d)", ret) + return None + fresh_handle = raw_handle.value + fresh_stream = wp.Stream(device, cuda_stream=fresh_handle, owner=False) + + # Start capture in relaxed mode BEFORE entering ScopedStream. + ret = _cudart.cudaStreamBeginCapture(ctypes.c_void_p(fresh_handle), ctypes.c_int(2)) + if ret != 0: + _cudart.cudaStreamDestroy(ctypes.c_void_p(fresh_handle)) + logger.warning("cudaStreamBeginCapture(relaxed) failed (code %d)", ret) + return None + + try: + wp.capture_begin(stream=fresh_stream, external=True) + except Exception as exc: + raw_graph = ctypes.c_void_p() + _cudart.cudaStreamEndCapture(ctypes.c_void_p(fresh_handle), ctypes.byref(raw_graph)) + if raw_graph.value: + _cudart.cudaGraphDestroy(raw_graph) + _cudart.cudaStreamDestroy(ctypes.c_void_p(fresh_handle)) + logger.warning("wp.capture_begin(external=True) failed: %s", exc) + return None + + err_during_capture = None + with wp.ScopedStream(fresh_stream, sync_enter=False): + try: + cls._simulate_physics_only() + except Exception as exc: + err_during_capture = exc + + if err_during_capture is None: + try: + graph = wp.capture_end(stream=fresh_stream) + except Exception as exc: + err_during_capture = exc + graph = None + else: + try: + wp.capture_end(stream=fresh_stream) + except Exception: + pass + graph = None + + raw_graph = ctypes.c_void_p() + end_ret = _cudart.cudaStreamEndCapture(ctypes.c_void_p(fresh_handle), ctypes.byref(raw_graph)) + _cudart.cudaStreamDestroy(ctypes.c_void_p(fresh_handle)) + + if err_during_capture is not None: + if raw_graph.value: + _cudart.cudaGraphDestroy(raw_graph) + logger.warning("Newton graph capture aborted during simulate: %s", err_during_capture) + return None + + if end_ret != 0 or not raw_graph.value: + logger.warning("cudaStreamEndCapture failed (code %d)", end_ret) + return None + + graph.graph = raw_graph + graph.graph_exec = None + return graph + + @classmethod + def _simulate_physics_only(cls) -> None: + """Run one physics step without Fabric/USD sync — safe for CUDA graph capture. + + Used by :meth:`_capture_relaxed_graph` to capture only the pure physics kernels. + ``sync_transforms_to_usd`` is excluded because it calls ``wp.synchronize_device`` + (forbidden inside graph capture) and ``wp.fabricarray`` (device-wide allocation). + The caller (``step()``) is responsible for calling ``sync_transforms_to_usd()`` + eagerly after ``wp.capture_launch``. + """ + if cls._needs_collision_pipeline: + cls._collision_pipeline.collide(cls._state_0, cls._contacts) + contacts = cls._contacts + else: + contacts = None + + def step_fn(state_0, state_1): + cls._solver.step(state_0, state_1, cls._control, contacts, cls._solver_dt) + + if cls._use_single_state: + for i in range(cls._num_substeps): + step_fn(cls._state_0, cls._state_0) + cls._state_0.clear_forces() + else: + cfg = PhysicsManager._cfg + need_copy_on_last_substep = (cfg is not None and cfg.use_cuda_graph) and cls._num_substeps % 2 == 1 # type: ignore[union-attr] + for i in range(cls._num_substeps): + step_fn(cls._state_0, cls._state_1) + if need_copy_on_last_substep and i == cls._num_substeps - 1: + cls._state_0.assign(cls._state_1) + else: + cls._state_0, cls._state_1 = cls._state_1, cls._state_0 + cls._state_0.clear_forces() + + if cls._report_contacts: + eval_contacts = contacts if contacts is not None else cls._contacts + cls._solver.update_contacts(eval_contacts, cls._state_0) + for sensor in cls._newton_contact_sensors.values(): + sensor.update(cls._state_0, eval_contacts) + @classmethod def _simulate(cls) -> None: """Run one simulation step with substeps.""" From a3d8e00a2365d39dde025c55f28629292773d046 Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Wed, 11 Mar 2026 16:11:34 -0700 Subject: [PATCH 3/8] write images --- .../renderers/isaac_rtx_renderer.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py index 22b07f13def0..ffba9463d86a 100644 --- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py +++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py @@ -8,7 +8,9 @@ from __future__ import annotations import json +import logging import math +import os import weakref from dataclasses import dataclass from typing import TYPE_CHECKING, Any @@ -48,6 +50,12 @@ class IsaacRtxRenderData: sensor: SensorBase | None = None +_logger = logging.getLogger(__name__) + +_TILED_DUMP_DIR = "/tmp/il-rtx" +_tiled_frame_counter = 0 + + class IsaacRtxRenderer(BaseRenderer): """Isaac RTX backend using Omniverse Replicator for tiled camera rendering. @@ -248,6 +256,10 @@ def tiling_grid_shape(): if data_type in SIMPLE_SHADING_MODES: tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous() + # Dump the tiled image to disk (first visual data type per frame only) + if tiled_data_buffer.dtype == wp.uint8: + self._save_tiled_image(tiled_data_buffer) + wp.launch( kernel=reshape_tiled_image, dim=(view_count, cfg.height, cfg.width), @@ -280,6 +292,20 @@ def tiling_grid_shape(): 0.0 if cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1] ) + def _save_tiled_image(self, tiled_data: wp.array): + """Save the raw tiled render product to disk as a PNG.""" + global _tiled_frame_counter + os.makedirs(_TILED_DUMP_DIR, exist_ok=True) + path = os.path.join(_TILED_DUMP_DIR, f"frame_{_tiled_frame_counter:06d}.png") + _tiled_frame_counter += 1 + try: + from PIL import Image + + img_np = wp.to_torch(tiled_data).cpu().numpy() + Image.fromarray(img_np).save(path) + except Exception: + _logger.warning("Failed to save tiled image to %s", path, exc_info=True) + def write_output(self, render_data: IsaacRtxRenderData, output_name: str, output_data: torch.Tensor): """No-op for Isaac RTX - all outputs written in render(). See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.write_output`.""" From 5c0145aa50219af8f613b5df9afe73763f7d77fe Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Thu, 12 Mar 2026 10:22:59 -0700 Subject: [PATCH 4/8] Works. --- source/isaaclab/isaaclab/physics/physics_manager.py | 11 +++++++++++ source/isaaclab/isaaclab/sim/simulation_context.py | 1 + .../isaaclab_newton/physics/newton_manager.py | 13 ++++++++----- .../isaaclab_physx/renderers/isaac_rtx_renderer.py | 4 ++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/source/isaaclab/isaaclab/physics/physics_manager.py b/source/isaaclab/isaaclab/physics/physics_manager.py index 5b94ede481d5..5676fa8ef02b 100644 --- a/source/isaaclab/isaaclab/physics/physics_manager.py +++ b/source/isaaclab/isaaclab/physics/physics_manager.py @@ -265,6 +265,17 @@ def step(cls) -> None: """Step physics simulation by one timestep (physics only, no rendering).""" pass + @classmethod + def pre_render(cls) -> None: + """Sync deferred physics state to the rendering backend. + + Called by :meth:`~isaaclab.sim.SimulationContext.render` before cameras + and visualizers read scene data. The default implementation is a no-op. + Backends that defer transform writes (e.g. Newton's dirty-flag pattern) + should override this to flush pending updates. + """ + pass + @classmethod def close(cls) -> None: """Clean up physics resources. diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py index 1d8551137f93..bb1daf6b0e5d 100644 --- a/source/isaaclab/isaaclab/sim/simulation_context.py +++ b/source/isaaclab/isaaclab/sim/simulation_context.py @@ -620,6 +620,7 @@ def render(self, mode: int | None = None) -> None: every physics step). Camera sensors drive their configured renderer when fetching data, so this method remains backend-agnostic. """ + self.physics_manager.pre_render() self.update_visualizers(self.get_rendering_dt()) # Call render callbacks diff --git a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py index fecb8d125fd6..cd9be10c49a1 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py @@ -153,6 +153,11 @@ def forward(cls) -> None: """Update articulation kinematics without stepping physics.""" eval_fk(cls._model, cls._state_0.joint_q, cls._state_0.joint_qd, cls._state_0, None) + @classmethod + def pre_render(cls) -> None: + """Flush deferred Fabric writes before cameras/visualizers read the scene.""" + cls.sync_transforms_to_usd() + @classmethod def sync_transforms_to_usd(cls) -> None: """Write Newton body_q to USD Fabric world matrices for Kit viewport / RTX rendering. @@ -160,8 +165,8 @@ def sync_transforms_to_usd(cls) -> None: No-op when ``_usdrt_stage`` is None (i.e. Kit visualizer is not active) or when transforms have not changed since the last sync. - This method is called at render cadence by - :class:`~isaaclab.sim.scene_data_providers.NewtonSceneDataProvider`. + Called at render cadence by :meth:`pre_render` (via + :meth:`~isaaclab.sim.SimulationContext.render`). Physics stepping marks transforms dirty via :meth:`_mark_transforms_dirty` so that the expensive Fabric hierarchy update only runs once per render frame rather than after every physics step. @@ -243,10 +248,8 @@ def step(cls) -> None: # Step simulation (graphed or not; _graph is None when capture is disabled or failed) if cfg is not None and cfg.use_cuda_graph and cls._graph is not None and "cuda" in device: # type: ignore[union-attr] wp.capture_launch(cls._graph) - # Fabric sync is excluded from the graph (wp.synchronize_device is non-capturable), - # so run it eagerly here after the graph replay. if cls._usdrt_stage is not None: - cls.sync_transforms_to_usd() + cls._mark_transforms_dirty() else: with wp.ScopedDevice(device): cls._simulate() diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py index ffba9463d86a..143ca6643e91 100644 --- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py +++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py @@ -257,8 +257,8 @@ def tiling_grid_shape(): tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous() # Dump the tiled image to disk (first visual data type per frame only) - if tiled_data_buffer.dtype == wp.uint8: - self._save_tiled_image(tiled_data_buffer) + #if tiled_data_buffer.dtype == wp.uint8: + # self._save_tiled_image(tiled_data_buffer) wp.launch( kernel=reshape_tiled_image, From c5e109d344019ae425b0a03c7511a90ab44c7203 Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Thu, 12 Mar 2026 19:22:25 -0700 Subject: [PATCH 5/8] Cubric bindings --- .../isaaclab_newton/physics/_cubric.py | 345 ++++++++++++++++++ .../isaaclab_newton/physics/newton_manager.py | 114 ++++-- 2 files changed, 437 insertions(+), 22 deletions(-) create mode 100644 source/isaaclab_newton/isaaclab_newton/physics/_cubric.py diff --git a/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py b/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py new file mode 100644 index 000000000000..68453ee77a93 --- /dev/null +++ b/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py @@ -0,0 +1,345 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Pure-Python ctypes bindings for the cubric GPU transform-hierarchy API. + +Acquires the ``omni::cubric::IAdapter`` carb interface directly from the +Carbonite framework and wraps its function-pointer methods so that Newton +can call cubric's GPU transform propagation without C++ pybind11 changes. + +The flow mirrors PhysX's ``DirectGpuHelper::updateXForms_GPU()``: + +1. ``IAdapter::create`` → allocate a cubric adapter ID +2. ``IAdapter::bindToStage`` → bind to the current Fabric stage +3. ``IAdapter::compute`` → GPU kernel: propagate world transforms +4. ``IAdapter::release`` → free the adapter + +When cubric is unavailable (e.g. CPU-only machine, plugin not loaded), the +caller falls back to the CPU ``update_world_xforms()`` path. +""" + +from __future__ import annotations + +import ctypes +import logging + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Carb Framework struct layout (CARB_ABI function-pointer offsets, x86_64) +# --------------------------------------------------------------------------- +# Counting only CARB_ABI fields from the top of ``struct Framework``: +# 0: loadPluginsEx +# 8: unloadAllPlugins +# 16: acquireInterfaceWithClient +# 24: tryAcquireInterfaceWithClient ← we use this one +_FW_OFF_TRY_ACQUIRE = 24 + +# --------------------------------------------------------------------------- +# IAdapter struct layout (from omni/cubric/IAdapter.h) +# --------------------------------------------------------------------------- +# 0: getAttribute +# 8: create(AdapterId*) +# 16: refcount +# 24: retain +# 32: release(AdapterId) +# 40: bindToStage(AdapterId, const FabricId&) +# 48: unbind +# 56: compute(AdapterId, options, dirtyMode, outFlags*) +_IA_OFF_CREATE = 8 +_IA_OFF_RELEASE = 32 +_IA_OFF_BIND = 40 +_IA_OFF_COMPUTE = 56 + +# AdapterId sentinel +_INVALID_ADAPTER_ID = ctypes.c_uint64(~0).value + +# AdapterComputeOptions flags (from IAdapter.h) +_OPT_FORCE_UPDATE = 1 << 0 # Force update, ignoring invalidation status +_OPT_FORCE_STATE_RECONSTRUCTION = 1 << 1 # Force full rebuild of internal accel structures +_OPT_SKIP_ISOLATED = 1 << 2 # Skip prims with connectivity degree 0 +_OPT_RIGID_BODY = 1 << 3 # Use PhysicsRigidBodyAPI tag for inverse propagation + +# Newton prims get tagged with PhysicsRigidBodyAPI at init time so +# cubric's eRigidBody mode can distinguish rigid-body buckets +# (Inverse: preserve world matrix written by Newton, derive local) +# from non-rigid-body buckets (Forward: propagate to children). +# eForceUpdate is ORed in to bypass the change-listener check. +_OPT_DEFAULT = _OPT_RIGID_BODY | _OPT_FORCE_UPDATE + +# AdapterDirtyMode +_DIRTY_ALL = 0 # eAll — dirty all prims in the stage +_DIRTY_COARSE = 1 # eCoarse — dirty all prims in visited buckets + + +# --------------------------------------------------------------------------- +# ctypes struct mirrors +# --------------------------------------------------------------------------- +class _Version(ctypes.Structure): + _fields_ = [("major", ctypes.c_uint32), ("minor", ctypes.c_uint32)] + + +class _InterfaceDesc(ctypes.Structure): + """``carb::InterfaceDesc`` — {const char* name, Version version}.""" + _fields_ = [ + ("name", ctypes.c_char_p), + ("version", _Version), + ] + + +def _read_u64(addr: int) -> int: + return ctypes.c_uint64.from_address(addr).value + + +def _dump_fn_ptrs(base: int, names: list[str], label: str) -> None: + """Log function pointer values at 8-byte intervals from *base*.""" + for i, name in enumerate(names): + addr = _read_u64(base + i * 8) + tag = "OK" if addr else "NULL" + logger.info(" %s+%d (%s) = 0x%016x [%s]", label, i * 8, name, addr, tag) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- +class CubricBindings: + """Typed wrappers around the cubric ``IAdapter`` API. + + Call :meth:`initialize` once; if it returns ``True``, the four adapter + methods are available. + """ + + def __init__(self) -> None: + self._ia_ptr: int = 0 + self._create_fn = None + self._release_fn = None + self._bind_fn = None + self._compute_fn = None + self._log_count = 0 + + # -- lifecycle ----------------------------------------------------------- + + def initialize(self) -> bool: + """Acquire the cubric ``IAdapter`` from the carb framework.""" + # Ensure the omni.cubric extension (native carb plugin) is loaded. + try: + import omni.kit.app + + ext_mgr = omni.kit.app.get_app().get_extension_manager() + cubric_enabled = ext_mgr.is_extension_enabled("omni.cubric") + logger.info("omni.cubric extension enabled: %s", cubric_enabled) + if not cubric_enabled: + logger.info("Enabling omni.cubric extension") + ext_mgr.set_extension_enabled_immediate("omni.cubric", True) + cubric_enabled = ext_mgr.is_extension_enabled("omni.cubric") + logger.info("omni.cubric after enable: %s", cubric_enabled) + except Exception as exc: + logger.warning("Cannot enable omni.cubric: %s", exc) + return False + + # Get Framework* via libcarb.so acquireFramework (singleton). + try: + libcarb = ctypes.CDLL("libcarb.so") + except OSError: + logger.warning("Could not load libcarb.so") + return False + + # Check which symbols libcarb exports for framework access + for sym_name in ("acquireFramework", "carbGetSdkVersion", "isFrameworkValid"): + try: + sym = getattr(libcarb, sym_name, None) + logger.info("libcarb.%s: %s", sym_name, "found" if sym else "missing") + except Exception: + logger.info("libcarb.%s: not accessible", sym_name) + + # Verify framework is alive + try: + libcarb.isFrameworkValid.restype = ctypes.c_bool + libcarb.isFrameworkValid.argtypes = [] + fw_valid = libcarb.isFrameworkValid() + logger.info("isFrameworkValid() = %s", fw_valid) + except Exception as exc: + logger.warning("isFrameworkValid() failed: %s", exc) + + # Get SDK version for diagnostics + try: + libcarb.carbGetSdkVersion.restype = ctypes.c_char_p + libcarb.carbGetSdkVersion.argtypes = [] + sdk_ver = libcarb.carbGetSdkVersion() + logger.info("carbGetSdkVersion() = %s", sdk_ver) + except Exception as exc: + logger.info("carbGetSdkVersion() failed: %s", exc) + + libcarb.acquireFramework.restype = ctypes.c_void_p + libcarb.acquireFramework.argtypes = [ctypes.c_char_p, _Version] + fw_ptr = libcarb.acquireFramework(b"isaaclab.cubric", _Version(0, 0)) + if not fw_ptr: + logger.warning("acquireFramework returned null") + return False + logger.info("carb Framework* = 0x%016x", fw_ptr) + + # Dump first several framework function pointers for diagnosis + fw_fn_names = [ + "loadPluginsEx", + "unloadAllPlugins", + "acquireInterfaceWithClient", + "tryAcquireInterfaceWithClient", + ] + _dump_fn_ptrs(fw_ptr, fw_fn_names, "Framework") + + # Read tryAcquireInterfaceWithClient fn-ptr from Framework. + try_acquire_addr = _read_u64(fw_ptr + _FW_OFF_TRY_ACQUIRE) + if try_acquire_addr == 0: + logger.warning("tryAcquireInterfaceWithClient is null in Framework") + return False + logger.info("tryAcquireInterfaceWithClient addr = 0x%016x", try_acquire_addr) + + try_acquire_fn = ctypes.CFUNCTYPE( + ctypes.c_void_p, # return: void* (IAdapter*) + ctypes.c_char_p, # clientName + _InterfaceDesc, # desc (by value) + ctypes.c_char_p, # pluginName + )(try_acquire_addr) + + desc = _InterfaceDesc( + name=b"omni::cubric::IAdapter", + version=_Version(0, 1), + ) + logger.info( + "Calling tryAcquireInterfaceWithClient(" + "client=%r, iface=%r, ver=%d.%d, plugin=%r)", + b"carb.scripting-python.plugin", + desc.name, + desc.version.major, + desc.version.minor, + None, + ) + ia_ptr = try_acquire_fn(b"carb.scripting-python.plugin", desc, None) + if not ia_ptr: + # Try without client name restriction + logger.info("First attempt returned null; retrying with client=None") + ia_ptr = try_acquire_fn(None, desc, None) + if not ia_ptr: + # Try acquireInterfaceWithClient (offset 16) which logs errors + logger.info("tryAcquire returned null; trying acquireInterfaceWithClient") + acquire_addr = _read_u64(fw_ptr + 16) + if acquire_addr: + acquire_fn = ctypes.CFUNCTYPE( + ctypes.c_void_p, + ctypes.c_char_p, + _InterfaceDesc, + ctypes.c_char_p, + )(acquire_addr) + ia_ptr = acquire_fn(b"isaaclab.cubric", desc, None) + if not ia_ptr: + logger.warning( + "All IAdapter acquisition attempts returned null — " + "cubric plugin may not be registered or interface mismatch" + ) + return False + self._ia_ptr = ia_ptr + logger.info("IAdapter* = 0x%016x", ia_ptr) + + # Dump all IAdapter function pointers + ia_fn_names = [ + "getAttribute", "create", "refcount", "retain", + "release", "bindToStage", "unbind", "compute", + ] + _dump_fn_ptrs(ia_ptr, ia_fn_names, "IAdapter") + + # Wrap the four IAdapter function pointers we need. + create_addr = _read_u64(ia_ptr + _IA_OFF_CREATE) + release_addr = _read_u64(ia_ptr + _IA_OFF_RELEASE) + bind_addr = _read_u64(ia_ptr + _IA_OFF_BIND) + compute_addr = _read_u64(ia_ptr + _IA_OFF_COMPUTE) + + if not all([create_addr, release_addr, bind_addr, compute_addr]): + logger.warning("One or more IAdapter function pointers are null") + return False + + # create(AdapterId* out) -> bool + self._create_fn = ctypes.CFUNCTYPE( + ctypes.c_bool, ctypes.POINTER(ctypes.c_uint64), + )(create_addr) + + # release(AdapterId) -> bool + self._release_fn = ctypes.CFUNCTYPE( + ctypes.c_bool, ctypes.c_uint64, + )(release_addr) + + # bindToStage(AdapterId, const FabricId&) -> bool + # FabricId is uint64, passed by const-ref → pointer on x86_64 + self._bind_fn = ctypes.CFUNCTYPE( + ctypes.c_bool, ctypes.c_uint64, ctypes.POINTER(ctypes.c_uint64), + )(bind_addr) + + # compute(AdapterId, options, dirtyMode, outAccountFlags*) -> bool + self._compute_fn = ctypes.CFUNCTYPE( + ctypes.c_bool, + ctypes.c_uint64, # adapterId + ctypes.c_uint32, # options (AdapterComputeOptions) + ctypes.c_int32, # dirtyMode (AdapterDirtyMode) + ctypes.c_void_p, # outAccountFlags* (nullable) + )(compute_addr) + + logger.info( + "cubric IAdapter bindings ready (opts=0x%x [eRigidBody|eForceUpdate], dirty=%d [eAll])", + _OPT_DEFAULT, + _DIRTY_ALL, + ) + return True + + @property + def available(self) -> bool: + return self._ia_ptr != 0 + + # -- cubric adapter methods ---------------------------------------------- + + def create_adapter(self) -> int | None: + """Create a cubric adapter. Returns an adapter ID or ``None``.""" + if not self._create_fn: + return None + adapter_id = ctypes.c_uint64(_INVALID_ADAPTER_ID) + ok = self._create_fn(ctypes.byref(adapter_id)) + if not ok or adapter_id.value == _INVALID_ADAPTER_ID: + logger.warning("IAdapter::create failed") + return None + logger.info("cubric adapter created (id=%d)", adapter_id.value) + return adapter_id.value + + def bind_to_stage(self, adapter_id: int, fabric_id: int) -> bool: + """Bind the adapter to a Fabric stage.""" + if not self._bind_fn: + return False + fid = ctypes.c_uint64(fabric_id) + ok = self._bind_fn(adapter_id, ctypes.byref(fid)) + if not ok: + logger.warning("IAdapter::bindToStage failed (adapter=%d, fabricId=%d)", adapter_id, fabric_id) + elif self._log_count < 3: + logger.info("IAdapter::bindToStage ok (adapter=%d, fabricId=%d)", adapter_id, fabric_id) + return ok + + def compute(self, adapter_id: int) -> bool: + """Run the GPU transform-hierarchy compute pass. + + Uses ``eRigidBody | eForceUpdate`` with ``eAll`` dirty mode. + ``eRigidBody`` makes cubric apply Inverse propagation on buckets + tagged with ``PhysicsRigidBodyAPI`` (keeps Newton's world transforms, + derives local) and Forward on everything else (propagates to children). + ``eForceUpdate`` bypasses the change-listener dirty check. + """ + if not self._compute_fn: + return False + flags = ctypes.c_uint32(0) + ok = self._compute_fn(adapter_id, _OPT_DEFAULT, _DIRTY_ALL, ctypes.byref(flags)) + if not ok: + logger.warning("IAdapter::compute returned false (flags=0x%x)", flags.value) + return ok + + def release_adapter(self, adapter_id: int) -> None: + """Release an adapter.""" + if not adapter_id or not self._release_fn: + return + self._release_fn(adapter_id) diff --git a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py index cd9be10c49a1..50bf97e70e5a 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py @@ -104,6 +104,10 @@ class NewtonManager(PhysicsManager): _clone_physics_only = False _transforms_dirty: bool = False + # cubric GPU transform hierarchy (replaces CPU update_world_xforms) + _cubric = None + _cubric_adapter: int | None = None + # Model changes (callbacks use unified system from PhysicsManager) _model_changes: set[int] = set() @@ -174,6 +178,12 @@ def sync_transforms_to_usd(cls) -> None: Uses ``wp.fabricarray`` directly (no ``isaacsim.physics.newton`` extension needed). The Warp kernel reads ``state_0.body_q[newton_index[i]]`` and writes the corresponding ``mat44d`` to ``omni:fabric:worldMatrix`` for each prim. + + When cubric is available the method mirrors PhysX's ``DirectGpuHelper`` + pattern: pause Fabric change tracking, write transforms, resume tracking, + then call ``IAdapter::compute`` on the GPU to propagate the hierarchy and + notify the Fabric Scene Delegate. Otherwise it falls back to the CPU + ``update_world_xforms()`` path. """ if cls._usdrt_stage is None or cls._model is None or cls._state_0 is None: return @@ -182,32 +192,70 @@ def sync_transforms_to_usd(cls) -> None: try: import usdrt - selection = cls._usdrt_stage.SelectPrims( - require_attrs=[ - (usdrt.Sdf.ValueTypeNames.Matrix4d, "omni:fabric:worldMatrix", usdrt.Usd.Access.ReadWrite), - (usdrt.Sdf.ValueTypeNames.UInt, cls._newton_index_attr, usdrt.Usd.Access.Read), - ], - device=str(PhysicsManager._device), - ) - if selection.GetCount() == 0: - return - fabric_transforms = wp.fabricarray(selection, "omni:fabric:worldMatrix") - newton_indices = wp.fabricarray(selection, cls._newton_index_attr) - wp.launch( - _set_fabric_transforms, - dim=newton_indices.shape[0], - inputs=[fabric_transforms, newton_indices, cls._state_0.body_q], - device=PhysicsManager._device, - ) - wp.synchronize_device(PhysicsManager._device) + # Lazy adapter creation: deferred from initialize_solver() to avoid + # startup-ordering issues with the cubric plugin. + if cls._cubric is not None and cls._cubric.available and cls._cubric_adapter is None: + cls._cubric_adapter = cls._cubric.create_adapter() + if cls._cubric_adapter is not None: + logger.info("cubric GPU transform hierarchy enabled") + else: + logger.warning("cubric adapter creation failed; falling back to update_world_xforms()") + cls._cubric = None + + use_cubric = cls._cubric is not None and cls._cubric_adapter is not None + + fabric_hierarchy = None if hasattr(usdrt, "hierarchy"): fabric_hierarchy = usdrt.hierarchy.IFabricHierarchy().get_fabric_hierarchy( cls._usdrt_stage.GetFabricId(), cls._usdrt_stage.GetStageIdAsStageId() ) - fabric_hierarchy.update_world_xforms() - cls._transforms_dirty = False - except Exception as exc: - logger.debug("[NewtonManager] sync_transforms_to_usd: %s", exc) + + # Pause hierarchy change tracking BEFORE SelectPrims. + # SelectPrims with ReadWrite access calls getAttributeArrayGpu + # internally, which marks Fabric buffers dirty. If tracking is + # still active at that point the hierarchy records the change and + # Kit's updateWorldXforms will do an expensive connectivity + # rebuild every frame. PhysX avoids this via ScopedUSDRT which + # pauses tracking before any Fabric writes. + if use_cubric and fabric_hierarchy is not None: + fabric_hierarchy.track_world_xform_changes(False) + fabric_hierarchy.track_local_xform_changes(False) + + try: + selection = cls._usdrt_stage.SelectPrims( + require_attrs=[ + (usdrt.Sdf.ValueTypeNames.Matrix4d, "omni:fabric:worldMatrix", usdrt.Usd.Access.ReadWrite), + (usdrt.Sdf.ValueTypeNames.UInt, cls._newton_index_attr, usdrt.Usd.Access.Read), + ], + device=str(PhysicsManager._device), + ) + if selection.GetCount() == 0: + return + + fabric_transforms = wp.fabricarray(selection, "omni:fabric:worldMatrix") + newton_indices = wp.fabricarray(selection, cls._newton_index_attr) + wp.launch( + _set_fabric_transforms, + dim=newton_indices.shape[0], + inputs=[fabric_transforms, newton_indices, cls._state_0.body_q], + device=PhysicsManager._device, + ) + wp.synchronize_device(PhysicsManager._device) + + cls._transforms_dirty = False + + if use_cubric and fabric_hierarchy is not None: + fabric_id = cls._usdrt_stage.GetFabricId().id + cls._cubric.bind_to_stage(cls._cubric_adapter, fabric_id) + cls._cubric.compute(cls._cubric_adapter) + elif fabric_hierarchy is not None: + fabric_hierarchy.update_world_xforms() + finally: + if use_cubric and fabric_hierarchy is not None: + fabric_hierarchy.track_world_xform_changes(True) + fabric_hierarchy.track_local_xform_changes(True) + except Exception: + logger.exception("[NewtonManager] sync_transforms_to_usd FAILED") @classmethod def _mark_transforms_dirty(cls) -> None: @@ -288,6 +336,10 @@ def is_fabric_enabled(cls) -> bool: @classmethod def clear(cls): """Clear all Newton-specific state (callbacks cleared by super().close()).""" + if cls._cubric is not None and cls._cubric_adapter is not None: + cls._cubric.release_adapter(cls._cubric_adapter) + cls._cubric = None + cls._cubric_adapter = None cls._builder = None cls._model = None cls._solver = None @@ -365,6 +417,10 @@ def start_simulation(cls) -> None: prim = cls._usdrt_stage.GetPrimAtPath(prim_path) prim.CreateAttribute(cls._newton_index_attr, usdrt.Sdf.ValueTypeNames.UInt, True) prim.GetAttribute(cls._newton_index_attr).Set(i) + # Tag with PhysicsRigidBodyAPI so cubric's eRigidBody mode + # applies Inverse propagation (preserves Newton's world + # transforms and derives local) instead of Forward. + prim.AddAppliedSchema("PhysicsRigidBodyAPI") xformable_prim = usdrt.Rt.Xformable(prim) if not xformable_prim.HasWorldXform(): xformable_prim.SetWorldXformFromUsd() @@ -529,6 +585,20 @@ def initialize_solver(cls) -> None: # Initialize contacts and collision pipeline cls._initialize_contacts() + # Prepare cubric ctypes bindings (acquires IAdapter from carb framework). + # Adapter creation is deferred to the first sync_transforms_to_usd() call + # at render time to avoid any startup-ordering issues with the cubric + # plugin initialisation. + if cls._usdrt_stage is not None: + from isaaclab_newton.physics._cubric import CubricBindings + + cls._cubric = CubricBindings() + if cls._cubric.initialize(): + logger.info("cubric bindings ready (adapter deferred to first render)") + else: + logger.warning("cubric bindings init failed; falling back to update_world_xforms()") + cls._cubric = None + device = PhysicsManager._device use_cuda_graph = cfg.use_cuda_graph and "cuda" in device # type: ignore[union-attr] From c383820deb63a149020784a60eef974cb7e87dd6 Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Fri, 13 Mar 2026 09:17:49 -0700 Subject: [PATCH 6/8] Remove debugging code. --- .../isaaclab_newton/physics/_cubric.py | 105 +++--------------- 1 file changed, 14 insertions(+), 91 deletions(-) diff --git a/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py b/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py index 68453ee77a93..f9c4d0da4454 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/_cubric.py @@ -93,14 +93,6 @@ def _read_u64(addr: int) -> int: return ctypes.c_uint64.from_address(addr).value -def _dump_fn_ptrs(base: int, names: list[str], label: str) -> None: - """Log function pointer values at 8-byte intervals from *base*.""" - for i, name in enumerate(names): - addr = _read_u64(base + i * 8) - tag = "OK" if addr else "NULL" - logger.info(" %s+%d (%s) = 0x%016x [%s]", label, i * 8, name, addr, tag) - - # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @@ -117,7 +109,6 @@ def __init__(self) -> None: self._release_fn = None self._bind_fn = None self._compute_fn = None - self._log_count = 0 # -- lifecycle ----------------------------------------------------------- @@ -128,13 +119,11 @@ def initialize(self) -> bool: import omni.kit.app ext_mgr = omni.kit.app.get_app().get_extension_manager() - cubric_enabled = ext_mgr.is_extension_enabled("omni.cubric") - logger.info("omni.cubric extension enabled: %s", cubric_enabled) - if not cubric_enabled: - logger.info("Enabling omni.cubric extension") + if not ext_mgr.is_extension_enabled("omni.cubric"): ext_mgr.set_extension_enabled_immediate("omni.cubric", True) - cubric_enabled = ext_mgr.is_extension_enabled("omni.cubric") - logger.info("omni.cubric after enable: %s", cubric_enabled) + if not ext_mgr.is_extension_enabled("omni.cubric"): + logger.warning("Failed to enable omni.cubric extension") + return False except Exception as exc: logger.warning("Cannot enable omni.cubric: %s", exc) return False @@ -146,55 +135,18 @@ def initialize(self) -> bool: logger.warning("Could not load libcarb.so") return False - # Check which symbols libcarb exports for framework access - for sym_name in ("acquireFramework", "carbGetSdkVersion", "isFrameworkValid"): - try: - sym = getattr(libcarb, sym_name, None) - logger.info("libcarb.%s: %s", sym_name, "found" if sym else "missing") - except Exception: - logger.info("libcarb.%s: not accessible", sym_name) - - # Verify framework is alive - try: - libcarb.isFrameworkValid.restype = ctypes.c_bool - libcarb.isFrameworkValid.argtypes = [] - fw_valid = libcarb.isFrameworkValid() - logger.info("isFrameworkValid() = %s", fw_valid) - except Exception as exc: - logger.warning("isFrameworkValid() failed: %s", exc) - - # Get SDK version for diagnostics - try: - libcarb.carbGetSdkVersion.restype = ctypes.c_char_p - libcarb.carbGetSdkVersion.argtypes = [] - sdk_ver = libcarb.carbGetSdkVersion() - logger.info("carbGetSdkVersion() = %s", sdk_ver) - except Exception as exc: - logger.info("carbGetSdkVersion() failed: %s", exc) - libcarb.acquireFramework.restype = ctypes.c_void_p libcarb.acquireFramework.argtypes = [ctypes.c_char_p, _Version] fw_ptr = libcarb.acquireFramework(b"isaaclab.cubric", _Version(0, 0)) if not fw_ptr: logger.warning("acquireFramework returned null") return False - logger.info("carb Framework* = 0x%016x", fw_ptr) - - # Dump first several framework function pointers for diagnosis - fw_fn_names = [ - "loadPluginsEx", - "unloadAllPlugins", - "acquireInterfaceWithClient", - "tryAcquireInterfaceWithClient", - ] - _dump_fn_ptrs(fw_ptr, fw_fn_names, "Framework") - - # Read tryAcquireInterfaceWithClient fn-ptr from Framework. + + # Read tryAcquireInterfaceWithClient fn-ptr from Framework vtable. try_acquire_addr = _read_u64(fw_ptr + _FW_OFF_TRY_ACQUIRE) if try_acquire_addr == 0: logger.warning("tryAcquireInterfaceWithClient is null in Framework") return False - logger.info("tryAcquireInterfaceWithClient addr = 0x%016x", try_acquire_addr) try_acquire_fn = ctypes.CFUNCTYPE( ctypes.c_void_p, # return: void* (IAdapter*) @@ -207,24 +159,14 @@ def initialize(self) -> bool: name=b"omni::cubric::IAdapter", version=_Version(0, 1), ) - logger.info( - "Calling tryAcquireInterfaceWithClient(" - "client=%r, iface=%r, ver=%d.%d, plugin=%r)", - b"carb.scripting-python.plugin", - desc.name, - desc.version.major, - desc.version.minor, - None, - ) + + # Try several acquisition strategies — the required client name + # varies across Kit configurations. ia_ptr = try_acquire_fn(b"carb.scripting-python.plugin", desc, None) if not ia_ptr: - # Try without client name restriction - logger.info("First attempt returned null; retrying with client=None") ia_ptr = try_acquire_fn(None, desc, None) if not ia_ptr: - # Try acquireInterfaceWithClient (offset 16) which logs errors - logger.info("tryAcquire returned null; trying acquireInterfaceWithClient") - acquire_addr = _read_u64(fw_ptr + 16) + acquire_addr = _read_u64(fw_ptr + 16) # acquireInterfaceWithClient if acquire_addr: acquire_fn = ctypes.CFUNCTYPE( ctypes.c_void_p, @@ -235,19 +177,11 @@ def initialize(self) -> bool: ia_ptr = acquire_fn(b"isaaclab.cubric", desc, None) if not ia_ptr: logger.warning( - "All IAdapter acquisition attempts returned null — " - "cubric plugin may not be registered or interface mismatch" + "Could not acquire omni::cubric::IAdapter — " + "cubric plugin may not be registered or interface version mismatch" ) return False self._ia_ptr = ia_ptr - logger.info("IAdapter* = 0x%016x", ia_ptr) - - # Dump all IAdapter function pointers - ia_fn_names = [ - "getAttribute", "create", "refcount", "retain", - "release", "bindToStage", "unbind", "compute", - ] - _dump_fn_ptrs(ia_ptr, ia_fn_names, "IAdapter") # Wrap the four IAdapter function pointers we need. create_addr = _read_u64(ia_ptr + _IA_OFF_CREATE) @@ -259,23 +193,19 @@ def initialize(self) -> bool: logger.warning("One or more IAdapter function pointers are null") return False - # create(AdapterId* out) -> bool self._create_fn = ctypes.CFUNCTYPE( ctypes.c_bool, ctypes.POINTER(ctypes.c_uint64), )(create_addr) - # release(AdapterId) -> bool self._release_fn = ctypes.CFUNCTYPE( ctypes.c_bool, ctypes.c_uint64, )(release_addr) - # bindToStage(AdapterId, const FabricId&) -> bool - # FabricId is uint64, passed by const-ref → pointer on x86_64 + # FabricId is uint64, passed by const-ref -> pointer on x86_64 self._bind_fn = ctypes.CFUNCTYPE( ctypes.c_bool, ctypes.c_uint64, ctypes.POINTER(ctypes.c_uint64), )(bind_addr) - # compute(AdapterId, options, dirtyMode, outAccountFlags*) -> bool self._compute_fn = ctypes.CFUNCTYPE( ctypes.c_bool, ctypes.c_uint64, # adapterId @@ -284,11 +214,7 @@ def initialize(self) -> bool: ctypes.c_void_p, # outAccountFlags* (nullable) )(compute_addr) - logger.info( - "cubric IAdapter bindings ready (opts=0x%x [eRigidBody|eForceUpdate], dirty=%d [eAll])", - _OPT_DEFAULT, - _DIRTY_ALL, - ) + logger.info("cubric IAdapter bindings ready") return True @property @@ -306,7 +232,6 @@ def create_adapter(self) -> int | None: if not ok or adapter_id.value == _INVALID_ADAPTER_ID: logger.warning("IAdapter::create failed") return None - logger.info("cubric adapter created (id=%d)", adapter_id.value) return adapter_id.value def bind_to_stage(self, adapter_id: int, fabric_id: int) -> bool: @@ -317,8 +242,6 @@ def bind_to_stage(self, adapter_id: int, fabric_id: int) -> bool: ok = self._bind_fn(adapter_id, ctypes.byref(fid)) if not ok: logger.warning("IAdapter::bindToStage failed (adapter=%d, fabricId=%d)", adapter_id, fabric_id) - elif self._log_count < 3: - logger.info("IAdapter::bindToStage ok (adapter=%d, fabricId=%d)", adapter_id, fabric_id) return ok def compute(self, adapter_id: int) -> bool: From b9870dbca1d298902153c079277d1093e8882791 Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Fri, 13 Mar 2026 09:22:22 -0700 Subject: [PATCH 7/8] Revert "write images" This reverts commit 46830e6a6e3150da5f0fb378528ab78253e0dea0. --- .../renderers/isaac_rtx_renderer.py | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py index 143ca6643e91..22b07f13def0 100644 --- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py +++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py @@ -8,9 +8,7 @@ from __future__ import annotations import json -import logging import math -import os import weakref from dataclasses import dataclass from typing import TYPE_CHECKING, Any @@ -50,12 +48,6 @@ class IsaacRtxRenderData: sensor: SensorBase | None = None -_logger = logging.getLogger(__name__) - -_TILED_DUMP_DIR = "/tmp/il-rtx" -_tiled_frame_counter = 0 - - class IsaacRtxRenderer(BaseRenderer): """Isaac RTX backend using Omniverse Replicator for tiled camera rendering. @@ -256,10 +248,6 @@ def tiling_grid_shape(): if data_type in SIMPLE_SHADING_MODES: tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous() - # Dump the tiled image to disk (first visual data type per frame only) - #if tiled_data_buffer.dtype == wp.uint8: - # self._save_tiled_image(tiled_data_buffer) - wp.launch( kernel=reshape_tiled_image, dim=(view_count, cfg.height, cfg.width), @@ -292,20 +280,6 @@ def tiling_grid_shape(): 0.0 if cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1] ) - def _save_tiled_image(self, tiled_data: wp.array): - """Save the raw tiled render product to disk as a PNG.""" - global _tiled_frame_counter - os.makedirs(_TILED_DUMP_DIR, exist_ok=True) - path = os.path.join(_TILED_DUMP_DIR, f"frame_{_tiled_frame_counter:06d}.png") - _tiled_frame_counter += 1 - try: - from PIL import Image - - img_np = wp.to_torch(tiled_data).cpu().numpy() - Image.fromarray(img_np).save(path) - except Exception: - _logger.warning("Failed to save tiled image to %s", path, exc_info=True) - def write_output(self, render_data: IsaacRtxRenderData, output_name: str, output_data: torch.Tensor): """No-op for Isaac RTX - all outputs written in render(). See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.write_output`.""" From a0248bc61e941b4401c5fe97f98f056766868cec Mon Sep 17 00:00:00 2001 From: Nathan Cournia Date: Fri, 13 Mar 2026 09:59:25 -0700 Subject: [PATCH 8/8] Simplify cuda graph code. --- .../isaaclab_newton/physics/newton_manager.py | 41 +++---------------- 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py index 50bf97e70e5a..ca0c8b52eac1 100644 --- a/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py +++ b/source/isaaclab_newton/isaaclab_newton/physics/newton_manager.py @@ -780,44 +780,13 @@ def step_fn(state_0, state_1): @classmethod def _simulate(cls) -> None: - """Run one simulation step with substeps.""" + """Run one simulation step with substeps and USD sync. - # MJWarp can use its internal collision pipeline. - if cls._needs_collision_pipeline: - cls._collision_pipeline.collide(cls._state_0, cls._contacts) - contacts = cls._contacts - else: - contacts = None - - def step_fn(state_0, state_1): - cls._solver.step(state_0, state_1, cls._control, contacts, cls._solver_dt) - - if cls._use_single_state: - for i in range(cls._num_substeps): - step_fn(cls._state_0, cls._state_0) - cls._state_0.clear_forces() - else: - cfg = PhysicsManager._cfg - need_copy_on_last_substep = (cfg is not None and cfg.use_cuda_graph) and cls._num_substeps % 2 == 1 # type: ignore[union-attr] - - for i in range(cls._num_substeps): - step_fn(cls._state_0, cls._state_1) - if need_copy_on_last_substep and i == cls._num_substeps - 1: - cls._state_0.assign(cls._state_1) - else: - cls._state_0, cls._state_1 = cls._state_1, cls._state_0 - cls._state_0.clear_forces() - - # Populate contacts for contact sensors - if cls._report_contacts: - # For newton_contacts (unified pipeline): use locally computed contacts - # For mujoco_contacts: use class-level _contacts, solver populates it from MuJoCo data - eval_contacts = contacts if contacts is not None else cls._contacts - cls._solver.update_contacts(eval_contacts, cls._state_0) - for sensor in cls._newton_contact_sensors.values(): - sensor.update(cls._state_0, eval_contacts) + Delegates physics work to :meth:`_simulate_physics_only` and then + marks transforms dirty for the next render-cadence sync. + """ + cls._simulate_physics_only() - # Mark transforms dirty so the next render-cadence sync picks them up. if cls._usdrt_stage is not None: cls._mark_transforms_dirty()