From 3e598c84ed665f69ffe5741d0917d61af1dbb5b7 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 10:34:52 -0500
Subject: [PATCH 01/30] start

---
 src/batch.jl  | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/kernel.jl |   1 -
 src/time.jl   |   2 +-
 3 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 src/batch.jl

diff --git a/src/batch.jl b/src/batch.jl
new file mode 100644
index 0000000..3310067
--- /dev/null
+++ b/src/batch.jl
@@ -0,0 +1,211 @@
+#= 
+
+BatchParam is very similar to TimeDependentParam, but instead 
+of storing a function, it stores an arbitrary array of parameters.
+
+Given a batch = [k1, k2, k3], batch parameters are seen by the particles as
+
+Particle 1: k1
+Particle 2: k2
+Particle 3: k3
+Particle 4: k1
+Particle 5: k2
+Particle 6: k3
+
+etc.
+
+Just like time, there are two types for batches - one type unstable 
+generic wrapper for an AbstractArray/Number which is manipulated at the 
+highest level, and a lowered representation where the numbers are made
+numbers and arrays are kept but the length of the array is stored in 
+the type. The reason for this is so that scalars are not unnecessarily
+represented as large arrays, to save memory both outside and inside the 
+kernel. The goal is that for large, batched simulations the type instability
+of the UNPACKING step (simulation step is always type stable) is outweighted 
+by the simulation step.
+
+The lowered type does NOT have any arithmetic operations defined on it, as 
+it should be untouched after lowering and in the kernel evaluated for each 
+particle.
+
+=#
+struct BatchParam
+  batch::Union{AbstractArray,Number}
+  #=
+    Currently, batches of TimeDependentParam are not supported
+    It is essentially impossible on the GPU because accessing an 
+    array of functions is not type stable (every particle has its 
+    own function). The alternative - a time function which returns a
+    big array - is also not possible for CPU-SIMD and would run into 
+    memory problems on both CPU and GPU depending on how large that 
+    static array is.
+    
+    On the CPU, the first option may be doable using FunctionWrappers,
+    but that would require special handling because current Time
+    uses bona-fide Julia functions for GPU compatibility.
+
+    If one would like to do this right now, they should just start up 
+    separate processes where each process has its own lattice with its 
+    own TimeDependentParams.
+  =#
+end
+
+struct _LoweredBatchParam{N,V<:AbstractArray}
+  batch::V
+  _LoweredBatchParam(batch::AbstractArray) = new{length(batch),typeof(batch)}(batch)
+  _LoweredBatchParam{N}(batch::AbstractArray) where {N} = new{N,typeof(batch)}(batch)
+end
+
+# Necessary for GPU compatibility if batch is a GPU array
+function Adapt.adapt_structure(to, lbp::_LoweredBatchParam{N}) where {N}
+    batch = Adapt.adapt_structure(to, lbp.batch)
+    return _LoweredBatchParam{N,typeof(batch)}(batch)
+end
+
+# BatchParam will act like a number
+# Conversion of types to BatchParam
+BatchParam(a::BatchParam) = a
+
+# Make these apply via convert
+Base.convert(::Type{BatchParam}, a::Number) = BatchParam(a) # Scalar BatchParam
+Base.convert(::Type{BatchParam}, a::BatchParam) = a
+
+Base.zero(b::BatchParam) = BatchParam(zero(first(b.batch)))
+Base.one(b::BatchParam)  = BatchParam(one(first(b.batch))) 
+
+# Now define the math operations:
+for op in (:+,:-,:*,:/,:^)
+  @eval begin
+    Base.$op(ba::BatchParam, b::Number)   = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end)
+    Base.$op(a::Number,   bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end)
+    function Base.$op(ba::BatchParam, bb::BatchParam)
+      if !(ba.batch isa Number) && !(bb.batch isa Number) && length(ba.batch) != length(bb.batch)
+        error("Cannot perform operation $($op) with two non-scalar BatchParams of differing 
+               lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).")
+      end
+      return BatchParam(map((x,y)->$op(x, y), ba.batch, bb.batch))
+    end
+  end
+end
+
+function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N}
+  return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch))
+end
+
+for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv,
+  :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float,
+  :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs)
+  @eval begin
+    Base.$t(b::BatchParam) = BatchParam(map(x->($t)(x), b.batch))
+  end
+end
+
+atan2(b1::BatchParam, b2::BatchParam) = BatchParam(map((x,y)->atan2(x,y), b1.batch, b2.batch))
+
+for t = (:unit, :sincu, :sinhc, :sinhcu, :asinc, :asincu, :asinhc, :asinhcu, :erf, 
+         :erfc, :erfcx, :erfi, :wf, :rect)
+  @eval begin
+    GTPSA.$t(b::BatchParam) = BatchParam(map(x->($t)(x), b.batch))
+  end
+end
+
+
+Base.promote_rule(::Type{BatchParam}, ::Type{U}) where {U<:Number} = BatchParam
+Base.broadcastable(o::BatchParam) = Ref(o)
+
+Base.isapprox(b::BatchParam, n::Number; kwargs...) = all(x->isapprox(x, n, kwargs...), b.batch)
+Base.isapprox(n::Number, b::BatchParam; kwargs...) = all(x->isapprox(n, x, kwargs...), b.batch)
+Base.:(==)(b::BatchParam, n::Number) = all(x->x == n, b.batch)
+Base.:(==)(n::Number, b::BatchParam) = all(x->n == x, b.batch)
+Base.isinf(b::BatchParam) = all(x->isinf(x), b.batch)
+
+#=
+@inline teval(f::TimeFunction, t) = f(t)
+@inline teval(f, t) = f
+
+# === THIS BLOCK WAS WRITTEN BY CLAUDE ===
+# Generated function for arbitrary-length tuples
+@generated function teval(f::T, t) where {T<:Tuple}
+    N = length(T.parameters)
+    if N == 0
+        return :(())
+    end
+    # Use getfield with literal integer arguments
+    exprs = [:(teval(Base.getfield(f, $i), t)) for i in 1:N]
+    return :(tuple($(exprs...)))
+end
+# === END CLAUDE ===
+=#
+
+# Batch lowering should convert types to _LoweredBatchParam
+function batch_lower(b::BatchParam)
+  if b.batch isa AbstractArray
+    return _LoweredBatchParam(b.batch)
+  else
+    return b.batch
+  end
+end
+
+batch_lower(tp::TimeDependentParam) = tp.f
+batch_lower(tp) = tp
+# We can use map on the CPU, but not the GPU. This step of time_lower-ing is on 
+# the CPU and we are already type unstable here anyways, so we should do this.
+time_lower(tp::T) where {T<:Tuple} = map(ti->time_lower(ti), tp)
+
+# Arrays MUST be converted into tuples, for SIMD
+time_lower(tp::SArray{N,TimeDependentParam}) where {N} = time_lower(Tuple(tp))
+
+static_timecheck(tp) = false
+static_timecheck(::TimeFunction) = true
+@unroll function static_timecheck(t::Tuple)
+  @unroll for ti in t
+    if static_timecheck(ti)
+      return true
+    end
+  end
+  return false
+end
+#static_timecheck(tp::T) where {T<:Tuple} = Val{any(t->static_timecheck(t) isa Val{true}, tp)}()
+
+
+
+batch_lower(b) = b
+batch_lower(b::Tuple) = map(bi->batch_lower(bi), b)
+function batch_lower(ba::SArray{N,BatchParam}) where {N}
+  f = Tuple(map(bi->bi.batch, ba))
+  return TimeFunction(t->SArray{N}(map(fi->fi(t), f)))
+end
+time_lower(tp::SArray{N,Any}) where {N} = time_lower(TimeDependentParam.(tp))
+
+static_timecheck(tp) = false
+static_timecheck(::TimeFunction) = true
+@unroll function static_timecheck(t::Tuple)
+  @unroll for ti in t
+    if static_timecheck(ti)
+      return true
+    end
+  end
+  return false
+end
+
+"""
+    lane2vec(lane::SIMD.VecRange{N}) 
+    
+Given a SIMD.VecRange, will return an equivalent SIMD.Vec that
+can be used in arithmetic operations for mapping integer indices
+of particles to a given element in a batch.
+"""
+function lane2vec(lane::SIMD.VecRange{N}) where {N}
+  # Try to match with vector register size, but 
+  # only up to UInt32 -> ~4.3 billion particles, 
+  # probably max on CPU...
+  if Int(pick_vector_width(UInt32)) == N
+    return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}()))
+  else
+    return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}()))
+  end
+end
+
+@inline teval(f::TimeFunction, t) = f(t)
+@inline teval(f, t) = f
+@inline teval(f::Tuple, t) = map(ti->teval(ti, t), f)
diff --git a/src/kernel.jl b/src/kernel.jl
index 07794e1..3fba566 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -64,7 +64,6 @@ end
 function process_args(i, coords, args, ref)
   if !isnothing(ref) && static_timecheck(args) 
     let t = compute_time(coords.v[i,ZI], coords.v[i,PZI], ref)
-      new_args = map(arg->teval(arg, t), args)
       return map(arg->teval(arg, t), args)
     end
   else
diff --git a/src/time.jl b/src/time.jl
index 82c57f0..b7c2e9e 100644
--- a/src/time.jl
+++ b/src/time.jl
@@ -46,7 +46,7 @@ function Base.literal_pow(::typeof(^), da::TimeDependentParam, ::Val{N}) where {
 end
 
 for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv,
-  :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, 
+  :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float,
   :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs)
   @eval begin
     Base.$t(d::TimeDependentParam) = (let f = d.f; return TimeDependentParam((t)-> ($t)(f(t))); end)

From 0515cb10bbe746d8089842673e682c5ca1bd1ba7 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 10:51:28 -0500
Subject: [PATCH 02/30] first batch

---
 src/batch.jl | 81 ++++++++++++++++++----------------------------------
 1 file changed, 28 insertions(+), 53 deletions(-)

diff --git a/src/batch.jl b/src/batch.jl
index 3310067..2ed5b27 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -111,6 +111,8 @@ end
 
 
 Base.promote_rule(::Type{BatchParam}, ::Type{U}) where {U<:Number} = BatchParam
+Base.promote_rule(::Type{BatchParam}, ::Type{TimeDependentParam}) = error("Unable to combine BatchParams with TimeDependentParams")
+Base.promote_rule(::Type{TimeDependentParam}, ::Type{BatchParam}) = error("Unable to combine BatchParams with TimeDependentParams")
 Base.broadcastable(o::BatchParam) = Ref(o)
 
 Base.isapprox(b::BatchParam, n::Number; kwargs...) = all(x->isapprox(x, n, kwargs...), b.batch)
@@ -119,69 +121,28 @@ Base.:(==)(b::BatchParam, n::Number) = all(x->x == n, b.batch)
 Base.:(==)(n::Number, b::BatchParam) = all(x->n == x, b.batch)
 Base.isinf(b::BatchParam) = all(x->isinf(x), b.batch)
 
-#=
-@inline teval(f::TimeFunction, t) = f(t)
-@inline teval(f, t) = f
-
-# === THIS BLOCK WAS WRITTEN BY CLAUDE ===
-# Generated function for arbitrary-length tuples
-@generated function teval(f::T, t) where {T<:Tuple}
-    N = length(T.parameters)
-    if N == 0
-        return :(())
-    end
-    # Use getfield with literal integer arguments
-    exprs = [:(teval(Base.getfield(f, $i), t)) for i in 1:N]
-    return :(tuple($(exprs...)))
-end
-# === END CLAUDE ===
-=#
-
 # Batch lowering should convert types to _LoweredBatchParam
 function batch_lower(b::BatchParam)
   if b.batch isa AbstractArray
-    return _LoweredBatchParam(b.batch)
+    return _LoweredBatchParam(b.batch) # Only arrays are lowered to batchparams
   else
     return b.batch
   end
 end
 
-batch_lower(tp::TimeDependentParam) = tp.f
-batch_lower(tp) = tp
-# We can use map on the CPU, but not the GPU. This step of time_lower-ing is on 
+batch_lower(bp) = bp
+# We can use map on the CPU, but not the GPU. This step of batch_lower-ing is on 
 # the CPU and we are already type unstable here anyways, so we should do this.
-time_lower(tp::T) where {T<:Tuple} = map(ti->time_lower(ti), tp)
+batch_lower(bp::T) where {T<:Tuple} = map(bi->batch_lower(bi), bp)
 
 # Arrays MUST be converted into tuples, for SIMD
-time_lower(tp::SArray{N,TimeDependentParam}) where {N} = time_lower(Tuple(tp))
-
-static_timecheck(tp) = false
-static_timecheck(::TimeFunction) = true
-@unroll function static_timecheck(t::Tuple)
-  @unroll for ti in t
-    if static_timecheck(ti)
-      return true
-    end
-  end
-  return false
-end
-#static_timecheck(tp::T) where {T<:Tuple} = Val{any(t->static_timecheck(t) isa Val{true}, tp)}()
-
+batch_lower(bp::SArray{N,BatchParam}) where {N} = batch_lower(Tuple(bp))
 
-
-batch_lower(b) = b
-batch_lower(b::Tuple) = map(bi->batch_lower(bi), b)
-function batch_lower(ba::SArray{N,BatchParam}) where {N}
-  f = Tuple(map(bi->bi.batch, ba))
-  return TimeFunction(t->SArray{N}(map(fi->fi(t), f)))
-end
-time_lower(tp::SArray{N,Any}) where {N} = time_lower(TimeDependentParam.(tp))
-
-static_timecheck(tp) = false
-static_timecheck(::TimeFunction) = true
-@unroll function static_timecheck(t::Tuple)
+static_batchcheck(bp) = false
+static_batchcheck(::BatchParam) = true
+@unroll function static_batchcheck(t::Tuple)
   @unroll for ti in t
-    if static_timecheck(ti)
+    if static_batchcheck(ti)
       return true
     end
   end
@@ -206,6 +167,20 @@ function lane2vec(lane::SIMD.VecRange{N}) where {N}
   end
 end
 
-@inline teval(f::TimeFunction, t) = f(t)
-@inline teval(f, t) = f
-@inline teval(f::Tuple, t) = map(ti->teval(ti, t), f)
+lane2vec(i) = i
+
+@inline beval(b::_LoweredBatchParam, i) = b.batch[lane2vec(i)]
+@inline beval(b, i) = b
+
+# === THIS BLOCK WAS WRITTEN BY CLAUDE ===
+# Generated function for arbitrary-length tuples
+@generated function beval(f::T, t) where {T<:Tuple}
+    N = length(T.parameters)
+    if N == 0
+        return :(())
+    end
+    # Use getfield with literal integer arguments
+    exprs = [:(beval(Base.getfield(f, $i), t)) for i in 1:N]
+    return :(tuple($(exprs...)))
+end
+# === END CLAUDE ===
\ No newline at end of file

From e298f27c2133cd72078d7c199aca5d0c0c25c231 Mon Sep 17 00:00:00 2001
From: Joseph Devlin <jpd285@cornell.edu>
Date: Wed, 4 Feb 2026 11:54:35 -0500
Subject: [PATCH 03/30] Fringe locations

---
 ext/BeamTrackingBeamlinesExt/exact.jl   | 12 +++-
 ext/BeamTrackingBeamlinesExt/yoshida.jl | 85 ++++++++++++++++---------
 src/BeamTracking.jl                     |  1 +
 src/kernels/yoshida.jl                  | 28 ++++----
 src/tracking_methods.jl                 | 22 +++++--
 src/utils/quaternions.jl                |  2 +-
 test/BeamlinesExt_test.jl               | 28 ++++----
 test/IntegrationTracking_test.jl        |  4 +-
 8 files changed, 116 insertions(+), 66 deletions(-)

diff --git a/ext/BeamTrackingBeamlinesExt/exact.jl b/ext/BeamTrackingBeamlinesExt/exact.jl
index 5150dc0..42ac898 100644
--- a/ext/BeamTrackingBeamlinesExt/exact.jl
+++ b/ext/BeamTrackingBeamlinesExt/exact.jl
@@ -76,8 +76,16 @@ end
 @inline function thick_bend_pure_bdipole(tm::Exact, bunch, bendparams, bm1, L)
   g = bendparams.g_ref
   tilt = bendparams.tilt_ref
-  e1 = bendparams.e1
-  e2 = bendparams.e2
+  if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == EntranceEnd
+    e1 = bendparams.e1
+  else
+    e1 = 0
+  end
+  if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == ExitEnd
+    e2 = bendparams.e2
+  else
+    e2 = 0
+  end
   w = rot_quaternion(0,0,-tilt)
   w_inv = inv_rot_quaternion(0,0,-tilt)
   theta = g * L
diff --git a/ext/BeamTrackingBeamlinesExt/yoshida.jl b/ext/BeamTrackingBeamlinesExt/yoshida.jl
index 416006f..79c63df 100644
--- a/ext/BeamTrackingBeamlinesExt/yoshida.jl
+++ b/ext/BeamTrackingBeamlinesExt/yoshida.jl
@@ -10,13 +10,13 @@
     ds_step = L / num_steps
   end
   if order == 2
-    return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L))
+    return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
   elseif order == 4
-    return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L))
+    return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
   elseif order == 6
-    return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L))
+    return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
   elseif order == 8
-    return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L))
+    return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
   end
 end
 
@@ -81,7 +81,7 @@ end
     q = chargeof(bunch.species)
     mc2 = massof(bunch.species)
     a = gyromagnetic_anomaly(bunch.species)
-    edge_params = ifelse(tm.fringe_on, (a, tilde_m, Ksol, 0, 0, 0), nothing)
+    edge_params = (a, tilde_m, Ksol, 0, 0, 0)
     E0 = mc2/tilde_m/beta_0
     params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, Ksol, mm, kn, ks)
     photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
@@ -97,39 +97,48 @@ end
   Ksol = kn[1]
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  a = gyromagnetic_anomaly(bunch.species)
+  Kn0 = ifelse(mm[2] == 1, kn[2], 0)
+  edge_params = (a, tilde_m, Ksol, Kn0, 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), Ksol, mm, kn, ks)
+  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, Ksol, mm, kn, ks)
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
-  return integration_launcher(BeamTracking.sks_multipole!, params, photon_params, tm, nothing, L)
+  return integration_launcher(BeamTracking.sks_multipole!, params, photon_params, tm, edge_params, L)
 end
 
-@inline function thick_pure_bdipole(tm::DriftKick, bunch, bm, L)
+@inline function thick_pure_bdipole(tm::Union{Yoshida,DriftKick}, bunch, bm, L)
   p_over_q_ref = bunch.p_over_q_ref
   tilde_m, gamsqr_0, beta_0 = BeamTracking.drift_params(bunch.species, p_over_q_ref)
   mm = bm.order
   kn, ks = get_strengths(bm, L, p_over_q_ref)
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  a = gyromagnetic_anomaly(bunch.species)
+  Kn0 = ifelse(mm == 1, kn, 0)
+  edge_params = (a, tilde_m, 0, Kn0, 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), SA[mm], SA[kn], SA[ks])
+  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, SA[mm], SA[kn], SA[ks])
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, SA[mm], SA[kn], SA[ks]), nothing)
-  return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, nothing, L)
+  return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, edge_params, L)
 end
 
-@inline function thick_bdipole(tm::DriftKick, bunch, bm, L)
+@inline function thick_bdipole(tm::Union{Yoshida,DriftKick}, bunch, bm, L)
   p_over_q_ref = bunch.p_over_q_ref
   tilde_m, gamsqr_0, beta_0 = BeamTracking.drift_params(bunch.species, p_over_q_ref)
   mm = bm.order
   kn, ks = get_strengths(bm, L, p_over_q_ref)
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  a = gyromagnetic_anomaly(bunch.species)
+  Kn0 = ifelse(mm[1] == 1, kn[1], 0)
+  edge_params = (a, tilde_m, 0, Kn0, 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), mm, kn, ks)
+  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, mm, kn, ks)
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
-  return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, nothing, L)
+  return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, edge_params, L)
 end
 
-@inline function thick_pure_bdipole(tm::Union{Yoshida,BendKick}, bunch, bm1, L) 
+@inline function thick_pure_bdipole(tm::BendKick, bunch, bm1, L) 
   if isnothing(bunch.coords.q) && !(tm.radiation_damping_on || tm.radiation_fluctuations_on)
     return thick_pure_bdipole(Exact(), bunch, bm1, L)
   else
@@ -144,9 +153,11 @@ end
     q = chargeof(bunch.species)
     mc2 = massof(bunch.species)
     E0 = mc2/tilde_m/beta_0
-    params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, gyromagnetic_anomaly(bunch.species), 0, w, w_inv, k0, SA[mm], SA[kn], SA[ks])
+    a = gyromagnetic_anomaly(bunch.species)
+    edge_params = (a, tilde_m, 0, k0, 0, 0)
+    params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, a, 0, w, w_inv, k0, SA[mm], SA[kn], SA[ks])
     photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, SA[mm], SA[kn], SA[ks]), nothing)
-    return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, nothing, L)
+    return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, edge_params, L)
   end
 end
 
@@ -161,10 +172,12 @@ end
   w_inv = inv_rot_quaternion(0,0,tilt)
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  a = gyromagnetic_anomaly(bunch.species)
+  edge_params = (a, tilde_m, 0, k0, 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, gyromagnetic_anomaly(bunch.species), 0, w, w_inv, k0, mm, kn, ks)
+  params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, a, 0, w, w_inv, k0, mm, kn, ks)
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
-  return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, nothing, L)
+  return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, edge_params, L)
 end
 
 @inline function thick_bdipole(tm::MatrixKick, bunch, bm, L)
@@ -185,18 +198,12 @@ end
   w_inv = inv_rot_quaternion(0,0,tilt)
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  a = gyromagnetic_anomaly(bunch.species)
+  edge_params = (a, tilde_m, 0, kn[1], 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), w, w_inv, k1, mm, kn, ks)
+  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, w, w_inv, k1, mm, kn, ks)
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
-  return integration_launcher(BeamTracking.mkm_quadrupole!, params, photon_params, tm, nothing, L)
-end
-
-@inline function thick_bdipole(tm::Yoshida, bunch, bm, L)
-  if bm.order[2] == 2
-    return thick_bdipole(MatrixKick(order=tm.order, num_steps=tm.num_steps, ds_step=tm.ds_step, radiation_damping_on=tm.radiation_damping_on, radiation_fluctuations_on=tm.radiation_fluctuations_on), bunch, bm, L)
-  else
-    return thick_bdipole(BendKick(order=tm.order, num_steps=tm.num_steps, ds_step=tm.ds_step, radiation_damping_on=tm.radiation_damping_on, radiation_fluctuations_on=tm.radiation_fluctuations_on), bunch, bm, L)
-  end
+  return integration_launcher(BeamTracking.mkm_quadrupole!, params, photon_params, tm, edge_params, L)
 end
 
 @inline function thick_pure_bquadrupole(tm::Union{Yoshida,MatrixKick}, bunch, bm, L)
@@ -272,7 +279,7 @@ end
     w = rot_quaternion(0,0,tilt)
     w_inv = inv_rot_quaternion(0,0,tilt)
     a = gyromagnetic_anomaly(bunch.species)
-    edge_params = ifelse(tm.fringe_on, (a, tilde_m, 0, Kn0, e1, e2), nothing)
+    edge_params = (a, tilde_m, 0, Kn0, e1, e2)
     q = chargeof(bunch.species)
     mc2 = massof(bunch.species)
     E0 = mc2/tilde_m/beta_0
@@ -313,8 +320,24 @@ end
   p0c = BeamTracking.R_to_pc(bunch.species, p_over_q_ref)
   q = chargeof(bunch.species)
   mc2 = massof(bunch.species)
+  if mm[1] == 0
+    Ksol = kn[1]
+    if length(mm) > 1 && mm[2] == 1
+      Kn0 = kn[2]
+    else
+      Kn0 = 0
+    end
+  elseif mm[1] == 1
+    Ksol = 0
+    Kn0 = kn[1]
+  else
+    Ksol = 0
+    Kn0 = 0
+  end
+  a = gyromagnetic_anomaly(bunch.species)
+  edge_params = (a, tilde_m, Ksol, Kn0, 0, 0)
   E0 = mc2/tilde_m/beta_0
-  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, E_ref, p0c, gyromagnetic_anomaly(bunch.species), omega, t0, E0_over_Rref, mm, kn, ks)
+  params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, E_ref, p0c, a, omega, t0, E0_over_Rref, mm, kn, ks)
   photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing)
-  return integration_launcher(BeamTracking.cavity!, params, photon_params, tm, nothing, L)
+  return integration_launcher(BeamTracking.cavity!, params, photon_params, tm, edge_params, L)
 end
diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl
index cb00b02..51edb88 100644
--- a/src/BeamTracking.jl
+++ b/src/BeamTracking.jl
@@ -19,6 +19,7 @@ import GTPSA: sincu, sinhcu
 
 export Bunch, State, ParticleView, Time, TimeDependentParam
 export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact
+export NoEnd, BothEnds, EntranceEnd, ExitEnd
 export track!
 
 
diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl
index 6bc1df3..e1e60fa 100644
--- a/src/kernels/yoshida.jl
+++ b/src/kernels/yoshida.jl
@@ -2,8 +2,8 @@
 # ===============  I N T E G R A T O R S  ===============
 #
 
-@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L)
-  if !isnothing(edge_params)
+@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
     a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
   end
@@ -19,16 +19,17 @@
   if !isnothing(photon_params)
     stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
   end
-  if !isnothing(edge_params)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
+    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L)
+@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
   w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step
   w1 =  1.3512071919596577718181151794851757586002349853515625*ds_step
-  if !isnothing(edge_params)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
     a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
   end
@@ -46,18 +47,19 @@ end
   if !isnothing(photon_params)
     stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
   end
-  if !isnothing(edge_params)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
+    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L)
+@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
   w0 =  1.315186320683911169737712043570355*ds_step
   w1 = -1.17767998417887100694641568096432*ds_step
   w2 =  0.235573213359358133684793182978535*ds_step
   w3 =  0.784513610477557263819497633866351*ds_step
-  if !isnothing(edge_params)
+  if !isnothing(edge_params)  && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
     a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
   end
@@ -79,13 +81,14 @@ end
   if !isnothing(photon_params)
     stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
   end
-  if !isnothing(edge_params)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
+    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L)
+@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
   w0 =  1.7084530707869978*ds_step
   w1 =  0.102799849391985*ds_step
   w2 = -1.96061023297549*ds_step
@@ -94,7 +97,7 @@ end
   w5 = -1.44485223686048*ds_step
   w6 =  0.253693336566229*ds_step
   w7 =  0.914844246229740*ds_step
-  if !isnothing(edge_params)
+  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
     a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
   end
@@ -124,7 +127,8 @@ end
   if !isnothing(photon_params)
     stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
   end
-  if !isnothing(edge_params)
+  if !isnothing(edge_params)  && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
+    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
     linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
\ No newline at end of file
diff --git a/src/tracking_methods.jl b/src/tracking_methods.jl
index 483aea7..8fa06a6 100644
--- a/src/tracking_methods.jl
+++ b/src/tracking_methods.jl
@@ -1,3 +1,11 @@
+# ========== Fringe  ===========================
+abstract type AbstractFringeAt end
+struct NoEnd <: AbstractFringeAt end
+struct BothEnds <: AbstractFringeAt end
+struct EntranceEnd <: AbstractFringeAt end
+struct ExitEnd <: AbstractFringeAt end
+
+
 # ========== Yoshida ===========================
 abstract type AbstractYoshida end
 macro def_integrator_struct(name)
@@ -8,9 +16,9 @@ macro def_integrator_struct(name)
       ds_step::Float64
       radiation_damping_on::Bool
       radiation_fluctuations_on::Bool
-      fringe_on::Bool
+      fringe_at::AbstractFringeAt
   
-      function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_on::Bool=true)
+      function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::AbstractFringeAt=BothEnds())
         _order = order
         _num_steps = num_steps
         _ds_step = ds_step
@@ -27,7 +35,7 @@ macro def_integrator_struct(name)
         elseif _ds_step > 0
           _num_steps = -1
         end
-        return new(_order, _num_steps, _ds_step, radiation_damping_on, radiation_fluctuations_on, fringe_on)
+        return new(_order, _num_steps, _ds_step, radiation_damping_on, radiation_fluctuations_on, fringe_at)
       end
     end
   end
@@ -41,4 +49,10 @@ end
 
 
 # ========== Exact ===========================
-struct Exact end
\ No newline at end of file
+struct Exact
+  fringe_at::AbstractFringeAt
+  
+  function Exact(; fringe_at::AbstractFringeAt=BothEnds())
+    return new(fringe_at)
+  end
+end
\ No newline at end of file
diff --git a/src/utils/quaternions.jl b/src/utils/quaternions.jl
index cbd44ea..cec8e0a 100644
--- a/src/utils/quaternions.jl
+++ b/src/utils/quaternions.jl
@@ -31,7 +31,7 @@ function sincos_quaternion(x::TPS{T}) where {T}
   #sq = one(x)
   # Using FastGTPSA! for the following makes other kernels run out of temps
   @FastGTPSA begin
-    if x < 0.1
+    if x < ε #0.1
       while !(conv_sin && conv_cos) && N < N_max
         y = -y*x/((2*N)*(2*N - 1))
         result_sin = prev_sin + y/(2*N + 1)
diff --git a/test/BeamlinesExt_test.jl b/test/BeamlinesExt_test.jl
index c95df85..1941210 100644
--- a/test/BeamlinesExt_test.jl
+++ b/test/BeamlinesExt_test.jl
@@ -421,7 +421,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-14)
 
     # Pure bend:
-    ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_on=false))   
+    ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd()))   
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -432,7 +432,7 @@
            || b0.coords.q ≈ -[0.99999555887473 0.00000197685011 0.00297918168991 0.00008187412527])
 
     # Pure solenoid:
-    ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_on=false))
+    ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -444,7 +444,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Solenoid with quadrupole:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_on=false))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -456,7 +456,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # SK multiple steps:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_on=false))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -468,7 +468,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Step size:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_on=false))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -480,7 +480,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight pure dipole (DK):
-    ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2))
+    ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -492,7 +492,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight pure dipole (BK):
-    ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10))
+    ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -504,7 +504,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6)
 
     # Straight dipole with quadrupole (DK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -516,7 +516,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight dipole with quadrupole (BK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -528,7 +528,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6)
 
     # Straight dipole with quadrupole (MK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd()))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -612,7 +612,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-7)
 
     # Quadrupole with dipole and sextupole (MK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=NoEnd()))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -745,7 +745,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-7)
 
     # With solenoid (RK4):
-    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2))
+    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=NoEnd()))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -769,7 +769,7 @@
     @test b0.coords.q ≈ q_expected || b0.coords.q ≈ -q_expected
 
     # With solenoid and quadrupole:
-    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20))
+    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=NoEnd()))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -859,7 +859,7 @@
     b0 = Bunch([0.4 0.4 0.4 0.4 0.4 -0.5], [1.0 0.0 0.0 0.0], p_over_q_ref=p_over_q_ref, species=Species("electron"))
     v_init = copy(b0.coords.v)
     q_init = copy(b0.coords.q)
-    ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick())
+    ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=NoEnd()))
     track!(b0, Beamline([ele_dipole], p_over_q_ref=p_over_q_ref))
     @test b0.coords.state[1] == STATE_LOST
     @test v_init == b0.coords.v
diff --git a/test/IntegrationTracking_test.jl b/test/IntegrationTracking_test.jl
index bbdf52a..90e4262 100644
--- a/test/IntegrationTracking_test.jl
+++ b/test/IntegrationTracking_test.jl
@@ -83,7 +83,7 @@
       ker = BeamTracking.bkb_multipole!
       num_steps = 10
       ds_step = T(0.2)
-      return ker, params, nothing, ds_step, num_steps, nothing, L
+      return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L
     end
 
     function integrator_args(::Type{T}) where {T}
@@ -105,7 +105,7 @@
       ker = BeamTracking.dkd_multipole!
       num_steps = 1
       ds_step = T(2)
-      return ker, params, nothing, ds_step, num_steps, nothing, L
+      return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L
     end
 
     function cavity_args(::Type{T}) where {T}

From d876b7062ec80b0aa40c7ea56e1fb596b44b3e6e Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 13:37:18 -0500
Subject: [PATCH 04/30] work

---
 src/BeamTracking.jl |  1 +
 src/batch.jl        | 28 ++++++++++++++++++++++++++--
 src/kernel.jl       | 26 ++++++++++++++++++++++----
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl
index cb00b02..c9ae6ac 100644
--- a/src/BeamTracking.jl
+++ b/src/BeamTracking.jl
@@ -30,6 +30,7 @@ include("utils/z_to_time.jl")
 
 include("types.jl")
 include("time.jl")
+include("batch.jl")
 include("kernel.jl")
 include("tracking_methods.jl")
 
diff --git a/src/batch.jl b/src/batch.jl
index 2ed5b27..6375732 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -78,13 +78,37 @@ for op in (:+,:-,:*,:/,:^)
   @eval begin
     Base.$op(ba::BatchParam, b::Number)   = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end)
     Base.$op(a::Number,   bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end)
+    
     function Base.$op(ba::BatchParam, bb::BatchParam)
-      if !(ba.batch isa Number) && !(bb.batch isa Number) && length(ba.batch) != length(bb.batch)
+      if ba.batch isa Number
+        if bb.batch isa Number
+          return BatchParam($op(ba.batch, bb.batch))
+        else
+          let a = ba.batch
+            # WE HAVE TO WRITE THIS BY HAND FOR EACH OP!!!
+            if a ≈ 0 # So that e.g. big arrays aren't made for skew strengths when only normal is batch
+              return BatchParam(0f0)
+            else
+              return BatchParam(map((bbi)->$op(a, bbi), bb.batch))
+            end
+          end
+        end
+      elseif bb.batch isa Number
+        let b = bb.batch
+          if b ≈ 0
+            return BatchParam(0f0)
+          else
+            return BatchParam(map((bai)->$op(bai, b), ba.batch))
+          end
+        end
+      elseif length(ba.batch) == length(bb.batch)
+        return BatchParam(map((bai,bbi)->$op(bai, bbi), ba.batch, bb.batch))
+      else
         error("Cannot perform operation $($op) with two non-scalar BatchParams of differing 
                lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).")
       end
-      return BatchParam(map((x,y)->$op(x, y), ba.batch, bb.batch))
     end
+
   end
 end
 
diff --git a/src/kernel.jl b/src/kernel.jl
index 3fba566..b664a5f 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -9,7 +9,12 @@ blank_kernel!(args...) = nothing
   kernel::K = blank_kernel!
   args::A   = ()
   function KernelCall(kernel, args)
-    _args = map(t->time_lower(t), args)
+    if args != ()
+      @show args
+      @show batch_lower(args)
+      error("")
+    end
+    _args = map(t->time_lower(batch_lower(t)), args)
     new{typeof(kernel),typeof(_args)}(kernel, _args)
   end 
 end
@@ -55,16 +60,29 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref)
 
 @unroll function __generic_kernel!(i, coords::Coords, chain, ref)
   @unroll for kcall in chain
-    args = process_args(i, coords, kcall.args, ref)
+    bargs = process_batch_args(i, kcall.args)
+    args = process_time_args(i, coords, bargs, ref)
     (kcall.kernel)(i, coords, args...)
   end
   return nothing
 end
 
-function process_args(i, coords, args, ref)
+function process_batch_args(i, args)
+      @show args
+    error("")
+  if static_batchcheck(args) 
+    # beval(args, i)
+    #error("")
+    return beval(args, i)
+  else
+    return args
+  end
+end
+
+function process_time_args(i, coords, args, ref)
   if !isnothing(ref) && static_timecheck(args) 
     let t = compute_time(coords.v[i,ZI], coords.v[i,PZI], ref)
-      return map(arg->teval(arg, t), args)
+      return teval(args, t)
     end
   else
     return args

From 95bcadf3c023d183411d78d0620be2342728a823 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 14:26:52 -0500
Subject: [PATCH 05/30] make enums

---
 Project.toml                            |   3 +-
 ext/BeamTrackingBeamlinesExt/exact.jl   |   4 +-
 ext/BeamTrackingBeamlinesExt/utils.jl   |  18 ++
 ext/BeamTrackingBeamlinesExt/yoshida.jl |  10 +-
 src/BeamTracking.jl                     |   5 +-
 src/kernels/yoshida.jl                  | 232 ++++++++++++------------
 src/tracking_methods.jl                 |  15 +-
 test/BeamlinesExt_test.jl               |  28 +--
 test/IntegrationTracking_test.jl        |   4 +-
 9 files changed, 172 insertions(+), 147 deletions(-)

diff --git a/Project.toml b/Project.toml
index 6ee4eba..5c924d5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,12 +1,13 @@
 name = "BeamTracking"
 uuid = "8ef5c10a-4ca3-437f-8af5-b84d8af36df0"
 authors = ["mattsignorelli <mgs255@cornell.edu> and contributors"]
-version = "0.5.1"
+version = "0.5.5"
 
 [deps]
 Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 AtomicAndPhysicalConstants = "5c0d271c-5419-4163-b387-496237733d8b"
+EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
 GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8"
 HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
diff --git a/ext/BeamTrackingBeamlinesExt/exact.jl b/ext/BeamTrackingBeamlinesExt/exact.jl
index 42ac898..1c09d8e 100644
--- a/ext/BeamTrackingBeamlinesExt/exact.jl
+++ b/ext/BeamTrackingBeamlinesExt/exact.jl
@@ -76,12 +76,12 @@ end
 @inline function thick_bend_pure_bdipole(tm::Exact, bunch, bendparams, bm1, L)
   g = bendparams.g_ref
   tilt = bendparams.tilt_ref
-  if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == EntranceEnd
+  if tm.fringe_at == Fringe.BothEnds || tm.fringe_at == Fringe.EntranceEnd
     e1 = bendparams.e1
   else
     e1 = 0
   end
-  if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == ExitEnd
+  if tm.fringe_at == Fringe.BothEnds || tm.fringe_at == Fringe.ExitEnd
     e2 = bendparams.e2
   else
     e2 = 0
diff --git a/ext/BeamTrackingBeamlinesExt/utils.jl b/ext/BeamTrackingBeamlinesExt/utils.jl
index 0072bd6..f47bfa3 100644
--- a/ext/BeamTrackingBeamlinesExt/utils.jl
+++ b/ext/BeamTrackingBeamlinesExt/utils.jl
@@ -169,4 +169,22 @@ function rf_phi0(rfparams)
   else
     error("RF parameter zero_phase value not set correctly.")
   end
+end
+
+#---------------------------------------------------------------------------------------------------
+
+function fringe_in(f::Fringe.T)
+  if f == Fringe.BothEnds || f == Fringe.EntranceEnd
+    return Val{true}()
+  else
+    return Val{false}()
+  end
+end
+
+function fringe_out(f::Fringe.T)
+  if f == Fringe.BothEnds || f == Fringe.ExitEnd
+    return Val{true}()
+  else
+    return Val{false}()
+  end
 end
\ No newline at end of file
diff --git a/ext/BeamTrackingBeamlinesExt/yoshida.jl b/ext/BeamTrackingBeamlinesExt/yoshida.jl
index 79c63df..76e4efc 100644
--- a/ext/BeamTrackingBeamlinesExt/yoshida.jl
+++ b/ext/BeamTrackingBeamlinesExt/yoshida.jl
@@ -9,14 +9,16 @@
     num_steps = Int(ceil(L / ds_step))
     ds_step = L / num_steps
   end
+  fin  = fringe_in(tm.fringe_at)
+  fout = fringe_out(tm.fringe_at)
   if order == 2
-    return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
+    return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L))
   elseif order == 4
-    return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
+    return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L))
   elseif order == 6
-    return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
+    return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L))
   elseif order == 8
-    return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L))
+    return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L))
   end
 end
 
diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl
index 51edb88..07117b0 100644
--- a/src/BeamTracking.jl
+++ b/src/BeamTracking.jl
@@ -11,7 +11,8 @@ using GTPSA,
       Accessors,
       SpecialFunctions,
       AtomicAndPhysicalConstants,
-      Random
+      Random,
+      EnumX
 
 using KernelAbstractions
 
@@ -19,7 +20,7 @@ import GTPSA: sincu, sinhcu
 
 export Bunch, State, ParticleView, Time, TimeDependentParam
 export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact
-export NoEnd, BothEnds, EntranceEnd, ExitEnd
+export Fringe
 export track!
 
 
diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl
index e1e60fa..4bed375 100644
--- a/src/kernels/yoshida.jl
+++ b/src/kernels/yoshida.jl
@@ -2,133 +2,141 @@
 # ===============  I N T E G R A T O R S  ===============
 #
 
-@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  for step in 1:num_steps
-    ker(i, coords, params..., ds_step)
-    if !isnothing(photon_params) && (step < num_steps)
-      stochastic_radiation!(i, coords, photon_params..., ds_step)
+@inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
+  @inbounds begin
+    if !isnothing(edge_params) && fringe_in
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    for step in 1:num_steps
+      ker(i, coords, params..., ds_step)
+      if !isnothing(photon_params) && (step < num_steps)
+        stochastic_radiation!(i, coords, photon_params..., ds_step)
+      end
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    if !isnothing(edge_params) && fringe_out
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
-  w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step
-  w1 =  1.3512071919596577718181151794851757586002349853515625*ds_step
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  for step in 1:num_steps
-    ker(i, coords, params..., w1)
-    ker(i, coords, params..., w0)
-    ker(i, coords, params..., w1)
-    if !isnothing(photon_params) && (step < num_steps)
-      stochastic_radiation!(i, coords, photon_params..., ds_step)
+@inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
+  @inbounds begin
+    w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step
+    w1 =  1.3512071919596577718181151794851757586002349853515625*ds_step
+    if !isnothing(edge_params) && fringe_in
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    for step in 1:num_steps
+      ker(i, coords, params..., w1)
+      ker(i, coords, params..., w0)
+      ker(i, coords, params..., w1)
+      if !isnothing(photon_params) && (step < num_steps)
+        stochastic_radiation!(i, coords, photon_params..., ds_step)
+      end
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    if !isnothing(edge_params) && fringe_out
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
-  w0 =  1.315186320683911169737712043570355*ds_step
-  w1 = -1.17767998417887100694641568096432*ds_step
-  w2 =  0.235573213359358133684793182978535*ds_step
-  w3 =  0.784513610477557263819497633866351*ds_step
-  if !isnothing(edge_params)  && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  for step in 1:num_steps
-    ker(i, coords, params..., w3)
-    ker(i, coords, params..., w2)
-    ker(i, coords, params..., w1)
-    ker(i, coords, params..., w0)
-    ker(i, coords, params..., w1)
-    ker(i, coords, params..., w2)
-    ker(i, coords, params..., w3)
-    if !isnothing(photon_params) && (step < num_steps)
-      stochastic_radiation!(i, coords, photon_params..., ds_step)
+@inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
+  @inbounds begin
+    w0 =  1.315186320683911169737712043570355*ds_step
+    w1 = -1.17767998417887100694641568096432*ds_step
+    w2 =  0.235573213359358133684793182978535*ds_step
+    w3 =  0.784513610477557263819497633866351*ds_step
+    if !isnothing(edge_params)  && fringe_in
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    for step in 1:num_steps
+      ker(i, coords, params..., w3)
+      ker(i, coords, params..., w2)
+      ker(i, coords, params..., w1)
+      ker(i, coords, params..., w0)
+      ker(i, coords, params..., w1)
+      ker(i, coords, params..., w2)
+      ker(i, coords, params..., w3)
+      if !isnothing(photon_params) && (step < num_steps)
+        stochastic_radiation!(i, coords, photon_params..., ds_step)
+      end
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    if !isnothing(edge_params) && fringe_out
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
 
 
-@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L)
-  w0 =  1.7084530707869978*ds_step
-  w1 =  0.102799849391985*ds_step
-  w2 = -1.96061023297549*ds_step
-  w3 =  1.93813913762276*ds_step
-  w4 = -0.158240635368243*ds_step
-  w5 = -1.44485223686048*ds_step
-  w6 =  0.253693336566229*ds_step
-  w7 =  0.914844246229740*ds_step
-  if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  for step in 1:num_steps
-    ker(i, coords, params..., w7)
-    ker(i, coords, params..., w6)
-    ker(i, coords, params..., w5)
-    ker(i, coords, params..., w4)
-    ker(i, coords, params..., w3)
-    ker(i, coords, params..., w2)
-    ker(i, coords, params..., w1)
-    ker(i, coords, params..., w0)
-    ker(i, coords, params..., w1) 
-    ker(i, coords, params..., w2)
-    ker(i, coords, params..., w3)
-    ker(i, coords, params..., w4)
-    ker(i, coords, params..., w5)
-    ker(i, coords, params..., w6)
-    ker(i, coords, params..., w7)
-    if !isnothing(photon_params) && (step < num_steps)
-      stochastic_radiation!(i, coords, photon_params..., ds_step)
+@inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
+  @inbounds begin
+    w0 =  1.7084530707869978*ds_step
+    w1 =  0.102799849391985*ds_step
+    w2 = -1.96061023297549*ds_step
+    w3 =  1.93813913762276*ds_step
+    w4 = -0.158240635368243*ds_step
+    w5 = -1.44485223686048*ds_step
+    w6 =  0.253693336566229*ds_step
+    w7 =  0.914844246229740*ds_step
+    if !isnothing(edge_params) && fringe_in
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    for step in 1:num_steps
+      ker(i, coords, params..., w7)
+      ker(i, coords, params..., w6)
+      ker(i, coords, params..., w5)
+      ker(i, coords, params..., w4)
+      ker(i, coords, params..., w3)
+      ker(i, coords, params..., w2)
+      ker(i, coords, params..., w1)
+      ker(i, coords, params..., w0)
+      ker(i, coords, params..., w1) 
+      ker(i, coords, params..., w2)
+      ker(i, coords, params..., w3)
+      ker(i, coords, params..., w4)
+      ker(i, coords, params..., w5)
+      ker(i, coords, params..., w6)
+      ker(i, coords, params..., w7)
+      if !isnothing(photon_params) && (step < num_steps)
+        stochastic_radiation!(i, coords, photon_params..., ds_step)
+      end
+    end
+    if !isnothing(photon_params)
+      stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
+    end
+    if !isnothing(edge_params)  && fringe_out
+      a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
+      linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
-  if !isnothing(photon_params)
-    stochastic_radiation!(i, coords, photon_params..., ds_step / 2)
-  end
-  if !isnothing(edge_params)  && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd)
-    a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
-    linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
   end
 end
\ No newline at end of file
diff --git a/src/tracking_methods.jl b/src/tracking_methods.jl
index 8fa06a6..b3d7034 100644
--- a/src/tracking_methods.jl
+++ b/src/tracking_methods.jl
@@ -1,10 +1,5 @@
 # ========== Fringe  ===========================
-abstract type AbstractFringeAt end
-struct NoEnd <: AbstractFringeAt end
-struct BothEnds <: AbstractFringeAt end
-struct EntranceEnd <: AbstractFringeAt end
-struct ExitEnd <: AbstractFringeAt end
-
+@enumx Fringe NoEnd BothEnds EntranceEnd ExitEnd
 
 # ========== Yoshida ===========================
 abstract type AbstractYoshida end
@@ -16,9 +11,9 @@ macro def_integrator_struct(name)
       ds_step::Float64
       radiation_damping_on::Bool
       radiation_fluctuations_on::Bool
-      fringe_at::AbstractFringeAt
+      fringe_at::Fringe.T
   
-      function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::AbstractFringeAt=BothEnds())
+      function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::Fringe.T=Fringe.BothEnds)
         _order = order
         _num_steps = num_steps
         _ds_step = ds_step
@@ -50,9 +45,9 @@ end
 
 # ========== Exact ===========================
 struct Exact
-  fringe_at::AbstractFringeAt
+  fringe_at::Fringe.T
   
-  function Exact(; fringe_at::AbstractFringeAt=BothEnds())
+  function Exact(; fringe_at::Fringe.T=Fringe.BothEnds)
     return new(fringe_at)
   end
 end
\ No newline at end of file
diff --git a/test/BeamlinesExt_test.jl b/test/BeamlinesExt_test.jl
index 1941210..4abd74f 100644
--- a/test/BeamlinesExt_test.jl
+++ b/test/BeamlinesExt_test.jl
@@ -421,7 +421,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-14)
 
     # Pure bend:
-    ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd()))   
+    ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=Fringe.NoEnd))   
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -432,7 +432,7 @@
            || b0.coords.q ≈ -[0.99999555887473 0.00000197685011 0.00297918168991 0.00008187412527])
 
     # Pure solenoid:
-    ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=NoEnd()))
+    ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -444,7 +444,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Solenoid with quadrupole:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -456,7 +456,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # SK multiple steps:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -468,7 +468,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Step size:
-    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -480,7 +480,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight pure dipole (DK):
-    ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -492,7 +492,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight pure dipole (BK):
-    ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -504,7 +504,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6)
 
     # Straight dipole with quadrupole (DK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -516,7 +516,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9)
 
     # Straight dipole with quadrupole (BK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -528,7 +528,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6)
 
     # Straight dipole with quadrupole (MK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=Fringe.NoEnd))
     v = collect(transpose(@vars(D10)))
     q = TPS64{D10}[1 0 0 0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -612,7 +612,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-7)
 
     # Quadrupole with dipole and sextupole (MK):
-    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=NoEnd()))
+    ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -745,7 +745,7 @@
     @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-7)
 
     # With solenoid (RK4):
-    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=NoEnd()))
+    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=Fringe.NoEnd))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -769,7 +769,7 @@
     @test b0.coords.q ≈ q_expected || b0.coords.q ≈ -q_expected
 
     # With solenoid and quadrupole:
-    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=NoEnd()))
+    ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=Fringe.NoEnd))
     v = [0.01 0.02 0.03 0.04 0.05 0.06]
     q = [1.0 0.0 0.0 0.0]
     b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron"))
@@ -859,7 +859,7 @@
     b0 = Bunch([0.4 0.4 0.4 0.4 0.4 -0.5], [1.0 0.0 0.0 0.0], p_over_q_ref=p_over_q_ref, species=Species("electron"))
     v_init = copy(b0.coords.v)
     q_init = copy(b0.coords.q)
-    ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=NoEnd()))
+    ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=Fringe.NoEnd))
     track!(b0, Beamline([ele_dipole], p_over_q_ref=p_over_q_ref))
     @test b0.coords.state[1] == STATE_LOST
     @test v_init == b0.coords.v
diff --git a/test/IntegrationTracking_test.jl b/test/IntegrationTracking_test.jl
index 90e4262..f07dcba 100644
--- a/test/IntegrationTracking_test.jl
+++ b/test/IntegrationTracking_test.jl
@@ -83,7 +83,7 @@
       ker = BeamTracking.bkb_multipole!
       num_steps = 10
       ds_step = T(0.2)
-      return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L
+      return ker, params, nothing, ds_step, num_steps, nothing, Val{false}(), Val{false}(), L
     end
 
     function integrator_args(::Type{T}) where {T}
@@ -105,7 +105,7 @@
       ker = BeamTracking.dkd_multipole!
       num_steps = 1
       ds_step = T(2)
-      return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L
+      return ker, params, nothing, ds_step, num_steps, nothing, Val{false}(), Val{false}(), L
     end
 
     function cavity_args(::Type{T}) where {T}

From df489c236fb7d50044591bcc362daa3cfb160608 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 14:32:19 -0500
Subject: [PATCH 06/30] @eles->@elements

---
 test/lattices/alignment_lat.jl | 2 +-
 test/lattices/aperture_lat.jl  | 2 +-
 test/lattices/esr.jl           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/lattices/alignment_lat.jl b/test/lattices/alignment_lat.jl
index 356c104..45eb861 100644
--- a/test/lattices/alignment_lat.jl
+++ b/test/lattices/alignment_lat.jl
@@ -1,7 +1,7 @@
 using BeamTracking
 using Beamlines
 
-@eles begin
+@elements begin
   drift1 = Drift(L = 2.0, x_offset = 0.1, y_offset = 0.2, z_offset = 0.3, 
                     x_rot = 0.04, y_rot = 0.05, tilt = 0.06, tracking_method = Exact())
 
diff --git a/test/lattices/aperture_lat.jl b/test/lattices/aperture_lat.jl
index 557bee1..8814784 100644
--- a/test/lattices/aperture_lat.jl
+++ b/test/lattices/aperture_lat.jl
@@ -1,7 +1,7 @@
 using BeamTracking
 using Beamlines
 
-@eles begin
+@elements begin
   d_error = LineElement(L=1, x1_limit=1, dx=5)
 
   d1_rect = Drift(L=1, x1_limit = 1, x2_limit = 2, y1_limit = 3, y2_limit = 5, aperture_shape = ApertureShape.Rectangular, aperture_active = false)
diff --git a/test/lattices/esr.jl b/test/lattices/esr.jl
index a9d3622..01f6444 100644
--- a/test/lattices/esr.jl
+++ b/test/lattices/esr.jl
@@ -1,6 +1,6 @@
 using BeamTracking, Beamlines
 
-@eles begin
+@elements begin
 IP6__1 =  Marker()
 D000001__1 =  Drift( L = 5.3)
 Q1ER_6 =  Quadrupole( L = 1.8, Kn1 = -0.2291420342)

From cc685cf86bd1560b2d73aef2603370ab6dbe0916 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 15:33:17 -0500
Subject: [PATCH 07/30] CUDA ext

---
 Project.toml                                   |  3 +++
 ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 13 +++++++++++++
 src/kernels/radiation.jl                       |  2 +-
 src/utils/math_simd.jl                         | 10 +++-------
 4 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl

diff --git a/Project.toml b/Project.toml
index 5c924d5..2ddae00 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,15 +22,18 @@ Unrolled = "9602ed7d-8fef-5bc8-8597-8f21381861e8"
 
 [weakdeps]
 Beamlines = "5bb90b03-0719-46b8-8ce4-1ef3afd3cd4b"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
 BeamTrackingBeamlinesExt = "Beamlines"
+BeamTrackingCUDAExt = "CUDA"
 
 [compat]
 Accessors = "0.1.42"
 Adapt = "4.3.0"
 AtomicAndPhysicalConstants = "0.8.0"
 Beamlines = "0.8.0"
+CUDA = "5.9"
 GTPSA = "1.5.3"
 HostCPUFeatures = "0.1"
 KernelAbstractions = "0.9.35"
diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
new file mode 100644
index 0000000..7ab9873
--- /dev/null
+++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
@@ -0,0 +1,13 @@
+module BeamTrackingCUDAExt
+using CUDA: CuArray
+import BeamTracking: gaussian_random
+
+function gaussian_random(::CuArray, sigma1, sigma2)
+  s, c = sincospi(2 * rand())
+  t = sqrt(-2 * log(rand()))
+  z0 = c*t*sigma1
+  z1 = s*t*sigma2
+  return z0, z1
+end
+
+end
\ No newline at end of file
diff --git a/src/kernels/radiation.jl b/src/kernels/radiation.jl
index f9bcd20..acbead4 100644
--- a/src/kernels/radiation.jl
+++ b/src/kernels/radiation.jl
@@ -160,7 +160,7 @@ end
     sigma2_1 = one(sigma2)
     sigma = sqrt(vifelse(alive, sigma2, sigma2_1)) 
 
-    dpz, theta = gaussian_random(sigma, sqrt(13/55)/gamma)
+    dpz, theta = gaussian_random(v, sigma, sqrt(13/55)/gamma)
     s, c  = sincos(theta)
 
     b_perp_hat_x = vifelse(b_perp > 0, b_perp_x / b_perp, 0)
diff --git a/src/utils/math_simd.jl b/src/utils/math_simd.jl
index 6ccbf19..211985c 100644
--- a/src/utils/math_simd.jl
+++ b/src/utils/math_simd.jl
@@ -165,18 +165,14 @@ compiler bug, but CUDA.rand seems to be ok. Nonetheless
 this may also give CPU performance benefits with radiation 
 because we already need to compute two randn's anyways.
 """
-function gaussian_random(sigma1, sigma2)
-  s, c = sincospi(2 * rand())
-  t = sqrt(-2 * log(rand()))
-  z0 = c*t*sigma1
-  z1 = s*t*sigma2
-  return z0, z1
+function gaussian_random(::Matrix, sigma1, sigma2)
+  return randn()*sigma1, randn()*sigma2
 end
 
 
 """
 See gaussian_random, but for SIMD vectors.
 """
-function gaussian_random(sigma1::SIMD.Vec, sigma2::SIMD.Vec) 
+function gaussian_random(::Matrix, sigma1::SIMD.Vec, sigma2::SIMD.Vec)
   return SIMDMathFunctions.vmap(gaussian_random, sigma1, sigma2)
 end

From aefac4b0f7603eb395559740a699b210293e5a45 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Wed, 4 Feb 2026 16:45:44 -0500
Subject: [PATCH 08/30] tests

---
 .../BeamlinesExt/beamlines_stochastic_test.jl | 69 +------------------
 1 file changed, 1 insertion(+), 68 deletions(-)

diff --git a/test/BeamlinesExt/beamlines_stochastic_test.jl b/test/BeamlinesExt/beamlines_stochastic_test.jl
index 9bf854b..e96a96c 100644
--- a/test/BeamlinesExt/beamlines_stochastic_test.jl
+++ b/test/BeamlinesExt/beamlines_stochastic_test.jl
@@ -3,6 +3,7 @@ using Random
 @testset "Stochastic radiation" begin
   Random.seed!(0)
 
+  # Here just check that they don't bug out
   p_over_q_ref = BeamTracking.E_to_R(Species("electron"), 18e9)
   bend = SBend(g = 0.01, L = 2.0, 
   tracking_method = Yoshida(order = 2, num_steps = 1, 
@@ -13,109 +14,41 @@ using Random
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈ 
-  [0.04889165969752571 
-  0.021174046860678353 
-  0.10556186702264685 
-  0.03999912146491459 
-  0.04761050793826471 
-  0.05997673560385752]'
-
   bend.tracking_method = Yoshida(order = 2, num_steps = 2, 
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈ 
-  [0.04889193258818915
-  0.021174302497216673
-  0.10556186827683302
-  0.03999908434104034
-  0.04761050068397177
-  0.059975733285604405]'
-
   bend.tracking_method = Yoshida(order = 4, num_steps = 1,
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈ 
-  [0.048891974265822244
-  0.02117431265208611
-  0.10556186796033079
-  0.03999910506374448
-  0.047610499467786394
-  0.059976284409328576]'
-
   bend.tracking_method = Yoshida(order = 4, num_steps = 2, 
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈ 
-  [0.0488920016565342
-  0.021174218561770715
-  0.10556186797663822
-  0.039998878356961864
-  0.04761049873848289
-  0.05997027467880636]'
-
-
   bend.tracking_method = Yoshida(order = 6, num_steps = 1,
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈ 
-  [0.04889226281541181
-  0.021174668413673003
-  0.10556186827012498
-  0.03999920072349538
-  0.04761049187637356
-  0.059978810393760046]'
-
-
   bend.tracking_method = Yoshida(order = 6, num_steps = 2, 
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
-  
-  @test b0.coords.v ≈
-  [0.04889189599231092
-  0.02117408337833361
-  0.10556186775984797
-  0.03999890918412512
-  0.04761050140381129
-  0.059971097099417885]'
-
 
   bend.tracking_method = Yoshida(order = 8, num_steps = 1, 
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈
-  [0.04889209188518841
-  0.02117482934639319
-  0.10556186796145814
-  0.039999846102478705
-  0.047610496377937794
-  0.059995919985280796]'
-
-
   bend.tracking_method = Yoshida(order = 8, num_steps = 2, 
   radiation_damping_on = true, radiation_fluctuations_on = true)
   b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref)
   track!(b0, line)
 
-  @test b0.coords.v ≈
-  [0.04889219169051286
-  0.02117499046271834
-  0.10556186830041747
-  0.039999857154632036
-  0.04761049389108041
-  0.05999621385138163]'
-
   # Now just check SIMD , if it doesn't bug out
   bend.tracking_method = Yoshida(order = 8, num_steps = 2, 
   radiation_damping_on = true, radiation_fluctuations_on = true)

From 00273a15aa427ae5bab2f439f4fb10dfe9b796b1 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 06:09:29 -0500
Subject: [PATCH 09/30] fix

---
 src/kernel.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/kernel.jl b/src/kernel.jl
index b664a5f..1ca4698 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -9,11 +9,11 @@ blank_kernel!(args...) = nothing
   kernel::K = blank_kernel!
   args::A   = ()
   function KernelCall(kernel, args)
-    if args != ()
+    #=if args != ()
       @show args
       @show batch_lower(args)
       error("")
-    end
+    end=#
     _args = map(t->time_lower(batch_lower(t)), args)
     new{typeof(kernel),typeof(_args)}(kernel, _args)
   end 
@@ -68,8 +68,8 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref)
 end
 
 function process_batch_args(i, args)
-      @show args
-    error("")
+  #=    @show args
+    error("")=#
   if static_batchcheck(args) 
     # beval(args, i)
     #error("")

From f7d839887789cf1505a07d441cb611680dd6d178 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 07:11:51 -0500
Subject: [PATCH 10/30] fix

---
 ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
index 7ab9873..6e1a168 100644
--- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
+++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
@@ -1,8 +1,10 @@
 module BeamTrackingCUDAExt
-using CUDA: CuArray
+using CUDA: CuDeviceArray
 import BeamTracking: gaussian_random
 
-function gaussian_random(::CuArray, sigma1, sigma2)
+# CUDA.jl has horrifying error message with randn, but not rand
+# Box-Muller transform let's us get around that
+function gaussian_random(::CuDeviceArray, sigma1, sigma2)
   s, c = sincospi(2 * rand())
   t = sqrt(-2 * log(rand()))
   z0 = c*t*sigma1

From ba752c9ab287a706e72295ac9fb4f329e4c32b87 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 07:20:14 -0500
Subject: [PATCH 11/30] fix SIMD

---
 ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 12 ++++++++++--
 src/utils/math_simd.jl                         | 10 ++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
index 6e1a168..007017a 100644
--- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
+++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
@@ -2,8 +2,16 @@ module BeamTrackingCUDAExt
 using CUDA: CuDeviceArray
 import BeamTracking: gaussian_random
 
-# CUDA.jl has horrifying error message with randn, but not rand
-# Box-Muller transform let's us get around that
+"""
+This function returns two Gaussian random numbers with 
+mean 0 and standard deviations sigma1, sigma2 using a 
+Box-Muller transform.
+
+This was implemented because CUDA.randn has some horrible 
+compiler bug, but CUDA.rand seems to be ok. Nonetheless 
+this may also give CPU performance benefits with radiation 
+because we already need to compute two randn's anyways.
+"""
 function gaussian_random(::CuDeviceArray, sigma1, sigma2)
   s, c = sincospi(2 * rand())
   t = sqrt(-2 * log(rand()))
diff --git a/src/utils/math_simd.jl b/src/utils/math_simd.jl
index 211985c..2941601 100644
--- a/src/utils/math_simd.jl
+++ b/src/utils/math_simd.jl
@@ -157,13 +157,7 @@ end
 
 """
 This function returns two Gaussian random numbers with 
-mean 0 and standard deviations sigma1, sigma2 using a 
-Box-Muller transform.
-
-This was implemented because CUDA.randn has some horrible 
-compiler bug, but CUDA.rand seems to be ok. Nonetheless 
-this may also give CPU performance benefits with radiation 
-because we already need to compute two randn's anyways.
+mean 0 and standard deviations sigma1, sigma2.
 """
 function gaussian_random(::Matrix, sigma1, sigma2)
   return randn()*sigma1, randn()*sigma2
@@ -174,5 +168,5 @@ end
 See gaussian_random, but for SIMD vectors.
 """
 function gaussian_random(::Matrix, sigma1::SIMD.Vec, sigma2::SIMD.Vec)
-  return SIMDMathFunctions.vmap(gaussian_random, sigma1, sigma2)
+  return SIMDMathFunctions.vmap((s1,s2)->(randn()*s1, randn()*s2), sigma1, sigma2)
 end

From 750bfc983a39b57fce0d6437061c212fc29fe298 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 10:05:30 -0500
Subject: [PATCH 12/30] improve unpacking type stability

---
 .ipynb_checkpoints/Untitled-checkpoint.ipynb  |   6 +
 Untitled.ipynb                                | 855 ++++++++++++++++++
 .../BeamTrackingBeamlinesExt.jl               |   2 +-
 ext/BeamTrackingBeamlinesExt/unpack.jl        |   2 +-
 ext/BeamTrackingBeamlinesExt/utils.jl         |  66 +-
 .../BeamTrackingCUDAExt.jl                    |   4 +-
 src/BeamTracking.jl                           |   2 +-
 src/batch.jl                                  | 161 +++-
 src/kernel.jl                                 |   3 +
 9 files changed, 1034 insertions(+), 67 deletions(-)
 create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb
 create mode 100644 Untitled.ipynb

diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000..363fcab
--- /dev/null
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..a2c867f
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,855 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "becfe72f-f151-4c6f-91cd-4e60b4b1acab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Revise\n",
+    "using ProfileView\n",
+    "using BenchmarkTools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using BeamTracking, Beamlines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "2da06e85-ae4e-4742-9b6e-60f3d68ca6f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using BeamTracking: batch_lower"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Beamline:\n",
+       " species_ref = electron\n",
+       " E_ref = 1.8e10\n",
+       "\n",
+       " \u001b[1m Index \u001b[0m \u001b[1m Name \u001b[0m \u001b[1m Kind       \u001b[0m \u001b[1m s [m] \u001b[0m\n",
+       "  1       qf     Quadrupole   0\n",
+       "  2       d      Drift        0.5\n",
+       "  3       qd     Quadrupole   1.7\n",
+       "  4       d      Drift        2.2\n"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@elements begin\n",
+    "    qf = Quadrupole(Kn1=0.36, L=0.5)\n",
+    "    d = Drift(L=1.2)\n",
+    "    qd = Quadrupole(Kn1=-0.36, L=0.5)\n",
+    "end\n",
+    "fodo = Beamline([qf, d, qd, d], species_ref=Species(\"electron\"), E_ref=18e9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "id": "e739ea1c-147a-447b-9a98-8585a51f9d24",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Setting bunch.species = Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ) (reference species from the Beamline)\n",
+      "Setting bunch.p_over_q_ref = -60.04153711147289 (reference p_over_q_ref from the Beamline)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.1341179241307242e-8, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "N = 1000\n",
+    "b0 = Bunch(zeros(N,6), zeros(N,4))\n",
+    "track!(b0, fodo)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.1341179241307242e-8, 35225.12124230655))"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "kc = track!(b0, fodo.line[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kn, ks = get_m"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5))"
+      ]
+     },
+     "execution_count": 95,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kc.chain[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "args= kc.chain[1].args"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "id": "6c89cfe5-5b3e-46e7-a6e7-b713a54930fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "false"
+      ]
+     },
+     "execution_count": 93,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "1e-7≈0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "id": "b762b051-237e-4917-b0ff-8f35fb25beb1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
+      ]
+     },
+     "execution_count": 90,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "args[2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "id": "702fa820-feb2-46a1-b84d-a763de300a8c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-0.0"
+      ]
+     },
+     "execution_count": 91,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "batch_lower(args[2][9])[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "3f49a56d-fd3b-4a50-9419-bfa5db328b51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "foreach(x->x.Bn0=1e-7, fodo.line)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "3e7a06b6-1f48-474a-807e-eb9515488897",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  2.309 ms (409 allocations: 19.30 KiB)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.9552193012013305e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; … ; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@btime track!($b0, $fodo)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "foreach(x->x.Kn0=BatchParam([2e-7, 1e-7]), fodo.line)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "21a8efb7-db32-4465-93fb-1d0e56b9a3e2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  2.349 ms (923 allocations: 29.17 KiB)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 4.0692151117807276e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [-5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; … ; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@btime track!($b0, $fodo)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "9f5d074b-31ca-420d-943d-67ca4fbe4804",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element Vector{BatchParam}:\n",
+       " BatchParam(-0.0)\n",
+       " BatchParam(0.0)"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t[1][end]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "id": "213e0113-edba-427d-b8cc-1cf18352fc57",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BMultipoleParams\n",
+       " Bn0   = BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n",
+       " Kn1   = BatchParam(0.36)\n"
+      ]
+     },
+     "execution_count": 128,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bm = qf.BMultipoleParams"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "bef5d1b8-9aaa-4be9-abe6-d17c8f886aef",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BeamTrackingBeamlinesExt"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "const BTBL = Base.get_extension(BeamTracking, :BeamTrackingBeamlinesExt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "id": "2efbe468-28af-4e0c-b7ec-ee6aa4e980bd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "n = BatchParam[BatchParam([-1.2008307422294578e-5]), BatchParam(0.36)]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(BatchParam[BatchParam(0.0f0), BatchParam(0.36)], -60.04153711147289, Bool[0, 1])"
+      ]
+     },
+     "execution_count": 116,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np, p_over_q_ref, normalized =  BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "id": "808e9a36-b6a9-4cca-9db9-1f5ff3c8c2e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.000219 seconds (176 allocations: 4.984 KiB)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
+      ]
+     },
+     "execution_count": 133,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@time kn, ks = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "id": "d666c09a-0d3c-4adf-a88f-229ab6b0d52a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BatchParam"
+      ]
+     },
+     "execution_count": 137,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "L=qf.L\n",
+    "promote_type(reduce(promote_type, typeof.(bm.n)), \n",
+    "                      reduce(promote_type, typeof.(bm.s)),\n",
+    "                      reduce(promote_type, typeof.(bm.tilt)),\n",
+    "                      typeof(L), typeof(p_over_q_ref)\n",
+    "      )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "id": "f1c249b6-f18d-49a9-ac92-ac931862d027",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BatchParam"
+      ]
+     },
+     "execution_count": 136,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "promote_type(eltype(bm.n), typeof(qf.L), typeof(p_over_q_ref))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "id": "9598c822-1f59-42eb-959e-50a273355b66",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}} with indices SOneTo(2):\n",
+       " BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n",
+       " BatchParam(0.36)"
+      ]
+     },
+     "execution_count": 144,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "getfield(bm, :n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MethodInstance for BeamTrackingBeamlinesExt.get_strengths(::BMultipoleParams{BatchParam, 2}, ::Float64, ::Float64)\n",
+      "  from get_strengths(\u001b[90mbm\u001b[39m, \u001b[90mL\u001b[39m, \u001b[90mp_over_q_ref\u001b[39m)\u001b[90m @\u001b[39m \u001b[90mBeamTrackingBeamlinesExt\u001b[39m \u001b[90m~/.julia/dev/BeamTracking/ext/BeamTrackingBeamlinesExt/\u001b[39m\u001b[90m\u001b[4mutils.jl:79\u001b[24m\u001b[39m\n",
+      "Arguments\n",
+      "  #self#\u001b[36m::Core.Const(BeamTrackingBeamlinesExt.get_strengths)\u001b[39m\n",
+      "  bm\u001b[36m::BMultipoleParams{BatchParam, 2}\u001b[39m\n",
+      "  L\u001b[36m::Float64\u001b[39m\n",
+      "  p_over_q_ref\u001b[36m::Float64\u001b[39m\n",
+      "Locals\n",
+      "  sp\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "  np\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "  integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
+      "  normalized\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
+      "  order\u001b[36m::StaticArraysCore.SVector{2, Int64}\u001b[39m\n",
+      "  tilt\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "  s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "  n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "  T\u001b[36m::Type{BatchParam}\u001b[39m\n",
+      "  bmtilt\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
+      "  bms\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
+      "  bmn\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
+      "Body\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n",
+      "\u001b[90m1 ─\u001b[39m        nothing\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(sp))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(np))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(integrated))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(normalized))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(order))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(tilt))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(s))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(n))\n",
+      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(T))\n",
+      "\u001b[90m│  \u001b[39m        (bmn = BeamTrackingBeamlinesExt.getfield(bm, :n))\n",
+      "\u001b[90m│  \u001b[39m        (bms = BeamTrackingBeamlinesExt.getfield(bm, :s))\n",
+      "\u001b[90m│  \u001b[39m        (bmtilt = BeamTrackingBeamlinesExt.getfield(bm, :tilt))\n",
+      "\u001b[90m│  \u001b[39m %14  = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %15  = BeamTrackingBeamlinesExt.isconcretetype(%14)\u001b[36m::Core.Const(true)\u001b[39m\n",
+      "\u001b[90m└──\u001b[39m        goto #3 if not %15\n",
+      "\u001b[90m2 ─\u001b[39m %17  = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %18  = BeamTrackingBeamlinesExt.typeof(L)\u001b[36m::Core.Const(Float64)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %19  = BeamTrackingBeamlinesExt.typeof(p_over_q_ref)\u001b[36m::Core.Const(Float64)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (T = BeamTrackingBeamlinesExt.promote_type(%17, %18, %19))\n",
+      "\u001b[90m└──\u001b[39m        goto #4\n",
+      "\u001b[90m3 ─\u001b[39m        Core.Const(:(bmn isa BeamTrackingBeamlinesExt.AbstractArray))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(goto %40 if not %22))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmn)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%25)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%24, %26)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bms)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%29)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%28, %30)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmtilt)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%33)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%32, %34)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%27, %31, %35, %36, %37)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(goto %46))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmn)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bms)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmtilt)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n",
+      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n",
+      "\u001b[90m└──\u001b[39m        Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%40, %41, %42, %43, %44)))\n",
+      "\u001b[90m4 ┄\u001b[39m %46  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %47  = BeamTrackingBeamlinesExt.make_static(bmn)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %48  = Base.broadcasted(%46, %47)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (n = Base.materialize(%48))\n",
+      "\u001b[90m│  \u001b[39m %50  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %51  = BeamTrackingBeamlinesExt.make_static(bms)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %52  = Base.broadcasted(%50, %51)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (s = Base.materialize(%52))\n",
+      "\u001b[90m│  \u001b[39m %54  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %55  = BeamTrackingBeamlinesExt.make_static(bmtilt)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %56  = Base.broadcasted(%54, %55)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (tilt = Base.materialize(%56))\n",
+      "\u001b[90m│  \u001b[39m        (order = BeamTrackingBeamlinesExt.getfield(bm, :order))\n",
+      "\u001b[90m│  \u001b[39m        (normalized = BeamTrackingBeamlinesExt.getfield(bm, :normalized))\n",
+      "\u001b[90m│  \u001b[39m        (integrated = BeamTrackingBeamlinesExt.getfield(bm, :integrated))\n",
+      "\u001b[90m│  \u001b[39m %61  = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %62  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %63  = n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %64  = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %65  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %66  = Base.broadcasted(%64, %65)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %67  = Base.broadcasted(%62, %63, %66)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %68  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %69  = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %70  = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %71  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %72  = Base.broadcasted(%70, %71)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %73  = Base.broadcasted(%68, %69, %72)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %74  = Base.broadcasted(%61, %67, %73)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%74))\n",
+      "\u001b[90m│  \u001b[39m %76  = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %77  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %78  = Base.broadcasted(BeamTrackingBeamlinesExt.:-, n)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %79  = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %80  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %81  = Base.broadcasted(%79, %80)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %82  = Base.broadcasted(%77, %78, %81)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %83  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %84  = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %85  = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %86  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %87  = Base.broadcasted(%85, %86)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %88  = Base.broadcasted(%83, %84, %87)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %89  = Base.broadcasted(%76, %82, %88)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%89))\n",
+      "\u001b[90m│  \u001b[39m %91  = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %92  = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %93  = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %94  = Base.broadcasted(%91, %92, %93, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%94))\n",
+      "\u001b[90m│  \u001b[39m %96  = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %97  = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %98  = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %99  = Base.broadcasted(%96, %97, %98, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%99))\n",
+      "\u001b[90m│  \u001b[39m %101 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %102 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %103 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %104 = Base.broadcasted(%101, %102, %103, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%104))\n",
+      "\u001b[90m│  \u001b[39m %106 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %107 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %108 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m %109 = Base.broadcasted(%106, %107, %108, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
+      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%109))\n",
+      "\u001b[90m│  \u001b[39m %111 = Core.tuple(np, sp)\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n",
+      "\u001b[90m└──\u001b[39m        return %111\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "@code_warntype BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "id": "e93f6ce2-9383-49b5-b798-3d33380c8214",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
+       " BatchParam([2.0e-7, 1.0e-7])\n",
+       " BatchParam(0.36)"
+      ]
+     },
+     "execution_count": 130,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "id": "fb0ca353-c7a2-4c90-bd93-4e74cbb2d981",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
+       " BatchParam(-0.0)\n",
+       " BatchParam(0.0)"
+      ]
+     },
+     "execution_count": 131,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "63312df7-bd24-4dc6-9369-5323c2121925",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
+       " BatchParam(-0.0)\n",
+       " BatchParam(0.36)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "9bf348a6-928d-459c-96dc-54ce19162426",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
+       " BatchParam(-0.0)\n",
+       " BatchParam(0.0)"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "930fb9e4-ef96-42d9-8d0d-4bcec336f585",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "search: \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1my\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22md\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mize \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer Unde\u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mI\u001b[22m\u001b[0m\u001b[1mn\u001b[22miti\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/latex": [
+       "\\begin{verbatim}\n",
+       "finally\n",
+       "\\end{verbatim}\n",
+       "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n",
+       "\n",
+       "\\begin{verbatim}\n",
+       "f = open(\"file\")\n",
+       "try\n",
+       "    operate_on_file(f)\n",
+       "finally\n",
+       "    close(f)\n",
+       "end\n",
+       "\\end{verbatim}\n",
+       "When control leaves the \\href{@ref}{\\texttt{try}} block (for example, due to a \\href{@ref}{\\texttt{return}}, or just finishing normally), \\href{@ref}{\\texttt{close(f)}} will be executed. If the \\texttt{try} block exits due to an exception, the exception will continue propagating. A \\texttt{catch} block may be combined with \\texttt{try} and \\texttt{finally} as well. In this case the \\texttt{finally} block will run after \\texttt{catch} has handled the error.\n",
+       "\n"
+      ],
+      "text/markdown": [
+       "```\n",
+       "finally\n",
+       "```\n",
+       "\n",
+       "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n",
+       "\n",
+       "```julia\n",
+       "f = open(\"file\")\n",
+       "try\n",
+       "    operate_on_file(f)\n",
+       "finally\n",
+       "    close(f)\n",
+       "end\n",
+       "```\n",
+       "\n",
+       "When control leaves the [`try`](@ref) block (for example, due to a [`return`](@ref), or just finishing normally), [`close(f)`](@ref) will be executed. If the `try` block exits due to an exception, the exception will continue propagating. A `catch` block may be combined with `try` and `finally` as well. In this case the `finally` block will run after `catch` has handled the error.\n"
+      ],
+      "text/plain": [
+       "\u001b[36m  finally\u001b[39m\n",
+       "\n",
+       "  Run some code when a given block of code exits, regardless of how it exits.\n",
+       "  For example, here is how we can guarantee that an opened file is closed:\n",
+       "\n",
+       "\u001b[36m  f = open(\"file\")\u001b[39m\n",
+       "\u001b[36m  try\u001b[39m\n",
+       "\u001b[36m      operate_on_file(f)\u001b[39m\n",
+       "\u001b[36m  finally\u001b[39m\n",
+       "\u001b[36m      close(f)\u001b[39m\n",
+       "\u001b[36m  end\u001b[39m\n",
+       "\n",
+       "  When control leaves the \u001b[36mtry\u001b[39m block (for example, due to a \u001b[36mreturn\u001b[39m, or just\n",
+       "  finishing normally), \u001b[36mclose(f)\u001b[39m will be executed. If the \u001b[36mtry\u001b[39m block exits due\n",
+       "  to an exception, the exception will continue propagating. A \u001b[36mcatch\u001b[39m block may\n",
+       "  be combined with \u001b[36mtry\u001b[39m and \u001b[36mfinally\u001b[39m as well. In this case the \u001b[36mfinally\u001b[39m block\n",
+       "  will run after \u001b[36mcatch\u001b[39m has handled the error."
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "?finally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2606c31-0971-4091-a110-b22588873197",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia Global (10 threads) 1.10",
+   "language": "julia",
+   "name": "julia-global-_10-threads_-1.10"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.10.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
index 5c67483..9883a6b 100644
--- a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
+++ b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
@@ -16,7 +16,7 @@ function track!(
   kwargs...
 )
   coords = bunch.coords
-  @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...)
+  return @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...)
   return bunch
 end
 
diff --git a/ext/BeamTrackingBeamlinesExt/unpack.jl b/ext/BeamTrackingBeamlinesExt/unpack.jl
index e9bb656..06a2eef 100644
--- a/ext/BeamTrackingBeamlinesExt/unpack.jl
+++ b/ext/BeamTrackingBeamlinesExt/unpack.jl
@@ -246,7 +246,7 @@ function universal!(
 
   # Evolve time through whole element
   bunch.t_ref += L/beta_gamma_to_v(beta_gamma_ref)
-  
+  return kc
   # noinline necessary here for small binaries and faster execution
   @noinline launch!(coords, kc; kwargs...)
   return nothing
diff --git a/ext/BeamTrackingBeamlinesExt/utils.jl b/ext/BeamTrackingBeamlinesExt/utils.jl
index f47bfa3..0482138 100644
--- a/ext/BeamTrackingBeamlinesExt/utils.jl
+++ b/ext/BeamTrackingBeamlinesExt/utils.jl
@@ -77,31 +77,34 @@ This works for both BMultipole and BMultipoleParams. Branchless bc SIMD -> basic
 no loss in computing both but benefit of branchless.
 """
 @inline function get_strengths(bm, L, p_over_q_ref)
-  if isconcretetype(eltype(bm.n))
-    T = promote_type(eltype(bm.n),
+  bmn = getfield(bm, :n)
+  bms = getfield(bm, :s)
+  bmtilt = getfield(bm, :tilt)
+  if isconcretetype(eltype(bmn))
+    T = promote_type(eltype(bmn),
                     typeof(L), typeof(p_over_q_ref)
     )
   else
-    if bm.n isa AbstractArray
-      T = promote_type(reduce(promote_type, typeof.(bm.n)), 
-                      reduce(promote_type, typeof.(bm.s)),
-                      reduce(promote_type, typeof.(bm.tilt)),
+    if bmn isa AbstractArray
+      T = promote_type(reduce(promote_type, typeof.(bmn)), 
+                      reduce(promote_type, typeof.(bms)),
+                      reduce(promote_type, typeof.(bmtilt)),
                       typeof(L), typeof(p_over_q_ref)
       )
     else
-      T = promote_type(typeof(bm.n), 
-                      typeof(bm.s),
-                      typeof(bm.tilt),
+      T = promote_type(typeof(bmn), 
+                      typeof(bms),
+                      typeof(bmtilt),
                       typeof(L), typeof(p_over_q_ref)
       )
     end
   end
-  n = T.(make_static(bm.n))
-  s = T.(make_static(bm.s))
-  tilt = T.(make_static(bm.tilt))
-  order = bm.order
-  normalized = bm.normalized
-  integrated = bm.integrated
+  n = T.(make_static(bmn))
+  s = T.(make_static(bms))
+  tilt = T.(make_static(bmtilt))
+  order = getfield(bm, :order)
+  normalized = getfield(bm, :normalized)
+  integrated = getfield(bm, :integrated)
   np = @. n*cos(order*tilt) + s*sin(order*tilt)
   sp = @. -n*sin(order*tilt) + s*cos(order*tilt)
   np = @. ifelse(!normalized, np/p_over_q_ref, np) 
@@ -112,31 +115,34 @@ no loss in computing both but benefit of branchless.
 end
 
 @inline function get_integrated_strengths(bm, L, p_over_q_ref)
-  if isconcretetype(eltype(bm.n))
-    T = promote_type(eltype(bm.n),
+  bmn = getfield(bm, :n)
+  bms = getfield(bm, :s)
+  bmtilt = getfield(bm, :tilt)
+  if isconcretetype(eltype(bmn))
+    T = promote_type(eltype(bmn),
                     typeof(L), typeof(p_over_q_ref)
     )
   else
-    if bm.n isa AbstractArray
-      T = promote_type(reduce(promote_type, typeof.(bm.n)), 
-                      reduce(promote_type, typeof.(bm.s)),
-                      reduce(promote_type, typeof.(bm.tilt)),
+    if bmn isa AbstractArray
+      T = promote_type(reduce(promote_type, typeof.(bmn)), 
+                      reduce(promote_type, typeof.(bms)),
+                      reduce(promote_type, typeof.(bmtilt)),
                       typeof(L), typeof(p_over_q_ref)
       )
     else
-      T = promote_type(typeof(bm.n), 
-                      typeof(bm.s),
-                      typeof(bm.tilt),
+      T = promote_type(typeof(bmn), 
+                      typeof(bms),
+                      typeof(bmtilt),
                       typeof(L), typeof(p_over_q_ref)
       )
     end
   end
-  n = T.(make_static(bm.n))
-  s = T.(make_static(bm.s))
-  tilt = T.(make_static(bm.tilt))
-  order = bm.order
-  normalized = bm.normalized
-  integrated = bm.integrated
+  n = T.(make_static(bmn))
+  s = T.(make_static(bms))
+  tilt = T.(make_static(bmtilt))
+  order = getfield(bm, :order)
+  normalized = getfield(bm, :normalized)
+  integrated = getfield(bm, :integrated)
   np = @. n*cos(order*tilt) + s*sin(order*tilt)
   sp = @. -n*sin(order*tilt) + s*cos(order*tilt)
   np = @. ifelse(!normalized, np/p_over_q_ref, np) 
diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
index 007017a..fd14bba 100644
--- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
+++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl
@@ -8,9 +8,7 @@ mean 0 and standard deviations sigma1, sigma2 using a
 Box-Muller transform.
 
 This was implemented because CUDA.randn has some horrible 
-compiler bug, but CUDA.rand seems to be ok. Nonetheless 
-this may also give CPU performance benefits with radiation 
-because we already need to compute two randn's anyways.
+compiler bug, but CUDA.rand seems to be ok.
 """
 function gaussian_random(::CuDeviceArray, sigma1, sigma2)
   s, c = sincospi(2 * rand())
diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl
index 2e21520..ac941f8 100644
--- a/src/BeamTracking.jl
+++ b/src/BeamTracking.jl
@@ -18,7 +18,7 @@ using KernelAbstractions
 
 import GTPSA: sincu, sinhcu
 
-export Bunch, State, ParticleView, Time, TimeDependentParam
+export Bunch, State, ParticleView, Time, TimeDependentParam, BatchParam
 export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact
 export Fringe
 export track!
diff --git a/src/batch.jl b/src/batch.jl
index 6375732..2d63e26 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -74,49 +74,148 @@ Base.zero(b::BatchParam) = BatchParam(zero(first(b.batch)))
 Base.one(b::BatchParam)  = BatchParam(one(first(b.batch))) 
 
 # Now define the math operations:
-for op in (:+,:-,:*,:/,:^)
-  @eval begin
-    Base.$op(ba::BatchParam, b::Number)   = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end)
-    Base.$op(a::Number,   bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end)
-    
-    function Base.$op(ba::BatchParam, bb::BatchParam)
-      if ba.batch isa Number
-        if bb.batch isa Number
-          return BatchParam($op(ba.batch, bb.batch))
-        else
-          let a = ba.batch
-            # WE HAVE TO WRITE THIS BY HAND FOR EACH OP!!!
-            if a ≈ 0 # So that e.g. big arrays aren't made for skew strengths when only normal is batch
-              return BatchParam(0f0)
-            else
-              return BatchParam(map((bbi)->$op(a, bbi), bb.batch))
-            end
-          end
-        end
-      elseif bb.batch isa Number
-        let b = bb.batch
-          if b ≈ 0
-            return BatchParam(0f0)
-          else
-            return BatchParam(map((bai)->$op(bai, b), ba.batch))
-          end
+# The operations are individually-specialized for each operator, assuming that the 
+# most expensive step is creating temporary arrays, not type instability. As such, 
+# each are defined in a way as to minimize number of temporary arrays during unpacking,
+# checking for e.g. 0's and 1's.
+function _batch_addsub(batch_a, batch_b, op::T) where {T<:Union{typeof(+),typeof(-)}}
+  if batch_a isa Number
+    if batch_b isa Number
+      return BatchParam(op(batch_a, batch_b))
+    else
+      if batch_a ≈ 0 # add/sub by zero gives identity
+        return BatchParam(batch_b)
+      else
+        let a = batch_a
+          return BatchParam(map((bi)->op(a, bi), batch_b))
         end
-      elseif length(ba.batch) == length(bb.batch)
-        return BatchParam(map((bai,bbi)->$op(bai, bbi), ba.batch, bb.batch))
+      end
+    end
+  elseif batch_b isa Number
+    if batch_b ≈ 0 # add/sub by zero gives identity
+      return BatchParam(batch_a)
+    else
+      let b = batch_b
+        return BatchParam(map((ai)->op(ai, b), batch_a))
+      end
+    end
+  elseif length(batch_a) == length(batch_b)
+    return BatchParam(map((ai,bi)->op(ai, bi), batch_a, batch_b))
+  else
+    error("Cannot perform operation $(op) with two non-scalar BatchParams of differing 
+            lengths (received lengths $(length(batch_a)) and $(length(batch_b))).")
+  end
+end
+
+Base.:+(ba::BatchParam, n::Number)      = _batch_addsub(ba.batch, n, +)
+Base.:+(n::Number, bb::BatchParam)      = _batch_addsub(n, bb.batch, +)
+Base.:+(ba::BatchParam, bb::BatchParam) = _batch_addsub(ba.batch, bb.batch, +)
+
+Base.:-(ba::BatchParam, n::Number)      = _batch_addsub(ba.batch, n, -)
+Base.:-(n::Number, bb::BatchParam)      = _batch_addsub(n, bb.batch, -)
+Base.:-(ba::BatchParam, bb::BatchParam) = _batch_addsub(ba.batch, bb.batch, -)
+
+function _batch_mul(batch_a, batch_b)
+  if batch_a isa Number
+    if batch_b isa Number
+      return BatchParam(*(batch_a, batch_b))
+    else
+      if batch_a ≈ 0 # mul by 0 gives 0 -> make scalar
+        return BatchParam(0f0)
+      elseif batch_a ≈ 1 # mul by 1 gives identity
+        return BatchParam(batch_b)
       else
-        error("Cannot perform operation $($op) with two non-scalar BatchParams of differing 
-               lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).")
+        let a = batch_a
+          return BatchParam(map((bi)->*(a, bi), batch_b))
+        end
+      end
+    end
+  elseif batch_b isa Number
+    if batch_b ≈ 0 # mul by 0 gives 0 -> make scalar
+      return BatchParam(0f0)
+    elseif batch_b ≈ 1 # mul by 1 gives identity
+        return BatchParam(batch_a)
+    else
+      let b = batch_b
+        return BatchParam(map((ai)->*(ai, b), batch_a))
+      end
+    end
+  elseif length(batch_a) == length(batch_b)
+    return BatchParam(map((ai,bi)->*(ai, bi), batch_a, batch_b))
+  else
+    error("Cannot perform operation * with two non-scalar BatchParams of differing 
+            lengths (received lengths $(length(batch_a)) and $(length(batch_b))).")
+  end
+end
+
+Base.:*(ba::BatchParam, n::Number)      = _batch_mul(ba.batch, n)
+Base.:*(n::Number, bb::BatchParam)      = _batch_mul(n, bb.batch)
+Base.:*(ba::BatchParam, bb::BatchParam) = _batch_mul(ba.batch, bb.batch)
+
+function _batch_div(batch_a, batch_b)
+  if batch_a isa Number
+    if batch_b isa Number
+      return BatchParam(/(batch_a, batch_b))
+    else
+      let a = batch_a
+        return BatchParam(map((bi)->/(a, bi), batch_b))
+      end
+    end
+  elseif batch_b isa Number
+    if batch_b ≈ 0 # div by 0 gives Inf -> make scalar
+      return BatchParam(Inf32)
+    elseif batch_b ≈ 1 # div by 1 gives identity
+        return BatchParam(batch_a)
+    else
+      let b = batch_b
+        return BatchParam(map((ai)->/(ai, b), batch_a))
       end
     end
+  elseif length(batch_a) == length(batch_b)
+    return BatchParam(map((ai,bi)->/(ai, bi), batch_a, batch_b))
+  else
+    error("Cannot perform operation / with two non-scalar BatchParams of differing 
+            lengths (received lengths $(length(batch_a)) and $(length(batch_b))).")
+  end
+end
+
+Base.:/(ba::BatchParam, n::Number)      = _batch_div(ba.batch, n)
+Base.:/(n::Number, bb::BatchParam)      = _batch_div(n, bb.batch)
+Base.:/(ba::BatchParam, bb::BatchParam) = _batch_div(ba.batch, bb.batch)
 
+# for now no special things for pow, unsure if called anywhere.
+function _batch_pow(batch_a, batch_b)
+  if batch_a isa Number
+    if batch_b isa Number
+      return BatchParam(^(batch_a, batch_b))
+    else
+      let a = batch_a
+        return BatchParam(map((bi)->^(a, bi), batch_b))
+      end
+    end
+  elseif batch_b isa Number
+    let b = batch_b
+      return BatchParam(map((ai)->^(ai, b), batch_a))
+    end
+  elseif length(batch_a) == length(batch_b)
+    return BatchParam(map((ai,bi)->^(ai, bi), batch_a, batch_b))
+  else
+    error("Cannot perform operation ^ with two non-scalar BatchParams of differing 
+            lengths (received lengths $(length(batch_a)) and $(length(batch_b))).")
   end
 end
 
+Base.:^(ba::BatchParam, n::Number)      = _batch_pow(ba.batch, n)
+Base.:^(n::Number, bb::BatchParam)      = _batch_pow(n, bb.batch)
+Base.:^(ba::BatchParam, bb::BatchParam) = _batch_pow(ba.batch, bb.batch)
+
 function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N}
   return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch))
 end
 
-for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv,
+Base.:+(b::BatchParam) = b # identity
+
+for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv,
   :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float,
   :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs)
   @eval begin
diff --git a/src/kernel.jl b/src/kernel.jl
index 1ca4698..c943c4f 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -9,11 +9,14 @@ blank_kernel!(args...) = nothing
   kernel::K = blank_kernel!
   args::A   = ()
   function KernelCall(kernel, args)
+    
     #=if args != ()
       @show args
       @show batch_lower(args)
+      @show typeof(batch_lower(args))
       error("")
     end=#
+    return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args))
     _args = map(t->time_lower(batch_lower(t)), args)
     new{typeof(kernel),typeof(_args)}(kernel, _args)
   end 

From ebba96d2bcca2bb11cb7caed29d3b3ad26d4ccbb Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 10:40:25 -0500
Subject: [PATCH 13/30] kind of working

---
 Untitled.ipynb                                | 271 ++++++------------
 .../BeamTrackingBeamlinesExt.jl               |   2 +-
 ext/BeamTrackingBeamlinesExt/unpack.jl        |   2 +-
 src/batch.jl                                  |  12 +-
 src/kernel.jl                                 |   8 +-
 5 files changed, 108 insertions(+), 187 deletions(-)

diff --git a/Untitled.ipynb b/Untitled.ipynb
index a2c867f..811310e 100644
--- a/Untitled.ipynb
+++ b/Untitled.ipynb
@@ -14,12 +14,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 206,
    "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477",
    "metadata": {},
    "outputs": [],
    "source": [
-    "using BeamTracking, Beamlines"
+    "using BeamTracking, Beamlines, StaticArrays, SIMD"
    ]
   },
   {
@@ -68,29 +68,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": null,
+   "id": "c977d76f-afc1-4387-93fe-43efcf565893",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = SA[1,2]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "id": "e739ea1c-147a-447b-9a98-8585a51f9d24",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting bunch.species = Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ) (reference species from the Beamline)\n",
-      "Setting bunch.p_over_q_ref = -60.04153711147289 (reference p_over_q_ref from the Beamline)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.1341179241307242e-8, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
-      ]
-     },
-     "execution_count": 80,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "N = 1000\n",
     "b0 = Bunch(zeros(N,6), zeros(N,4))\n",
@@ -99,49 +90,69 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 180,
    "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.1341179241307242e-8, 35225.12124230655))"
+       "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.8012461147958558e-8, 35225.12124230655))"
       ]
      },
-     "execution_count": 81,
+     "execution_count": 180,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "\n",
     "kc = track!(b0, fodo.line[1])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6",
+   "id": "2e9aac96-d73e-4d21-a10a-0034262b9ab3",
    "metadata": {},
    "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 202,
+   "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (1.0e-7, 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, 1.0e-7, 0, 0), Val{true}(), Val{true}(), 0.5)"
+      ]
+     },
+     "execution_count": 202,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "kn, ks = get_m"
+    "args = kc.chain[1].args\n",
+    "#@time BeamTracking.static_batchcheck(args)\n",
+    "@inbounds BeamTracking.beval(args, 2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 174,
    "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5))"
+       "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, SVector{2, BatchParam}, SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5))"
       ]
      },
-     "execution_count": 95,
+     "execution_count": 174,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -152,7 +163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 175,
    "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158",
    "metadata": {
     "scrolled": true
@@ -161,10 +172,10 @@
     {
      "data": {
       "text/plain": [
-       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)"
+       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)"
       ]
      },
-     "execution_count": 82,
+     "execution_count": 175,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -175,44 +186,65 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
-   "id": "6c89cfe5-5b3e-46e7-a6e7-b713a54930fd",
+   "execution_count": 176,
+   "id": "5f17a22d-aae7-4a1d-a7db-d5ec80fd1c7c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "true"
+      ]
+     },
+     "execution_count": 176,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "args[end-3][4].batch === args[2][9][1].batch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 177,
+   "id": "ecf2a0dd-c119-4d0e-87ab-18d31cdba961",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "false"
+       "BatchParam([2.0e-7, 1.0e-7])"
       ]
      },
-     "execution_count": 93,
+     "execution_count": 177,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "1e-7≈0"
+    "args[2][9][1]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 178,
    "id": "b762b051-237e-4917-b0ff-8f35fb25beb1",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
+       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)"
       ]
      },
-     "execution_count": 90,
+     "execution_count": 178,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "args[2]"
+    "batch_lower(args)"
    ]
   },
   {
@@ -276,7 +308,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 207,
    "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4",
    "metadata": {},
    "outputs": [],
@@ -507,7 +539,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 151,
+   "execution_count": 153,
    "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd",
    "metadata": {},
    "outputs": [
@@ -515,145 +547,22 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "MethodInstance for BeamTrackingBeamlinesExt.get_strengths(::BMultipoleParams{BatchParam, 2}, ::Float64, ::Float64)\n",
-      "  from get_strengths(\u001b[90mbm\u001b[39m, \u001b[90mL\u001b[39m, \u001b[90mp_over_q_ref\u001b[39m)\u001b[90m @\u001b[39m \u001b[90mBeamTrackingBeamlinesExt\u001b[39m \u001b[90m~/.julia/dev/BeamTracking/ext/BeamTrackingBeamlinesExt/\u001b[39m\u001b[90m\u001b[4mutils.jl:79\u001b[24m\u001b[39m\n",
-      "Arguments\n",
-      "  #self#\u001b[36m::Core.Const(BeamTrackingBeamlinesExt.get_strengths)\u001b[39m\n",
-      "  bm\u001b[36m::BMultipoleParams{BatchParam, 2}\u001b[39m\n",
-      "  L\u001b[36m::Float64\u001b[39m\n",
-      "  p_over_q_ref\u001b[36m::Float64\u001b[39m\n",
-      "Locals\n",
-      "  sp\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "  np\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "  integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
-      "  normalized\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
-      "  order\u001b[36m::StaticArraysCore.SVector{2, Int64}\u001b[39m\n",
-      "  tilt\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "  s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "  n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "  T\u001b[36m::Type{BatchParam}\u001b[39m\n",
-      "  bmtilt\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
-      "  bms\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
-      "  bmn\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n",
-      "Body\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n",
-      "\u001b[90m1 ─\u001b[39m        nothing\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(sp))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(np))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(integrated))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(normalized))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(order))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(tilt))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(s))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(n))\n",
-      "\u001b[90m│  \u001b[39m        Core.NewvarNode(:(T))\n",
-      "\u001b[90m│  \u001b[39m        (bmn = BeamTrackingBeamlinesExt.getfield(bm, :n))\n",
-      "\u001b[90m│  \u001b[39m        (bms = BeamTrackingBeamlinesExt.getfield(bm, :s))\n",
-      "\u001b[90m│  \u001b[39m        (bmtilt = BeamTrackingBeamlinesExt.getfield(bm, :tilt))\n",
-      "\u001b[90m│  \u001b[39m %14  = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %15  = BeamTrackingBeamlinesExt.isconcretetype(%14)\u001b[36m::Core.Const(true)\u001b[39m\n",
-      "\u001b[90m└──\u001b[39m        goto #3 if not %15\n",
-      "\u001b[90m2 ─\u001b[39m %17  = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %18  = BeamTrackingBeamlinesExt.typeof(L)\u001b[36m::Core.Const(Float64)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %19  = BeamTrackingBeamlinesExt.typeof(p_over_q_ref)\u001b[36m::Core.Const(Float64)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (T = BeamTrackingBeamlinesExt.promote_type(%17, %18, %19))\n",
-      "\u001b[90m└──\u001b[39m        goto #4\n",
-      "\u001b[90m3 ─\u001b[39m        Core.Const(:(bmn isa BeamTrackingBeamlinesExt.AbstractArray))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(goto %40 if not %22))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmn)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%25)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%24, %26)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bms)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%29)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%28, %30)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmtilt)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(Base.materialize(%33)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.reduce(%32, %34)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%27, %31, %35, %36, %37)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(goto %46))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmn)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bms)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmtilt)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n",
-      "\u001b[90m│  \u001b[39m        Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n",
-      "\u001b[90m└──\u001b[39m        Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%40, %41, %42, %43, %44)))\n",
-      "\u001b[90m4 ┄\u001b[39m %46  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %47  = BeamTrackingBeamlinesExt.make_static(bmn)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %48  = Base.broadcasted(%46, %47)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (n = Base.materialize(%48))\n",
-      "\u001b[90m│  \u001b[39m %50  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %51  = BeamTrackingBeamlinesExt.make_static(bms)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %52  = Base.broadcasted(%50, %51)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (s = Base.materialize(%52))\n",
-      "\u001b[90m│  \u001b[39m %54  = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %55  = BeamTrackingBeamlinesExt.make_static(bmtilt)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %56  = Base.broadcasted(%54, %55)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (tilt = Base.materialize(%56))\n",
-      "\u001b[90m│  \u001b[39m        (order = BeamTrackingBeamlinesExt.getfield(bm, :order))\n",
-      "\u001b[90m│  \u001b[39m        (normalized = BeamTrackingBeamlinesExt.getfield(bm, :normalized))\n",
-      "\u001b[90m│  \u001b[39m        (integrated = BeamTrackingBeamlinesExt.getfield(bm, :integrated))\n",
-      "\u001b[90m│  \u001b[39m %61  = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %62  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %63  = n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %64  = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %65  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %66  = Base.broadcasted(%64, %65)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %67  = Base.broadcasted(%62, %63, %66)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %68  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %69  = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %70  = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %71  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %72  = Base.broadcasted(%70, %71)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %73  = Base.broadcasted(%68, %69, %72)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %74  = Base.broadcasted(%61, %67, %73)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%74))\n",
-      "\u001b[90m│  \u001b[39m %76  = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %77  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %78  = Base.broadcasted(BeamTrackingBeamlinesExt.:-, n)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %79  = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %80  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %81  = Base.broadcasted(%79, %80)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %82  = Base.broadcasted(%77, %78, %81)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %83  = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %84  = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %85  = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %86  = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %87  = Base.broadcasted(%85, %86)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %88  = Base.broadcasted(%83, %84, %87)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %89  = Base.broadcasted(%76, %82, %88)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%89))\n",
-      "\u001b[90m│  \u001b[39m %91  = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %92  = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %93  = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %94  = Base.broadcasted(%91, %92, %93, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%94))\n",
-      "\u001b[90m│  \u001b[39m %96  = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %97  = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %98  = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %99  = Base.broadcasted(%96, %97, %98, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%99))\n",
-      "\u001b[90m│  \u001b[39m %101 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %102 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %103 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %104 = Base.broadcasted(%101, %102, %103, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (np = Base.materialize(%104))\n",
-      "\u001b[90m│  \u001b[39m %106 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %107 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %108 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m %109 = Base.broadcasted(%106, %107, %108, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n",
-      "\u001b[90m│  \u001b[39m        (sp = Base.materialize(%109))\n",
-      "\u001b[90m│  \u001b[39m %111 = Core.tuple(np, sp)\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n",
-      "\u001b[90m└──\u001b[39m        return %111\n",
-      "\n"
+      "  0.000089 seconds (104 allocations: 1.906 KiB)\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
+      ]
+     },
+     "execution_count": 153,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "@code_warntype BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
+    "@time BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
    ]
   },
   {
diff --git a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
index 9883a6b..5c67483 100644
--- a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
+++ b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl
@@ -16,7 +16,7 @@ function track!(
   kwargs...
 )
   coords = bunch.coords
-  return @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...)
+  @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...)
   return bunch
 end
 
diff --git a/ext/BeamTrackingBeamlinesExt/unpack.jl b/ext/BeamTrackingBeamlinesExt/unpack.jl
index 06a2eef..e9bb656 100644
--- a/ext/BeamTrackingBeamlinesExt/unpack.jl
+++ b/ext/BeamTrackingBeamlinesExt/unpack.jl
@@ -246,7 +246,7 @@ function universal!(
 
   # Evolve time through whole element
   bunch.t_ref += L/beta_gamma_to_v(beta_gamma_ref)
-  return kc
+  
   # noinline necessary here for small binaries and faster execution
   @noinline launch!(coords, kc; kwargs...)
   return nothing
diff --git a/src/batch.jl b/src/batch.jl
index 2d63e26..cd4aaa8 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -262,7 +262,7 @@ batch_lower(bp::T) where {T<:Tuple} = map(bi->batch_lower(bi), bp)
 batch_lower(bp::SArray{N,BatchParam}) where {N} = batch_lower(Tuple(bp))
 
 static_batchcheck(bp) = false
-static_batchcheck(::BatchParam) = true
+static_batchcheck(::_LoweredBatchParam) = true
 @unroll function static_batchcheck(t::Tuple)
   @unroll for ti in t
     if static_batchcheck(ti)
@@ -272,6 +272,7 @@ static_batchcheck(::BatchParam) = true
   return false
 end
 
+#=
 """
     lane2vec(lane::SIMD.VecRange{N}) 
     
@@ -289,10 +290,15 @@ function lane2vec(lane::SIMD.VecRange{N}) where {N}
     return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}()))
   end
 end
+=#
+
+@inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
-lane2vec(i) = i
+@inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
+  modlane = SIMD.VecRange{N}(mod1(lane.i, B)) # mod start around batch
+  return b.batch[modlane]
+end
 
-@inline beval(b::_LoweredBatchParam, i) = b.batch[lane2vec(i)]
 @inline beval(b, i) = b
 
 # === THIS BLOCK WAS WRITTEN BY CLAUDE ===
diff --git a/src/kernel.jl b/src/kernel.jl
index c943c4f..4bdf407 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -16,12 +16,15 @@ blank_kernel!(args...) = nothing
       @show typeof(batch_lower(args))
       error("")
     end=#
-    return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args))
+    #return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args))
     _args = map(t->time_lower(batch_lower(t)), args)
     new{typeof(kernel),typeof(_args)}(kernel, _args)
   end 
 end
 
+# In case KernelCall contains batch GPU array
+Adapt.@adapt_structure KernelCall
+
 # Store the state of the reference coordinate system
 # Needed for time-dependent parameters
 struct RefState{T,U}
@@ -36,6 +39,9 @@ struct KernelChain{C<:Tuple{Vararg{<:KernelCall}}, S<:Union{Nothing,RefState}}
   KernelChain(chain, ref=nothing) = new{typeof(chain), typeof(ref)}(chain, ref)
 end
 
+# In case KernelChain contains batch GPU array
+Adapt.@adapt_structure KernelChain
+
 KernelChain(::Val{N}, ref=nothing) where {N} = KernelChain(ntuple(t->KernelCall(), Val{N}()), ref)
 
 push(kc::KernelChain, kcall::Nothing) = kc

From 00b9bf39bdbfb1fd3a34a3e2b6bfb808c99549f9 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 11:28:47 -0500
Subject: [PATCH 14/30] fix Adapt for GPU batch param

---
 Untitled.ipynb | 58 ++++++++++++++++----------------------------------
 src/batch.jl   |  2 +-
 2 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/Untitled.ipynb b/Untitled.ipynb
index 811310e..82d9d15 100644
--- a/Untitled.ipynb
+++ b/Untitled.ipynb
@@ -34,79 +34,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 225,
    "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Beamline:\n",
-       " species_ref = electron\n",
-       " E_ref = 1.8e10\n",
-       "\n",
-       " \u001b[1m Index \u001b[0m \u001b[1m Name \u001b[0m \u001b[1m Kind       \u001b[0m \u001b[1m s [m] \u001b[0m\n",
-       "  1       qf     Quadrupole   0\n",
-       "  2       d      Drift        0.5\n",
-       "  3       qd     Quadrupole   1.7\n",
-       "  4       d      Drift        2.2\n"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "@elements begin\n",
     "    qf = Quadrupole(Kn1=0.36, L=0.5)\n",
     "    d = Drift(L=1.2)\n",
     "    qd = Quadrupole(Kn1=-0.36, L=0.5)\n",
     "end\n",
-    "fodo = Beamline([qf, d, qd, d], species_ref=Species(\"electron\"), E_ref=18e9)"
+    "fodo = Beamline(repeat([qf, d, qd, d], 10000), species_ref=Species(\"electron\"), E_ref=18e9);"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 226,
    "id": "c977d76f-afc1-4387-93fe-43efcf565893",
    "metadata": {},
    "outputs": [],
    "source": [
-    "a = SA[1,2]\n"
+    "foreach(x->x.Bn0=1e-7, fodo.line)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 227,
    "id": "e739ea1c-147a-447b-9a98-8585a51f9d24",
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "N = 1000\n",
-    "b0 = Bunch(zeros(N,6), zeros(N,4))\n",
-    "track!(b0, fodo)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 180,
-   "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19",
-   "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " 12.356829 seconds (2.74 M allocations: 130.921 MiB, 0.41% gc time)\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.8012461147958558e-8, 35225.12124230655))"
+       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 0.00011341179241308246, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; … ; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
       ]
      },
-     "execution_count": 180,
+     "execution_count": 227,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "kc = track!(b0, fodo.line[1])"
+    "N = 200\n",
+    "b0 = Bunch(zeros(N,6), zeros(N,4), species=fodo.species_ref, p_over_q_ref=fodo.p_over_q_ref)\n",
+    "@time track!(b0, fodo)"
    ]
   },
   {
diff --git a/src/batch.jl b/src/batch.jl
index cd4aaa8..3a5e1e1 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -59,7 +59,7 @@ end
 # Necessary for GPU compatibility if batch is a GPU array
 function Adapt.adapt_structure(to, lbp::_LoweredBatchParam{N}) where {N}
     batch = Adapt.adapt_structure(to, lbp.batch)
-    return _LoweredBatchParam{N,typeof(batch)}(batch)
+    return _LoweredBatchParam{N}(batch)
 end
 
 # BatchParam will act like a number

From 4d2d0240fb1ee1f5b436af1de4da5bc8251777ca Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 14:34:49 -0500
Subject: [PATCH 15/30] done

---
 Untitled.ipynb     | 742 ---------------------------------------------
 src/batch.jl       |  52 ++--
 src/kernel.jl      |  12 -
 test/batch_test.jl | 167 ++++++++++
 test/runtests.jl   |   3 +-
 5 files changed, 199 insertions(+), 777 deletions(-)
 delete mode 100644 Untitled.ipynb
 create mode 100644 test/batch_test.jl

diff --git a/Untitled.ipynb b/Untitled.ipynb
deleted file mode 100644
index 82d9d15..0000000
--- a/Untitled.ipynb
+++ /dev/null
@@ -1,742 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "id": "becfe72f-f151-4c6f-91cd-4e60b4b1acab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "using Revise\n",
-    "using ProfileView\n",
-    "using BenchmarkTools"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 206,
-   "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "using BeamTracking, Beamlines, StaticArrays, SIMD"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "id": "2da06e85-ae4e-4742-9b6e-60f3d68ca6f6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "using BeamTracking: batch_lower"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 225,
-   "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "@elements begin\n",
-    "    qf = Quadrupole(Kn1=0.36, L=0.5)\n",
-    "    d = Drift(L=1.2)\n",
-    "    qd = Quadrupole(Kn1=-0.36, L=0.5)\n",
-    "end\n",
-    "fodo = Beamline(repeat([qf, d, qd, d], 10000), species_ref=Species(\"electron\"), E_ref=18e9);"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 226,
-   "id": "c977d76f-afc1-4387-93fe-43efcf565893",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "foreach(x->x.Bn0=1e-7, fodo.line)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 227,
-   "id": "e739ea1c-147a-447b-9a98-8585a51f9d24",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      " 12.356829 seconds (2.74 M allocations: 130.921 MiB, 0.41% gc time)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 0.00011341179241308246, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; … ; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
-      ]
-     },
-     "execution_count": 227,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "N = 200\n",
-    "b0 = Bunch(zeros(N,6), zeros(N,4), species=fodo.species_ref, p_over_q_ref=fodo.p_over_q_ref)\n",
-    "@time track!(b0, fodo)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e9aac96-d73e-4d21-a10a-0034262b9ab3",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 202,
-   "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (1.0e-7, 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, 1.0e-7, 0, 0), Val{true}(), Val{true}(), 0.5)"
-      ]
-     },
-     "execution_count": 202,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "args = kc.chain[1].args\n",
-    "#@time BeamTracking.static_batchcheck(args)\n",
-    "@inbounds BeamTracking.beval(args, 2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 174,
-   "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, SVector{2, BatchParam}, SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5))"
-      ]
-     },
-     "execution_count": 174,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "kc.chain[1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 175,
-   "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)"
-      ]
-     },
-     "execution_count": 175,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "args= kc.chain[1].args"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 176,
-   "id": "5f17a22d-aae7-4a1d-a7db-d5ec80fd1c7c",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "true"
-      ]
-     },
-     "execution_count": 176,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "args[end-3][4].batch === args[2][9][1].batch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 177,
-   "id": "ecf2a0dd-c119-4d0e-87ab-18d31cdba961",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BatchParam([2.0e-7, 1.0e-7])"
-      ]
-     },
-     "execution_count": 177,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "args[2][9][1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 178,
-   "id": "b762b051-237e-4917-b0ff-8f35fb25beb1",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)"
-      ]
-     },
-     "execution_count": 178,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "batch_lower(args)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 91,
-   "id": "702fa820-feb2-46a1-b84d-a763de300a8c",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-0.0"
-      ]
-     },
-     "execution_count": 91,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "batch_lower(args[2][9])[1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "id": "3f49a56d-fd3b-4a50-9419-bfa5db328b51",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "foreach(x->x.Bn0=1e-7, fodo.line)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "id": "3e7a06b6-1f48-474a-807e-eb9515488897",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  2.309 ms (409 allocations: 19.30 KiB)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.9552193012013305e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; … ; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
-      ]
-     },
-     "execution_count": 55,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "@btime track!($b0, $fodo)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 207,
-   "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "foreach(x->x.Kn0=BatchParam([2e-7, 1e-7]), fodo.line)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "id": "21a8efb7-db32-4465-93fb-1d0e56b9a3e2",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  2.349 ms (923 allocations: 29.17 KiB)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 4.0692151117807276e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01  …  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [-5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; … ; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "@btime track!($b0, $fodo)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "id": "9f5d074b-31ca-420d-943d-67ca4fbe4804",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element Vector{BatchParam}:\n",
-       " BatchParam(-0.0)\n",
-       " BatchParam(0.0)"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "t[1][end]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 128,
-   "id": "213e0113-edba-427d-b8cc-1cf18352fc57",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BMultipoleParams\n",
-       " Bn0   = BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n",
-       " Kn1   = BatchParam(0.36)\n"
-      ]
-     },
-     "execution_count": 128,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "bm = qf.BMultipoleParams"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "id": "bef5d1b8-9aaa-4be9-abe6-d17c8f886aef",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BeamTrackingBeamlinesExt"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "const BTBL = Base.get_extension(BeamTracking, :BeamTrackingBeamlinesExt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 116,
-   "id": "2efbe468-28af-4e0c-b7ec-ee6aa4e980bd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "n = BatchParam[BatchParam([-1.2008307422294578e-5]), BatchParam(0.36)]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "(BatchParam[BatchParam(0.0f0), BatchParam(0.36)], -60.04153711147289, Bool[0, 1])"
-      ]
-     },
-     "execution_count": 116,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np, p_over_q_ref, normalized =  BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 133,
-   "id": "808e9a36-b6a9-4cca-9db9-1f5ff3c8c2e8",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.000219 seconds (176 allocations: 4.984 KiB)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
-      ]
-     },
-     "execution_count": 133,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "@time kn, ks = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 137,
-   "id": "d666c09a-0d3c-4adf-a88f-229ab6b0d52a",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BatchParam"
-      ]
-     },
-     "execution_count": 137,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "L=qf.L\n",
-    "promote_type(reduce(promote_type, typeof.(bm.n)), \n",
-    "                      reduce(promote_type, typeof.(bm.s)),\n",
-    "                      reduce(promote_type, typeof.(bm.tilt)),\n",
-    "                      typeof(L), typeof(p_over_q_ref)\n",
-    "      )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 136,
-   "id": "f1c249b6-f18d-49a9-ac92-ac931862d027",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BatchParam"
-      ]
-     },
-     "execution_count": 136,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "promote_type(eltype(bm.n), typeof(qf.L), typeof(p_over_q_ref))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 144,
-   "id": "9598c822-1f59-42eb-959e-50a273355b66",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}} with indices SOneTo(2):\n",
-       " BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n",
-       " BatchParam(0.36)"
-      ]
-     },
-     "execution_count": 144,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "getfield(bm, :n)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 153,
-   "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.000089 seconds (104 allocations: 1.906 KiB)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])"
-      ]
-     },
-     "execution_count": 153,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "@time BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 130,
-   "id": "e93f6ce2-9383-49b5-b798-3d33380c8214",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
-       " BatchParam([2.0e-7, 1.0e-7])\n",
-       " BatchParam(0.36)"
-      ]
-     },
-     "execution_count": 130,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "kn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 131,
-   "id": "fb0ca353-c7a2-4c90-bd93-4e74cbb2d981",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
-       " BatchParam(-0.0)\n",
-       " BatchParam(0.0)"
-      ]
-     },
-     "execution_count": 131,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ks"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "63312df7-bd24-4dc6-9369-5323c2121925",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
-       " BatchParam(-0.0)\n",
-       " BatchParam(0.36)"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "kn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "9bf348a6-928d-459c-96dc-54ce19162426",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n",
-       " BatchParam(-0.0)\n",
-       " BatchParam(0.0)"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ks"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "930fb9e4-ef96-42d9-8d0d-4bcec336f585",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "search: \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1my\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22md\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mize \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer Unde\u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mI\u001b[22m\u001b[0m\u001b[1mn\u001b[22miti\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/latex": [
-       "\\begin{verbatim}\n",
-       "finally\n",
-       "\\end{verbatim}\n",
-       "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n",
-       "\n",
-       "\\begin{verbatim}\n",
-       "f = open(\"file\")\n",
-       "try\n",
-       "    operate_on_file(f)\n",
-       "finally\n",
-       "    close(f)\n",
-       "end\n",
-       "\\end{verbatim}\n",
-       "When control leaves the \\href{@ref}{\\texttt{try}} block (for example, due to a \\href{@ref}{\\texttt{return}}, or just finishing normally), \\href{@ref}{\\texttt{close(f)}} will be executed. If the \\texttt{try} block exits due to an exception, the exception will continue propagating. A \\texttt{catch} block may be combined with \\texttt{try} and \\texttt{finally} as well. In this case the \\texttt{finally} block will run after \\texttt{catch} has handled the error.\n",
-       "\n"
-      ],
-      "text/markdown": [
-       "```\n",
-       "finally\n",
-       "```\n",
-       "\n",
-       "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n",
-       "\n",
-       "```julia\n",
-       "f = open(\"file\")\n",
-       "try\n",
-       "    operate_on_file(f)\n",
-       "finally\n",
-       "    close(f)\n",
-       "end\n",
-       "```\n",
-       "\n",
-       "When control leaves the [`try`](@ref) block (for example, due to a [`return`](@ref), or just finishing normally), [`close(f)`](@ref) will be executed. If the `try` block exits due to an exception, the exception will continue propagating. A `catch` block may be combined with `try` and `finally` as well. In this case the `finally` block will run after `catch` has handled the error.\n"
-      ],
-      "text/plain": [
-       "\u001b[36m  finally\u001b[39m\n",
-       "\n",
-       "  Run some code when a given block of code exits, regardless of how it exits.\n",
-       "  For example, here is how we can guarantee that an opened file is closed:\n",
-       "\n",
-       "\u001b[36m  f = open(\"file\")\u001b[39m\n",
-       "\u001b[36m  try\u001b[39m\n",
-       "\u001b[36m      operate_on_file(f)\u001b[39m\n",
-       "\u001b[36m  finally\u001b[39m\n",
-       "\u001b[36m      close(f)\u001b[39m\n",
-       "\u001b[36m  end\u001b[39m\n",
-       "\n",
-       "  When control leaves the \u001b[36mtry\u001b[39m block (for example, due to a \u001b[36mreturn\u001b[39m, or just\n",
-       "  finishing normally), \u001b[36mclose(f)\u001b[39m will be executed. If the \u001b[36mtry\u001b[39m block exits due\n",
-       "  to an exception, the exception will continue propagating. A \u001b[36mcatch\u001b[39m block may\n",
-       "  be combined with \u001b[36mtry\u001b[39m and \u001b[36mfinally\u001b[39m as well. In this case the \u001b[36mfinally\u001b[39m block\n",
-       "  will run after \u001b[36mcatch\u001b[39m has handled the error."
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "?finally"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b2606c31-0971-4091-a110-b22588873197",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Julia Global (10 threads) 1.10",
-   "language": "julia",
-   "name": "julia-global-_10-threads_-1.10"
-  },
-  "language_info": {
-   "file_extension": ".jl",
-   "mimetype": "application/julia",
-   "name": "julia",
-   "version": "1.10.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/src/batch.jl b/src/batch.jl
index 3a5e1e1..b9ce3c6 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -48,6 +48,13 @@ struct BatchParam
     separate processes where each process has its own lattice with its 
     own TimeDependentParams.
   =#
+  function BatchParam(batch::AbstractArray)
+    if length(batch) == 1
+      error("Cannot make BatchParam with array of length 1")
+    end
+    return new(batch)
+  end
+  BatchParam(n::Number) = new(n)
 end
 
 struct _LoweredBatchParam{N,V<:AbstractArray}
@@ -213,6 +220,29 @@ function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N}
   return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch))
 end
 
+atan2(bpa::BatchParam, bpb::BatchParam) = _batch_atan2(bpa.batch, bpb.batch)
+
+function _batch_atan2(batch_a, batch_b)
+  if batch_a isa Number
+    if batch_b isa Number
+      return BatchParam(atan2(batch_a, batch_b))
+    else
+      let a = batch_a
+        return BatchParam(map((bi)->atan2(a, bi), batch_b))
+      end
+    end
+  elseif batch_b isa Number
+    let b = batch_b
+      return BatchParam(map((ai)->atan2(ai, b), batch_a))
+    end
+  elseif length(batch_a) == length(batch_b)
+    return BatchParam(map((ai,bi)->atan2(ai, bi), batch_a, batch_b))
+  else
+    error("Cannot perform operation ^ with two non-scalar BatchParams of differing 
+            lengths (received lengths $(length(batch_a)) and $(length(batch_b))).")
+  end
+end
+
 Base.:+(b::BatchParam) = b # identity
 
 for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv,
@@ -223,8 +253,6 @@ for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :in
   end
 end
 
-atan2(b1::BatchParam, b2::BatchParam) = BatchParam(map((x,y)->atan2(x,y), b1.batch, b2.batch))
-
 for t = (:unit, :sincu, :sinhc, :sinhcu, :asinc, :asincu, :asinhc, :asinhcu, :erf, 
          :erfc, :erfcx, :erfi, :wf, :rect)
   @eval begin
@@ -272,26 +300,6 @@ static_batchcheck(::_LoweredBatchParam) = true
   return false
 end
 
-#=
-"""
-    lane2vec(lane::SIMD.VecRange{N}) 
-    
-Given a SIMD.VecRange, will return an equivalent SIMD.Vec that
-can be used in arithmetic operations for mapping integer indices
-of particles to a given element in a batch.
-"""
-function lane2vec(lane::SIMD.VecRange{N}) where {N}
-  # Try to match with vector register size, but 
-  # only up to UInt32 -> ~4.3 billion particles, 
-  # probably max on CPU...
-  if Int(pick_vector_width(UInt32)) == N
-    return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}()))
-  else
-    return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}()))
-  end
-end
-=#
-
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
diff --git a/src/kernel.jl b/src/kernel.jl
index 4bdf407..1ab8522 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -9,14 +9,6 @@ blank_kernel!(args...) = nothing
   kernel::K = blank_kernel!
   args::A   = ()
   function KernelCall(kernel, args)
-    
-    #=if args != ()
-      @show args
-      @show batch_lower(args)
-      @show typeof(batch_lower(args))
-      error("")
-    end=#
-    #return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args))
     _args = map(t->time_lower(batch_lower(t)), args)
     new{typeof(kernel),typeof(_args)}(kernel, _args)
   end 
@@ -77,11 +69,7 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref)
 end
 
 function process_batch_args(i, args)
-  #=    @show args
-    error("")=#
   if static_batchcheck(args) 
-    # beval(args, i)
-    #error("")
     return beval(args, i)
   else
     return args
diff --git a/test/batch_test.jl b/test/batch_test.jl
new file mode 100644
index 0000000..1c20b2d
--- /dev/null
+++ b/test/batch_test.jl
@@ -0,0 +1,167 @@
+using Beamlines
+using BeamTracking
+using Test
+using LinearAlgebra
+
+# Pushes 4 equal particles with 4 batched parameters
+# compares when particle is pushed through 4 different elements
+function test_batch(
+  batch_params, 
+  nonbatch_params=(;); 
+  v=nothing, 
+  q=nothing, 
+  extra_tests=(),
+)
+
+  if isnothing(v)
+    v = repeat((rand(1,6).-0.5)*1e-4, 4, 1) 
+  end
+  if isnothing(q)
+    q = repeat(mapslices(normalize, rand(1,4), dims=2), 4, 1)
+  end
+
+  E_ref = 5e9
+  species = Species("electron")
+
+  ele_1 = LineElement()
+  ele_2 = LineElement()
+  ele_3 = LineElement()
+  ele_4 = LineElement()
+  ele_batch = LineElement()
+  for (k,val) in pairs(batch_params)
+    val_1 = -val
+    val_2 = -0.5*val
+    val_3 = 0.5*val
+    val_4 = val
+    setproperty!(ele_1, k, val_1)
+    setproperty!(ele_2, k, val_2)
+    setproperty!(ele_3, k, val_3)
+    setproperty!(ele_4, k, val_4)
+    setproperty!(ele_batch, k, BatchParam([val_1, val_2, val_3, val_4]))
+  end
+  for (k,val) in pairs(nonbatch_params)
+    setproperty!(ele_1, k, val)
+    setproperty!(ele_2, k, val)
+    setproperty!(ele_3, k, val)
+    setproperty!(ele_4, k, val)
+    setproperty!(ele_batch, k, val)
+  end
+
+  bl_1 = Beamline([ele_1], E_ref=E_ref, species_ref=species)
+  bl_2 = Beamline([ele_2], E_ref=E_ref, species_ref=species)
+  bl_3 = Beamline([ele_3], E_ref=E_ref, species_ref=species)
+  bl_4 = Beamline([ele_4], E_ref=E_ref, species_ref=species)
+  bl_batch = Beamline([ele_batch], E_ref=E_ref, species_ref=species)
+
+  b0_1 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_1.p_over_q_ref, species=bl_1.species_ref)
+  b0_2 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_2.p_over_q_ref, species=bl_2.species_ref)
+  b0_3 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_3.p_over_q_ref, species=bl_3.species_ref)
+  b0_4 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_4.p_over_q_ref, species=bl_4.species_ref)
+
+  b0_batch = Bunch(v, q; p_over_q_ref=bl_batch.p_over_q_ref, species=bl_batch.species_ref)
+
+  track!(b0_1, bl_1)
+  track!(b0_2, bl_2)
+  track!(b0_3, bl_3)
+  track!(b0_4, bl_4)
+
+  # Ensure branchlessness of parameters with explicit SIMD
+  track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) 
+  
+  @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
+  @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v
+  @test b0_batch.coords.v[3,:]' ≈ b0_3.coords.v
+  @test b0_batch.coords.v[4,:]' ≈ b0_4.coords.v
+
+  @test b0_batch.coords.q[1,:]' ≈ b0_1.coords.q
+  @test b0_batch.coords.q[2,:]' ≈ b0_2.coords.q
+  @test b0_batch.coords.q[3,:]' ≈ b0_3.coords.q
+  @test b0_batch.coords.q[4,:]' ≈ b0_4.coords.q
+  
+  for extra_test in extra_tests
+    @test extra_test(b0_plus, b0_minus, b0_time)
+  end
+end
+
+@testset "Batch" begin
+  # Test each of the splits: DKD, MKM, SKS, BKB
+  # MKM:
+  test_batch((;Kn1=0.36), (;L=0.5))
+  test_batch((;Kn1=0.36, Ks2=-1.2, Kn12=105.), (;L=3.4))
+
+  # SKS:
+  test_batch((;Ksol=1.2), (;L=3.4)) 
+  test_batch((;Ksol=0.23, Kn1=0.36, Ks2=-1.2, Kn12=105.), (;L=3.4))
+
+  # DKD:
+  test_batch((;Kn0=1e-2, Kn1=-2, Kn3=14), (;L=5.1))
+
+  # BKB not working at the moment because bend multipoles not 
+  # implemented and Ks0 (also stored) becomes a TimeDependentParam
+  # test_batch((;Kn0=1e-4, e1=2e-2, e2=3e-2), (;L=1.4))
+  
+  Kn1 = 0.36
+  Ks2 = -1.2
+  L = 3.4
+  Kn12 = 105.
+  Kn3 = 14
+  Kn0 = 1e-2
+  Ksol = -0.23
+  test_batch((;Kn1=Kn1), (;L=L))
+  test_batch((;Kn1=Kn1, Ks2=Ks2, Kn12=Kn12), (;L=L))
+
+  # SKS:
+  test_batch((;Ksol=Ksol), (;L=L))
+  test_batch((;Ksol=Ksol, Kn1=Kn1, Ks2=Ks2, Kn12=Kn12), (;L=L))
+
+  # DKD:
+  test_batch((;Kn0=Kn0, Kn1=Kn1, Kn3=Kn3), (;L=L))
+
+  # Now with different types of multipoles entered:
+  test_batch((;Bn1=Kn1), (;L=0.5))
+  test_batch((;Bn1L=Kn1, Ks2L=Ks2, Bn12=Kn12), (;L=L))
+
+  # SKS:
+  test_batch((;Bsol=Ksol), (;L=L))
+  test_batch((;BsolL=Ksol), (;L=L))
+  test_batch((;KsolL=Ksol, Bn1L=Kn1, Bs2=Ks2, Kn12L=Kn12), (;L=L))
+
+  # DKD:
+  test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=L))
+  test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=L))
+  test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=0))
+  test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=0))
+
+  #=
+  # Aperture:
+  # let's make a time-dependent aperture which oscillates but will allow both
+  # particles through
+  function check_state(p, m, t, state=BeamTracking.STATE_ALIVE)
+    if !all(p.coords.state .== m.coords.state .== t.coords.state .== state)
+      error("Test failed: p.coords.state = $(p.coords.state), m.coords.state = $(m.coords.state),
+      t.coords.state = $(t.coords.state), state = $state")
+    else
+      return true
+    end
+  end
+
+  test_batch(
+    (;x2_limit=1), 
+    (;aperture_shape = ApertureShape.Rectangular); 
+    v=[0.5 0 0 0 0 0],
+    ft=(v)->v*cos(2*Time()),
+    extra_tests=(check_state,)
+  )
+
+  # now one where all particle should die
+  test_batch(
+    (;x2_limit=1), 
+    (;aperture_shape = ApertureShape.Rectangular); 
+    v=[0.5 0 0 0 0 0],
+    ft=(v)->v*sin(2*Time()),
+    extra_tests=((p,m,t)->check_state(p,m,t,BeamTracking.STATE_LOST_POS_X),)
+  )
+
+=#
+  
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 7fefaea..9584a71 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -200,4 +200,5 @@ include("alignment_tracking_test.jl")
 include("aperture_tracking_test.jl")
 include("ExactTracking_test.jl")
 include("IntegrationTracking_test.jl")
-include("time_test.jl")
\ No newline at end of file
+include("time_test.jl")
+include("batch_test.jl")
\ No newline at end of file

From 2b433ab0dfcabf8e86ae1ca1d1ab678acf6f9f41 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 14:35:05 -0500
Subject: [PATCH 16/30] bump

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 2ddae00..3e0f888 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "BeamTracking"
 uuid = "8ef5c10a-4ca3-437f-8af5-b84d8af36df0"
 authors = ["mattsignorelli <mgs255@cornell.edu> and contributors"]
-version = "0.5.5"
+version = "0.5.6"
 
 [deps]
 Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"

From 06d85ee092205401854ddd5289925c62c57e580e Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 14:58:49 -0500
Subject: [PATCH 17/30] linear algebra

---
 Project.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 3e0f888..ee70594 100644
--- a/Project.toml
+++ b/Project.toml
@@ -52,8 +52,9 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Distributions", "JET", "GTPSA", "BenchmarkTools", "Beamlines", "StaticArrays"]
+test = ["Test", "Distributions", "JET", "GTPSA", "BenchmarkTools", "Beamlines", "StaticArrays", "LinearAlgebra"]

From 0b75697bf7b11c92a03b83a0518917e6305ca914 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:06:37 -0500
Subject: [PATCH 18/30] maybe fix

---
 src/batch.jl | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/batch.jl b/src/batch.jl
index b9ce3c6..b4b30a3 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -303,8 +303,26 @@ end
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
-  modlane = SIMD.VecRange{N}(mod1(lane.i, B)) # mod start around batch
-  return b.batch[modlane]
+  m = rem(lane2vec(lane), B)
+  return b.batch[vifelse(m == 0, B, m)]
+end
+
+"""
+    lane2vec(lane::SIMD.VecRange{N}) 
+    
+Given a SIMD.VecRange, will return an equivalent SIMD.Vec that
+can be used in arithmetic operations for mapping integer indices
+of particles to a given element in a batch.
+"""
+function lane2vec(lane::SIMD.VecRange{N}) where {N}
+  # Try to match with vector register size, but 
+  # only up to UInt32 -> ~4.3 billion particles, 
+  # probably max on CPU...
+  if Int(pick_vector_width(UInt32)) == N
+    return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}()))
+  else
+    return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}()))
+  end
 end
 
 @inline beval(b, i) = b

From 34317bfd92ae3bfc0baf3f5c2d80a9db5cc1daba Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:30:47 -0500
Subject: [PATCH 19/30] remove inbounds

---
 src/kernel.jl                | 12 ++++++------
 src/kernels/rfcavity_kick.jl |  4 ++--
 src/kernels/yoshida.jl       | 16 ++++++++--------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/kernel.jl b/src/kernel.jl
index 1ab8522..3c321e4 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -203,9 +203,9 @@ macro makekernel(args...)
     if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          @inbounds begin
+          #@inbounds begin
             $(body)
-          end
+          #end
         end
       end
     else # no inbounds
@@ -219,17 +219,17 @@ macro makekernel(args...)
     if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          @inbounds begin @FastGTPSA begin
+          #@inbounds begin @FastGTPSA begin
             $(body)
-          end end
+          #end end
         end
       end
     else # no inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          @FastGTPSA begin
+          #@FastGTPSA begin
             $(body)
-          end 
+          #end 
         end
       end
 
diff --git a/src/kernels/rfcavity_kick.jl b/src/kernels/rfcavity_kick.jl
index 9e22898..dd56281 100644
--- a/src/kernels/rfcavity_kick.jl
+++ b/src/kernels/rfcavity_kick.jl
@@ -107,7 +107,7 @@ end
 
 
 function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm, kn, ks, L)
-  @FastGTPSA begin @inbounds begin
+  #@FastGTPSA begin @inbounds begin
     v = coords.v
     alive = (coords.state[i] == STATE_ALIVE)
     #r2 = v[i,XI]*v[i,XI] + v[i,YI]*v[i,YI]
@@ -152,7 +152,7 @@ function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm
     else
       omega = (ox, oy, oz)
     end
-  end end
+  #end end
   return omega
 end
 
diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl
index 4bed375..a6f77fb 100644
--- a/src/kernels/yoshida.jl
+++ b/src/kernels/yoshida.jl
@@ -3,7 +3,7 @@
 #
 
 @inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  @inbounds begin
+  #@inbounds begin
     if !isnothing(edge_params) && fringe_in
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
@@ -24,12 +24,12 @@
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
+  #end
 end
 
 
 @inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  @inbounds begin
+  #@inbounds begin
     w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step
     w1 =  1.3512071919596577718181151794851757586002349853515625*ds_step
     if !isnothing(edge_params) && fringe_in
@@ -54,12 +54,12 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
+  #end
 end
 
 
 @inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  @inbounds begin
+  #@inbounds begin
     w0 =  1.315186320683911169737712043570355*ds_step
     w1 = -1.17767998417887100694641568096432*ds_step
     w2 =  0.235573213359358133684793182978535*ds_step
@@ -90,12 +90,12 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
+  #end
 end
 
 
 @inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  @inbounds begin
+  #@inbounds begin
     w0 =  1.7084530707869978*ds_step
     w1 =  0.102799849391985*ds_step
     w2 = -1.96061023297549*ds_step
@@ -138,5 +138,5 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  end
+  #end
 end
\ No newline at end of file

From 00b75f0d1313ce21ad53f88254cb824471c64d8e Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:31:23 -0500
Subject: [PATCH 20/30] make first test

---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 9584a71..3f791cb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -195,10 +195,10 @@ function quaternion_coeffs_approx_equal(q_expected, q_calculated, ϵ)
   return all_ok
 end
 
+include("batch_test.jl")
 include("BeamlinesExt_test.jl")
 include("alignment_tracking_test.jl")
 include("aperture_tracking_test.jl")
 include("ExactTracking_test.jl")
 include("IntegrationTracking_test.jl")
 include("time_test.jl")
-include("batch_test.jl")
\ No newline at end of file

From 9edd6ae2310451a151e10be9ef361f39fcced745 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:37:09 -0500
Subject: [PATCH 21/30] try no explicit SIMD

---
 test/batch_test.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/batch_test.jl b/test/batch_test.jl
index 1c20b2d..e96220b 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,7 +66,7 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) 
+  track!(b0_batch, bl_batch; use_explicit_SIMD=false, use_KA=true) 
   
   @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
   @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v

From 99317cd50215021e4fc77d711305a77cb32026a4 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:52:52 -0500
Subject: [PATCH 22/30] put inbounds bck in

---
 src/kernel.jl          | 12 ++++++------
 src/kernels/yoshida.jl | 16 ++++++++--------
 test/runtests.jl       |  2 ++
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/kernel.jl b/src/kernel.jl
index 3c321e4..1ab8522 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -203,9 +203,9 @@ macro makekernel(args...)
     if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          #@inbounds begin
+          @inbounds begin
             $(body)
-          #end
+          end
         end
       end
     else # no inbounds
@@ -219,17 +219,17 @@ macro makekernel(args...)
     if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          #@inbounds begin @FastGTPSA begin
+          @inbounds begin @FastGTPSA begin
             $(body)
-          #end end
+          end end
         end
       end
     else # no inbounds
       return quote
         @inline function $(fcn_name)($(args...))
-          #@FastGTPSA begin
+          @FastGTPSA begin
             $(body)
-          #end 
+          end 
         end
       end
 
diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl
index a6f77fb..4bed375 100644
--- a/src/kernels/yoshida.jl
+++ b/src/kernels/yoshida.jl
@@ -3,7 +3,7 @@
 #
 
 @inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  #@inbounds begin
+  @inbounds begin
     if !isnothing(edge_params) && fringe_in
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1)
@@ -24,12 +24,12 @@
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  #end
+  end
 end
 
 
 @inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  #@inbounds begin
+  @inbounds begin
     w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step
     w1 =  1.3512071919596577718181151794851757586002349853515625*ds_step
     if !isnothing(edge_params) && fringe_in
@@ -54,12 +54,12 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  #end
+  end
 end
 
 
 @inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  #@inbounds begin
+  @inbounds begin
     w0 =  1.315186320683911169737712043570355*ds_step
     w1 = -1.17767998417887100694641568096432*ds_step
     w2 =  0.235573213359358133684793182978535*ds_step
@@ -90,12 +90,12 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  #end
+  end
 end
 
 
 @inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out}
-  #@inbounds begin
+  @inbounds begin
     w0 =  1.7084530707869978*ds_step
     w1 =  0.102799849391985*ds_step
     w2 = -1.96061023297549*ds_step
@@ -138,5 +138,5 @@ end
       a, tilde_m, Ksol, Kn0, e1, e2 = edge_params
       linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1)
     end
-  #end
+  end
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 3f791cb..5082bae 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,6 +15,8 @@ using BeamTracking: Coords, KernelCall, Q0, QX, QY, QZ, STATE_ALIVE, STATE_LOST,
       quat_mul, quat_rotate, gaussian_random
 using Beamlines: isactive
 
+@show BeamTracking.REGISTER_SIZE
+
 BenchmarkTools.DEFAULT_PARAMETERS.gctrial = false
 BenchmarkTools.DEFAULT_PARAMETERS.evals = 2
 

From 98dbce80f0ed77d80c0c634a6affa19f30b367b6 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 16:53:14 -0500
Subject: [PATCH 23/30] put inbounds back in

---
 src/kernels/rfcavity_kick.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kernels/rfcavity_kick.jl b/src/kernels/rfcavity_kick.jl
index dd56281..9e22898 100644
--- a/src/kernels/rfcavity_kick.jl
+++ b/src/kernels/rfcavity_kick.jl
@@ -107,7 +107,7 @@ end
 
 
 function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm, kn, ks, L)
-  #@FastGTPSA begin @inbounds begin
+  @FastGTPSA begin @inbounds begin
     v = coords.v
     alive = (coords.state[i] == STATE_ALIVE)
     #r2 = v[i,XI]*v[i,XI] + v[i,YI]*v[i,YI]
@@ -152,7 +152,7 @@ function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm
     else
       omega = (ox, oy, oz)
     end
-  #end end
+  end end
   return omega
 end
 

From 006125c2c5468510507ae8e4298af390be235976 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 19:57:11 -0500
Subject: [PATCH 24/30] comments fix compiler bug >:(

---
 src/batch.jl       | 5 ++++-
 test/batch_test.jl | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/batch.jl b/src/batch.jl
index b4b30a3..619886d 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -304,7 +304,10 @@ end
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
   m = rem(lane2vec(lane), B)
-  return b.batch[vifelse(m == 0, B, m)]
+  i = vifelse(m == 0, B, m)
+  #@show i
+  #@show b.batch[i]
+  return b.batch[i]
 end
 
 """
diff --git a/test/batch_test.jl b/test/batch_test.jl
index e96220b..1c20b2d 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,7 +66,7 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  track!(b0_batch, bl_batch; use_explicit_SIMD=false, use_KA=true) 
+  track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) 
   
   @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
   @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v

From bf597bd0acf57f4f6a9d5e344021aa971b9ce132 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Thu, 5 Feb 2026 20:07:01 -0500
Subject: [PATCH 25/30] error if called on Linux or Windows <v1.11

---
 src/batch.jl       | 5 +++++
 test/batch_test.jl | 9 ++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/batch.jl b/src/batch.jl
index 619886d..4cf2a46 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -303,6 +303,11 @@ end
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
+  @static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
+    error("LLVM has a compiler bug with batch parameter explicit SIMD on Julia 
+           versions < 1.11 AND a Windows or Linux operating system. To get around 
+           this, specify the track! flags: use_explicit_SIMD=false, use_KA=true.")
+  end
   m = rem(lane2vec(lane), B)
   i = vifelse(m == 0, B, m)
   #@show i
diff --git a/test/batch_test.jl b/test/batch_test.jl
index 1c20b2d..cf48b9f 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,7 +66,14 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) 
+  if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
+    use_explicit_SIMD=false
+    use_KA=true
+  else
+    use_explicit_SIMD=true
+    use_KA=false
+  end
+  track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD, use_KA=use_KA) 
   
   @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
   @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v

From 1d442e6ace1e9a53926dbac7057a4e2af3d3adc4 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Fri, 6 Feb 2026 09:50:20 -0500
Subject: [PATCH 26/30] check if x64 or aarch problem with lts

---
 .github/workflows/CI.yml | 12 ++++++++++++
 src/batch.jl             |  6 +++---
 test/batch_test.jl       | 12 +++++-------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index b86cccf..109317c 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -40,6 +40,18 @@ jobs:
             arch: aarch64
           - os: macos-latest
             arch: x64
+        include:
+          - os: ubuntu-24.04-arm
+            version: 'lts'
+            arch: aarch64
+        include:
+          - os: ubuntu-24.04-arm
+            version: '1.11'
+            arch: aarch64
+        include:
+          - os: ubuntu-24.04-arm
+            version: '1.12'
+            arch: aarch64
     steps:
       - uses: actions/checkout@v6
       - uses: julia-actions/setup-julia@v2
diff --git a/src/batch.jl b/src/batch.jl
index 4cf2a46..e75700f 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -303,11 +303,11 @@ end
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
-  @static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
+  #=@static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
     error("LLVM has a compiler bug with batch parameter explicit SIMD on Julia 
            versions < 1.11 AND a Windows or Linux operating system. To get around 
-           this, specify the track! flags: use_explicit_SIMD=false, use_KA=true.")
-  end
+           this, specify the `track!` keyword argument: `use_explicit_SIMD=false`")
+  end=#
   m = rem(lane2vec(lane), B)
   i = vifelse(m == 0, B, m)
   #@show i
diff --git a/test/batch_test.jl b/test/batch_test.jl
index cf48b9f..e7eb35c 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,14 +66,12 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
-    use_explicit_SIMD=false
-    use_KA=true
-  else
+  #if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
+  #  use_explicit_SIMD=false
+ # else
     use_explicit_SIMD=true
-    use_KA=false
-  end
-  track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD, use_KA=use_KA) 
+ #end
+  track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD) 
   
   @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
   @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v

From 453e4370987205f47c9985fa4439045adbe7b640 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Fri, 6 Feb 2026 09:56:44 -0500
Subject: [PATCH 27/30] fix CI

---
 .github/workflows/CI.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 109317c..a3e09da 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -44,11 +44,9 @@ jobs:
           - os: ubuntu-24.04-arm
             version: 'lts'
             arch: aarch64
-        include:
           - os: ubuntu-24.04-arm
             version: '1.11'
             arch: aarch64
-        include:
           - os: ubuntu-24.04-arm
             version: '1.12'
             arch: aarch64

From 2bf74e03df8830fe388bed83d2c062c7325e7adc Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Fri, 6 Feb 2026 10:02:43 -0500
Subject: [PATCH 28/30] check time test

---
 src/batch.jl       | 13 ++++++-------
 test/batch_test.jl |  8 ++++----
 test/runtests.jl   |  3 ++-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/batch.jl b/src/batch.jl
index e75700f..c04fb59 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -303,15 +303,14 @@ end
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
-  #=@static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
-    error("LLVM has a compiler bug with batch parameter explicit SIMD on Julia 
-           versions < 1.11 AND a Windows or Linux operating system. To get around 
-           this, specify the `track!` keyword argument: `use_explicit_SIMD=false`")
-  end=#
+  @static if (VERSION < v"1.11" && Sys.ARCH == :x64)
+    error("Julia's explicit SIMD.jl has a compiler bug that appears with batch 
+           parameters on versions < 1.11 AND an x64 bit architecture, which we 
+           detected that you have. To get around this, specify the `track!` 
+           keyword argument `use_explicit_SIMD=false`")
+  end
   m = rem(lane2vec(lane), B)
   i = vifelse(m == 0, B, m)
-  #@show i
-  #@show b.batch[i]
   return b.batch[i]
 end
 
diff --git a/test/batch_test.jl b/test/batch_test.jl
index e7eb35c..e76685f 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,11 +66,11 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  #if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux()))
-  #  use_explicit_SIMD=false
- # else
+  if (VERSION < v"1.11" && Sys.ARCH == :x64)
+    use_explicit_SIMD=false
+  else
     use_explicit_SIMD=true
- #end
+  end
   track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD) 
   
   @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v
diff --git a/test/runtests.jl b/test/runtests.jl
index 5082bae..b72231d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -198,9 +198,10 @@ function quaternion_coeffs_approx_equal(q_expected, q_calculated, ϵ)
 end
 
 include("batch_test.jl")
+include("time_test.jl")
 include("BeamlinesExt_test.jl")
 include("alignment_tracking_test.jl")
 include("aperture_tracking_test.jl")
 include("ExactTracking_test.jl")
 include("IntegrationTracking_test.jl")
-include("time_test.jl")
+

From eee00739dfc792ac8a4cfde26c1af5cddca265d6 Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Fri, 6 Feb 2026 10:24:11 -0500
Subject: [PATCH 29/30] show arch in tests

---
 test/runtests.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index b72231d..46935da 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,6 +16,7 @@ using BeamTracking: Coords, KernelCall, Q0, QX, QY, QZ, STATE_ALIVE, STATE_LOST,
 using Beamlines: isactive
 
 @show BeamTracking.REGISTER_SIZE
+@show Sys.ARCH
 
 BenchmarkTools.DEFAULT_PARAMETERS.gctrial = false
 BenchmarkTools.DEFAULT_PARAMETERS.evals = 2

From 86d5b3d8e16d33f15c34b7caa890e3739599191a Mon Sep 17 00:00:00 2001
From: mattsignorelli <mgs255@cornell.edu>
Date: Fri, 6 Feb 2026 13:56:47 -0500
Subject: [PATCH 30/30] check if fixed

---
 src/batch.jl       | 4 ++--
 src/kernel.jl      | 2 +-
 test/batch_test.jl | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/batch.jl b/src/batch.jl
index c04fb59..c9331b4 100644
--- a/src/batch.jl
+++ b/src/batch.jl
@@ -303,9 +303,9 @@ end
 @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)]
 
 @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N}
-  @static if (VERSION < v"1.11" && Sys.ARCH == :x64)
+  @static if (VERSION < v"1.11" && Sys.ARCH == :x86_64)
     error("Julia's explicit SIMD.jl has a compiler bug that appears with batch 
-           parameters on versions < 1.11 AND an x64 bit architecture, which we 
+           parameters on versions < 1.11 AND an x86_64 bit architecture, which we 
            detected that you have. To get around this, specify the `track!` 
            keyword argument `use_explicit_SIMD=false`")
   end
diff --git a/src/kernel.jl b/src/kernel.jl
index 1ab8522..3898b9b 100644
--- a/src/kernel.jl
+++ b/src/kernel.jl
@@ -95,7 +95,7 @@ end
   groupsize::Union{Nothing,Integer}=nothing, #backend isa CPU ? floor(Int,REGISTER_SIZE/sizeof(eltype(v))) : 256 
   multithread_threshold::Integer=Threads.nthreads() > 1 ? 1750*Threads.nthreads() : typemax(Int),
   use_KA::Bool=!(get_backend(coords.v) isa CPU && isnothing(groupsize)),
-  use_explicit_SIMD::Bool=!use_KA && (@static VERSION < v"1.11" || Sys.ARCH != :aarch64) # Default to use explicit SIMD on CPU, excepts for Macs above LTS bc SIMD.jl bug
+  use_explicit_SIMD::Bool=!use_KA #&& (@static VERSION < v"1.11" || Sys.ARCH != :aarch64) # Default to use explicit SIMD on CPU, excepts for Macs above LTS bc SIMD.jl bug
 ) where {V}
   v = coords.v
   N_particle = size(v, 1)
diff --git a/test/batch_test.jl b/test/batch_test.jl
index e76685f..ee4b71c 100644
--- a/test/batch_test.jl
+++ b/test/batch_test.jl
@@ -66,7 +66,7 @@ function test_batch(
   track!(b0_4, bl_4)
 
   # Ensure branchlessness of parameters with explicit SIMD
-  if (VERSION < v"1.11" && Sys.ARCH == :x64)
+  if (VERSION < v"1.11" && Sys.ARCH == :x86_64)
     use_explicit_SIMD=false
   else
     use_explicit_SIMD=true