From 3e598c84ed665f69ffe5741d0917d61af1dbb5b7 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 10:34:52 -0500 Subject: [PATCH 01/30] start --- src/batch.jl | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/kernel.jl | 1 - src/time.jl | 2 +- 3 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 src/batch.jl diff --git a/src/batch.jl b/src/batch.jl new file mode 100644 index 0000000..3310067 --- /dev/null +++ b/src/batch.jl @@ -0,0 +1,211 @@ +#= + +BatchParam is very similar to TimeDependentParam, but instead +of storing a function, it stores an arbitrary array of parameters. + +Given a batch = [k1, k2, k3], batch parameters are seen by the particles as + +Particle 1: k1 +Particle 2: k2 +Particle 3: k3 +Particle 4: k1 +Particle 5: k2 +Particle 6: k3 + +etc. + +Just like time, there are two types for batches - one type unstable +generic wrapper for an AbstractArray/Number which is manipulated at the +highest level, and a lowered representation where the numbers are made +numbers and arrays are kept but the length of the array is stored in +the type. The reason for this is so that scalars are not unnecessarily +represented as large arrays, to save memory both outside and inside the +kernel. The goal is that for large, batched simulations the type instability +of the UNPACKING step (simulation step is always type stable) is outweighted +by the simulation step. + +The lowered type does NOT have any arithmetic operations defined on it, as +it should be untouched after lowering and in the kernel evaluated for each +particle. + +=# +struct BatchParam + batch::Union{AbstractArray,Number} + #= + Currently, batches of TimeDependentParam are not supported + It is essentially impossible on the GPU because accessing an + array of functions is not type stable (every particle has its + own function). The alternative - a time function which returns a + big array - is also not possible for CPU-SIMD and would run into + memory problems on both CPU and GPU depending on how large that + static array is. + + On the CPU, the first option may be doable using FunctionWrappers, + but that would require special handling because current Time + uses bona-fide Julia functions for GPU compatibility. + + If one would like to do this right now, they should just start up + separate processes where each process has its own lattice with its + own TimeDependentParams. + =# +end + +struct _LoweredBatchParam{N,V<:AbstractArray} + batch::V + _LoweredBatchParam(batch::AbstractArray) = new{length(batch),typeof(batch)}(batch) + _LoweredBatchParam{N}(batch::AbstractArray) where {N} = new{N,typeof(batch)}(batch) +end + +# Necessary for GPU compatibility if batch is a GPU array +function Adapt.adapt_structure(to, lbp::_LoweredBatchParam{N}) where {N} + batch = Adapt.adapt_structure(to, lbp.batch) + return _LoweredBatchParam{N,typeof(batch)}(batch) +end + +# BatchParam will act like a number +# Conversion of types to BatchParam +BatchParam(a::BatchParam) = a + +# Make these apply via convert +Base.convert(::Type{BatchParam}, a::Number) = BatchParam(a) # Scalar BatchParam +Base.convert(::Type{BatchParam}, a::BatchParam) = a + +Base.zero(b::BatchParam) = BatchParam(zero(first(b.batch))) +Base.one(b::BatchParam) = BatchParam(one(first(b.batch))) + +# Now define the math operations: +for op in (:+,:-,:*,:/,:^) + @eval begin + Base.$op(ba::BatchParam, b::Number) = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end) + Base.$op(a::Number, bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end) + function Base.$op(ba::BatchParam, bb::BatchParam) + if !(ba.batch isa Number) && !(bb.batch isa Number) && length(ba.batch) != length(bb.batch) + error("Cannot perform operation $($op) with two non-scalar BatchParams of differing + lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).") + end + return BatchParam(map((x,y)->$op(x, y), ba.batch, bb.batch)) + end + end +end + +function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N} + return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch)) +end + +for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv, + :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float, + :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs) + @eval begin + Base.$t(b::BatchParam) = BatchParam(map(x->($t)(x), b.batch)) + end +end + +atan2(b1::BatchParam, b2::BatchParam) = BatchParam(map((x,y)->atan2(x,y), b1.batch, b2.batch)) + +for t = (:unit, :sincu, :sinhc, :sinhcu, :asinc, :asincu, :asinhc, :asinhcu, :erf, + :erfc, :erfcx, :erfi, :wf, :rect) + @eval begin + GTPSA.$t(b::BatchParam) = BatchParam(map(x->($t)(x), b.batch)) + end +end + + +Base.promote_rule(::Type{BatchParam}, ::Type{U}) where {U<:Number} = BatchParam +Base.broadcastable(o::BatchParam) = Ref(o) + +Base.isapprox(b::BatchParam, n::Number; kwargs...) = all(x->isapprox(x, n, kwargs...), b.batch) +Base.isapprox(n::Number, b::BatchParam; kwargs...) = all(x->isapprox(n, x, kwargs...), b.batch) +Base.:(==)(b::BatchParam, n::Number) = all(x->x == n, b.batch) +Base.:(==)(n::Number, b::BatchParam) = all(x->n == x, b.batch) +Base.isinf(b::BatchParam) = all(x->isinf(x), b.batch) + +#= +@inline teval(f::TimeFunction, t) = f(t) +@inline teval(f, t) = f + +# === THIS BLOCK WAS WRITTEN BY CLAUDE === +# Generated function for arbitrary-length tuples +@generated function teval(f::T, t) where {T<:Tuple} + N = length(T.parameters) + if N == 0 + return :(()) + end + # Use getfield with literal integer arguments + exprs = [:(teval(Base.getfield(f, $i), t)) for i in 1:N] + return :(tuple($(exprs...))) +end +# === END CLAUDE === +=# + +# Batch lowering should convert types to _LoweredBatchParam +function batch_lower(b::BatchParam) + if b.batch isa AbstractArray + return _LoweredBatchParam(b.batch) + else + return b.batch + end +end + +batch_lower(tp::TimeDependentParam) = tp.f +batch_lower(tp) = tp +# We can use map on the CPU, but not the GPU. This step of time_lower-ing is on +# the CPU and we are already type unstable here anyways, so we should do this. +time_lower(tp::T) where {T<:Tuple} = map(ti->time_lower(ti), tp) + +# Arrays MUST be converted into tuples, for SIMD +time_lower(tp::SArray{N,TimeDependentParam}) where {N} = time_lower(Tuple(tp)) + +static_timecheck(tp) = false +static_timecheck(::TimeFunction) = true +@unroll function static_timecheck(t::Tuple) + @unroll for ti in t + if static_timecheck(ti) + return true + end + end + return false +end +#static_timecheck(tp::T) where {T<:Tuple} = Val{any(t->static_timecheck(t) isa Val{true}, tp)}() + + + +batch_lower(b) = b +batch_lower(b::Tuple) = map(bi->batch_lower(bi), b) +function batch_lower(ba::SArray{N,BatchParam}) where {N} + f = Tuple(map(bi->bi.batch, ba)) + return TimeFunction(t->SArray{N}(map(fi->fi(t), f))) +end +time_lower(tp::SArray{N,Any}) where {N} = time_lower(TimeDependentParam.(tp)) + +static_timecheck(tp) = false +static_timecheck(::TimeFunction) = true +@unroll function static_timecheck(t::Tuple) + @unroll for ti in t + if static_timecheck(ti) + return true + end + end + return false +end + +""" + lane2vec(lane::SIMD.VecRange{N}) + +Given a SIMD.VecRange, will return an equivalent SIMD.Vec that +can be used in arithmetic operations for mapping integer indices +of particles to a given element in a batch. +""" +function lane2vec(lane::SIMD.VecRange{N}) where {N} + # Try to match with vector register size, but + # only up to UInt32 -> ~4.3 billion particles, + # probably max on CPU... + if Int(pick_vector_width(UInt32)) == N + return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}())) + else + return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}())) + end +end + +@inline teval(f::TimeFunction, t) = f(t) +@inline teval(f, t) = f +@inline teval(f::Tuple, t) = map(ti->teval(ti, t), f) diff --git a/src/kernel.jl b/src/kernel.jl index 07794e1..3fba566 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -64,7 +64,6 @@ end function process_args(i, coords, args, ref) if !isnothing(ref) && static_timecheck(args) let t = compute_time(coords.v[i,ZI], coords.v[i,PZI], ref) - new_args = map(arg->teval(arg, t), args) return map(arg->teval(arg, t), args) end else diff --git a/src/time.jl b/src/time.jl index 82c57f0..b7c2e9e 100644 --- a/src/time.jl +++ b/src/time.jl @@ -46,7 +46,7 @@ function Base.literal_pow(::typeof(^), da::TimeDependentParam, ::Val{N}) where { end for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv, - :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, + :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float, :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs) @eval begin Base.$t(d::TimeDependentParam) = (let f = d.f; return TimeDependentParam((t)-> ($t)(f(t))); end) From 0515cb10bbe746d8089842673e682c5ca1bd1ba7 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 10:51:28 -0500 Subject: [PATCH 02/30] first batch --- src/batch.jl | 81 ++++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/src/batch.jl b/src/batch.jl index 3310067..2ed5b27 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -111,6 +111,8 @@ end Base.promote_rule(::Type{BatchParam}, ::Type{U}) where {U<:Number} = BatchParam +Base.promote_rule(::Type{BatchParam}, ::Type{TimeDependentParam}) = error("Unable to combine BatchParams with TimeDependentParams") +Base.promote_rule(::Type{TimeDependentParam}, ::Type{BatchParam}) = error("Unable to combine BatchParams with TimeDependentParams") Base.broadcastable(o::BatchParam) = Ref(o) Base.isapprox(b::BatchParam, n::Number; kwargs...) = all(x->isapprox(x, n, kwargs...), b.batch) @@ -119,69 +121,28 @@ Base.:(==)(b::BatchParam, n::Number) = all(x->x == n, b.batch) Base.:(==)(n::Number, b::BatchParam) = all(x->n == x, b.batch) Base.isinf(b::BatchParam) = all(x->isinf(x), b.batch) -#= -@inline teval(f::TimeFunction, t) = f(t) -@inline teval(f, t) = f - -# === THIS BLOCK WAS WRITTEN BY CLAUDE === -# Generated function for arbitrary-length tuples -@generated function teval(f::T, t) where {T<:Tuple} - N = length(T.parameters) - if N == 0 - return :(()) - end - # Use getfield with literal integer arguments - exprs = [:(teval(Base.getfield(f, $i), t)) for i in 1:N] - return :(tuple($(exprs...))) -end -# === END CLAUDE === -=# - # Batch lowering should convert types to _LoweredBatchParam function batch_lower(b::BatchParam) if b.batch isa AbstractArray - return _LoweredBatchParam(b.batch) + return _LoweredBatchParam(b.batch) # Only arrays are lowered to batchparams else return b.batch end end -batch_lower(tp::TimeDependentParam) = tp.f -batch_lower(tp) = tp -# We can use map on the CPU, but not the GPU. This step of time_lower-ing is on +batch_lower(bp) = bp +# We can use map on the CPU, but not the GPU. This step of batch_lower-ing is on # the CPU and we are already type unstable here anyways, so we should do this. -time_lower(tp::T) where {T<:Tuple} = map(ti->time_lower(ti), tp) +batch_lower(bp::T) where {T<:Tuple} = map(bi->batch_lower(bi), bp) # Arrays MUST be converted into tuples, for SIMD -time_lower(tp::SArray{N,TimeDependentParam}) where {N} = time_lower(Tuple(tp)) - -static_timecheck(tp) = false -static_timecheck(::TimeFunction) = true -@unroll function static_timecheck(t::Tuple) - @unroll for ti in t - if static_timecheck(ti) - return true - end - end - return false -end -#static_timecheck(tp::T) where {T<:Tuple} = Val{any(t->static_timecheck(t) isa Val{true}, tp)}() - +batch_lower(bp::SArray{N,BatchParam}) where {N} = batch_lower(Tuple(bp)) - -batch_lower(b) = b -batch_lower(b::Tuple) = map(bi->batch_lower(bi), b) -function batch_lower(ba::SArray{N,BatchParam}) where {N} - f = Tuple(map(bi->bi.batch, ba)) - return TimeFunction(t->SArray{N}(map(fi->fi(t), f))) -end -time_lower(tp::SArray{N,Any}) where {N} = time_lower(TimeDependentParam.(tp)) - -static_timecheck(tp) = false -static_timecheck(::TimeFunction) = true -@unroll function static_timecheck(t::Tuple) +static_batchcheck(bp) = false +static_batchcheck(::BatchParam) = true +@unroll function static_batchcheck(t::Tuple) @unroll for ti in t - if static_timecheck(ti) + if static_batchcheck(ti) return true end end @@ -206,6 +167,20 @@ function lane2vec(lane::SIMD.VecRange{N}) where {N} end end -@inline teval(f::TimeFunction, t) = f(t) -@inline teval(f, t) = f -@inline teval(f::Tuple, t) = map(ti->teval(ti, t), f) +lane2vec(i) = i + +@inline beval(b::_LoweredBatchParam, i) = b.batch[lane2vec(i)] +@inline beval(b, i) = b + +# === THIS BLOCK WAS WRITTEN BY CLAUDE === +# Generated function for arbitrary-length tuples +@generated function beval(f::T, t) where {T<:Tuple} + N = length(T.parameters) + if N == 0 + return :(()) + end + # Use getfield with literal integer arguments + exprs = [:(beval(Base.getfield(f, $i), t)) for i in 1:N] + return :(tuple($(exprs...))) +end +# === END CLAUDE === \ No newline at end of file From e298f27c2133cd72078d7c199aca5d0c0c25c231 Mon Sep 17 00:00:00 2001 From: Joseph Devlin Date: Wed, 4 Feb 2026 11:54:35 -0500 Subject: [PATCH 03/30] Fringe locations --- ext/BeamTrackingBeamlinesExt/exact.jl | 12 +++- ext/BeamTrackingBeamlinesExt/yoshida.jl | 85 ++++++++++++++++--------- src/BeamTracking.jl | 1 + src/kernels/yoshida.jl | 28 ++++---- src/tracking_methods.jl | 22 +++++-- src/utils/quaternions.jl | 2 +- test/BeamlinesExt_test.jl | 28 ++++---- test/IntegrationTracking_test.jl | 4 +- 8 files changed, 116 insertions(+), 66 deletions(-) diff --git a/ext/BeamTrackingBeamlinesExt/exact.jl b/ext/BeamTrackingBeamlinesExt/exact.jl index 5150dc0..42ac898 100644 --- a/ext/BeamTrackingBeamlinesExt/exact.jl +++ b/ext/BeamTrackingBeamlinesExt/exact.jl @@ -76,8 +76,16 @@ end @inline function thick_bend_pure_bdipole(tm::Exact, bunch, bendparams, bm1, L) g = bendparams.g_ref tilt = bendparams.tilt_ref - e1 = bendparams.e1 - e2 = bendparams.e2 + if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == EntranceEnd + e1 = bendparams.e1 + else + e1 = 0 + end + if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == ExitEnd + e2 = bendparams.e2 + else + e2 = 0 + end w = rot_quaternion(0,0,-tilt) w_inv = inv_rot_quaternion(0,0,-tilt) theta = g * L diff --git a/ext/BeamTrackingBeamlinesExt/yoshida.jl b/ext/BeamTrackingBeamlinesExt/yoshida.jl index 416006f..79c63df 100644 --- a/ext/BeamTrackingBeamlinesExt/yoshida.jl +++ b/ext/BeamTrackingBeamlinesExt/yoshida.jl @@ -10,13 +10,13 @@ ds_step = L / num_steps end if order == 2 - return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L)) + return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) elseif order == 4 - return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L)) + return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) elseif order == 6 - return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L)) + return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) elseif order == 8 - return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, L)) + return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) end end @@ -81,7 +81,7 @@ end q = chargeof(bunch.species) mc2 = massof(bunch.species) a = gyromagnetic_anomaly(bunch.species) - edge_params = ifelse(tm.fringe_on, (a, tilde_m, Ksol, 0, 0, 0), nothing) + edge_params = (a, tilde_m, Ksol, 0, 0, 0) E0 = mc2/tilde_m/beta_0 params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, Ksol, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) @@ -97,39 +97,48 @@ end Ksol = kn[1] q = chargeof(bunch.species) mc2 = massof(bunch.species) + a = gyromagnetic_anomaly(bunch.species) + Kn0 = ifelse(mm[2] == 1, kn[2], 0) + edge_params = (a, tilde_m, Ksol, Kn0, 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), Ksol, mm, kn, ks) + params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, Ksol, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) - return integration_launcher(BeamTracking.sks_multipole!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.sks_multipole!, params, photon_params, tm, edge_params, L) end -@inline function thick_pure_bdipole(tm::DriftKick, bunch, bm, L) +@inline function thick_pure_bdipole(tm::Union{Yoshida,DriftKick}, bunch, bm, L) p_over_q_ref = bunch.p_over_q_ref tilde_m, gamsqr_0, beta_0 = BeamTracking.drift_params(bunch.species, p_over_q_ref) mm = bm.order kn, ks = get_strengths(bm, L, p_over_q_ref) q = chargeof(bunch.species) mc2 = massof(bunch.species) + a = gyromagnetic_anomaly(bunch.species) + Kn0 = ifelse(mm == 1, kn, 0) + edge_params = (a, tilde_m, 0, Kn0, 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), SA[mm], SA[kn], SA[ks]) + params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, SA[mm], SA[kn], SA[ks]) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, SA[mm], SA[kn], SA[ks]), nothing) - return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, edge_params, L) end -@inline function thick_bdipole(tm::DriftKick, bunch, bm, L) +@inline function thick_bdipole(tm::Union{Yoshida,DriftKick}, bunch, bm, L) p_over_q_ref = bunch.p_over_q_ref tilde_m, gamsqr_0, beta_0 = BeamTracking.drift_params(bunch.species, p_over_q_ref) mm = bm.order kn, ks = get_strengths(bm, L, p_over_q_ref) q = chargeof(bunch.species) mc2 = massof(bunch.species) + a = gyromagnetic_anomaly(bunch.species) + Kn0 = ifelse(mm[1] == 1, kn[1], 0) + edge_params = (a, tilde_m, 0, Kn0, 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), mm, kn, ks) + params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) - return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.dkd_multipole!, params, photon_params, tm, edge_params, L) end -@inline function thick_pure_bdipole(tm::Union{Yoshida,BendKick}, bunch, bm1, L) +@inline function thick_pure_bdipole(tm::BendKick, bunch, bm1, L) if isnothing(bunch.coords.q) && !(tm.radiation_damping_on || tm.radiation_fluctuations_on) return thick_pure_bdipole(Exact(), bunch, bm1, L) else @@ -144,9 +153,11 @@ end q = chargeof(bunch.species) mc2 = massof(bunch.species) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, gyromagnetic_anomaly(bunch.species), 0, w, w_inv, k0, SA[mm], SA[kn], SA[ks]) + a = gyromagnetic_anomaly(bunch.species) + edge_params = (a, tilde_m, 0, k0, 0, 0) + params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, a, 0, w, w_inv, k0, SA[mm], SA[kn], SA[ks]) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, SA[mm], SA[kn], SA[ks]), nothing) - return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, edge_params, L) end end @@ -161,10 +172,12 @@ end w_inv = inv_rot_quaternion(0,0,tilt) q = chargeof(bunch.species) mc2 = massof(bunch.species) + a = gyromagnetic_anomaly(bunch.species) + edge_params = (a, tilde_m, 0, k0, 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, gyromagnetic_anomaly(bunch.species), 0, w, w_inv, k0, mm, kn, ks) + params = (q, mc2, tm.radiation_damping_on, tilde_m, beta_0, a, 0, w, w_inv, k0, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) - return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.bkb_multipole!, params, photon_params, tm, edge_params, L) end @inline function thick_bdipole(tm::MatrixKick, bunch, bm, L) @@ -185,18 +198,12 @@ end w_inv = inv_rot_quaternion(0,0,tilt) q = chargeof(bunch.species) mc2 = massof(bunch.species) + a = gyromagnetic_anomaly(bunch.species) + edge_params = (a, tilde_m, 0, kn[1], 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, gyromagnetic_anomaly(bunch.species), w, w_inv, k1, mm, kn, ks) + params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, a, w, w_inv, k1, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) - return integration_launcher(BeamTracking.mkm_quadrupole!, params, photon_params, tm, nothing, L) -end - -@inline function thick_bdipole(tm::Yoshida, bunch, bm, L) - if bm.order[2] == 2 - return thick_bdipole(MatrixKick(order=tm.order, num_steps=tm.num_steps, ds_step=tm.ds_step, radiation_damping_on=tm.radiation_damping_on, radiation_fluctuations_on=tm.radiation_fluctuations_on), bunch, bm, L) - else - return thick_bdipole(BendKick(order=tm.order, num_steps=tm.num_steps, ds_step=tm.ds_step, radiation_damping_on=tm.radiation_damping_on, radiation_fluctuations_on=tm.radiation_fluctuations_on), bunch, bm, L) - end + return integration_launcher(BeamTracking.mkm_quadrupole!, params, photon_params, tm, edge_params, L) end @inline function thick_pure_bquadrupole(tm::Union{Yoshida,MatrixKick}, bunch, bm, L) @@ -272,7 +279,7 @@ end w = rot_quaternion(0,0,tilt) w_inv = inv_rot_quaternion(0,0,tilt) a = gyromagnetic_anomaly(bunch.species) - edge_params = ifelse(tm.fringe_on, (a, tilde_m, 0, Kn0, e1, e2), nothing) + edge_params = (a, tilde_m, 0, Kn0, e1, e2) q = chargeof(bunch.species) mc2 = massof(bunch.species) E0 = mc2/tilde_m/beta_0 @@ -313,8 +320,24 @@ end p0c = BeamTracking.R_to_pc(bunch.species, p_over_q_ref) q = chargeof(bunch.species) mc2 = massof(bunch.species) + if mm[1] == 0 + Ksol = kn[1] + if length(mm) > 1 && mm[2] == 1 + Kn0 = kn[2] + else + Kn0 = 0 + end + elseif mm[1] == 1 + Ksol = 0 + Kn0 = kn[1] + else + Ksol = 0 + Kn0 = 0 + end + a = gyromagnetic_anomaly(bunch.species) + edge_params = (a, tilde_m, Ksol, Kn0, 0, 0) E0 = mc2/tilde_m/beta_0 - params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, E_ref, p0c, gyromagnetic_anomaly(bunch.species), omega, t0, E0_over_Rref, mm, kn, ks) + params = (q, mc2, tm.radiation_damping_on, beta_0, gamsqr_0, tilde_m, E_ref, p0c, a, omega, t0, E0_over_Rref, mm, kn, ks) photon_params = ifelse(tm.radiation_fluctuations_on, (q, mc2, E0, 0, 0, mm, kn, ks), nothing) - return integration_launcher(BeamTracking.cavity!, params, photon_params, tm, nothing, L) + return integration_launcher(BeamTracking.cavity!, params, photon_params, tm, edge_params, L) end diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl index cb00b02..51edb88 100644 --- a/src/BeamTracking.jl +++ b/src/BeamTracking.jl @@ -19,6 +19,7 @@ import GTPSA: sincu, sinhcu export Bunch, State, ParticleView, Time, TimeDependentParam export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact +export NoEnd, BothEnds, EntranceEnd, ExitEnd export track! diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl index 6bc1df3..e1e60fa 100644 --- a/src/kernels/yoshida.jl +++ b/src/kernels/yoshida.jl @@ -2,8 +2,8 @@ # =============== I N T E G R A T O R S =============== # -@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L) - if !isnothing(edge_params) +@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) end @@ -19,16 +19,17 @@ if !isnothing(photon_params) stochastic_radiation!(i, coords, photon_params..., ds_step / 2) end - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L) +@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step w1 = 1.3512071919596577718181151794851757586002349853515625*ds_step - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) end @@ -46,18 +47,19 @@ end if !isnothing(photon_params) stochastic_radiation!(i, coords, photon_params..., ds_step / 2) end - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L) +@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) w0 = 1.315186320683911169737712043570355*ds_step w1 = -1.17767998417887100694641568096432*ds_step w2 = 0.235573213359358133684793182978535*ds_step w3 = 0.784513610477557263819497633866351*ds_step - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) end @@ -79,13 +81,14 @@ end if !isnothing(photon_params) stochastic_radiation!(i, coords, photon_params..., ds_step / 2) end - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, L) +@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) w0 = 1.7084530707869978*ds_step w1 = 0.102799849391985*ds_step w2 = -1.96061023297549*ds_step @@ -94,7 +97,7 @@ end w5 = -1.44485223686048*ds_step w6 = 0.253693336566229*ds_step w7 = 0.914844246229740*ds_step - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) end @@ -124,7 +127,8 @@ end if !isnothing(photon_params) stochastic_radiation!(i, coords, photon_params..., ds_step / 2) end - if !isnothing(edge_params) + if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end \ No newline at end of file diff --git a/src/tracking_methods.jl b/src/tracking_methods.jl index 483aea7..8fa06a6 100644 --- a/src/tracking_methods.jl +++ b/src/tracking_methods.jl @@ -1,3 +1,11 @@ +# ========== Fringe =========================== +abstract type AbstractFringeAt end +struct NoEnd <: AbstractFringeAt end +struct BothEnds <: AbstractFringeAt end +struct EntranceEnd <: AbstractFringeAt end +struct ExitEnd <: AbstractFringeAt end + + # ========== Yoshida =========================== abstract type AbstractYoshida end macro def_integrator_struct(name) @@ -8,9 +16,9 @@ macro def_integrator_struct(name) ds_step::Float64 radiation_damping_on::Bool radiation_fluctuations_on::Bool - fringe_on::Bool + fringe_at::AbstractFringeAt - function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_on::Bool=true) + function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::AbstractFringeAt=BothEnds()) _order = order _num_steps = num_steps _ds_step = ds_step @@ -27,7 +35,7 @@ macro def_integrator_struct(name) elseif _ds_step > 0 _num_steps = -1 end - return new(_order, _num_steps, _ds_step, radiation_damping_on, radiation_fluctuations_on, fringe_on) + return new(_order, _num_steps, _ds_step, radiation_damping_on, radiation_fluctuations_on, fringe_at) end end end @@ -41,4 +49,10 @@ end # ========== Exact =========================== -struct Exact end \ No newline at end of file +struct Exact + fringe_at::AbstractFringeAt + + function Exact(; fringe_at::AbstractFringeAt=BothEnds()) + return new(fringe_at) + end +end \ No newline at end of file diff --git a/src/utils/quaternions.jl b/src/utils/quaternions.jl index cbd44ea..cec8e0a 100644 --- a/src/utils/quaternions.jl +++ b/src/utils/quaternions.jl @@ -31,7 +31,7 @@ function sincos_quaternion(x::TPS{T}) where {T} #sq = one(x) # Using FastGTPSA! for the following makes other kernels run out of temps @FastGTPSA begin - if x < 0.1 + if x < ε #0.1 while !(conv_sin && conv_cos) && N < N_max y = -y*x/((2*N)*(2*N - 1)) result_sin = prev_sin + y/(2*N + 1) diff --git a/test/BeamlinesExt_test.jl b/test/BeamlinesExt_test.jl index c95df85..1941210 100644 --- a/test/BeamlinesExt_test.jl +++ b/test/BeamlinesExt_test.jl @@ -421,7 +421,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-14) # Pure bend: - ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_on=false)) + ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd())) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -432,7 +432,7 @@ || b0.coords.q ≈ -[0.99999555887473 0.00000197685011 0.00297918168991 0.00008187412527]) # Pure solenoid: - ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_on=false)) + ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -444,7 +444,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Solenoid with quadrupole: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_on=false)) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -456,7 +456,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # SK multiple steps: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_on=false)) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -468,7 +468,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Step size: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_on=false)) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -480,7 +480,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight pure dipole (DK): - ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2)) + ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -492,7 +492,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight pure dipole (BK): - ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10)) + ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -504,7 +504,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6) # Straight dipole with quadrupole (DK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2)) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -516,7 +516,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight dipole with quadrupole (BK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10)) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -528,7 +528,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6) # Straight dipole with quadrupole (MK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10)) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd())) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -612,7 +612,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-7) # Quadrupole with dipole and sextupole (MK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10)) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=NoEnd())) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -745,7 +745,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-7) # With solenoid (RK4): - ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2)) + ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=NoEnd())) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -769,7 +769,7 @@ @test b0.coords.q ≈ q_expected || b0.coords.q ≈ -q_expected # With solenoid and quadrupole: - ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20)) + ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=NoEnd())) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -859,7 +859,7 @@ b0 = Bunch([0.4 0.4 0.4 0.4 0.4 -0.5], [1.0 0.0 0.0 0.0], p_over_q_ref=p_over_q_ref, species=Species("electron")) v_init = copy(b0.coords.v) q_init = copy(b0.coords.q) - ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick()) + ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=NoEnd())) track!(b0, Beamline([ele_dipole], p_over_q_ref=p_over_q_ref)) @test b0.coords.state[1] == STATE_LOST @test v_init == b0.coords.v diff --git a/test/IntegrationTracking_test.jl b/test/IntegrationTracking_test.jl index bbdf52a..90e4262 100644 --- a/test/IntegrationTracking_test.jl +++ b/test/IntegrationTracking_test.jl @@ -83,7 +83,7 @@ ker = BeamTracking.bkb_multipole! num_steps = 10 ds_step = T(0.2) - return ker, params, nothing, ds_step, num_steps, nothing, L + return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L end function integrator_args(::Type{T}) where {T} @@ -105,7 +105,7 @@ ker = BeamTracking.dkd_multipole! num_steps = 1 ds_step = T(2) - return ker, params, nothing, ds_step, num_steps, nothing, L + return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L end function cavity_args(::Type{T}) where {T} From d876b7062ec80b0aa40c7ea56e1fb596b44b3e6e Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 13:37:18 -0500 Subject: [PATCH 04/30] work --- src/BeamTracking.jl | 1 + src/batch.jl | 28 ++++++++++++++++++++++++++-- src/kernel.jl | 26 ++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl index cb00b02..c9ae6ac 100644 --- a/src/BeamTracking.jl +++ b/src/BeamTracking.jl @@ -30,6 +30,7 @@ include("utils/z_to_time.jl") include("types.jl") include("time.jl") +include("batch.jl") include("kernel.jl") include("tracking_methods.jl") diff --git a/src/batch.jl b/src/batch.jl index 2ed5b27..6375732 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -78,13 +78,37 @@ for op in (:+,:-,:*,:/,:^) @eval begin Base.$op(ba::BatchParam, b::Number) = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end) Base.$op(a::Number, bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end) + function Base.$op(ba::BatchParam, bb::BatchParam) - if !(ba.batch isa Number) && !(bb.batch isa Number) && length(ba.batch) != length(bb.batch) + if ba.batch isa Number + if bb.batch isa Number + return BatchParam($op(ba.batch, bb.batch)) + else + let a = ba.batch + # WE HAVE TO WRITE THIS BY HAND FOR EACH OP!!! + if a ≈ 0 # So that e.g. big arrays aren't made for skew strengths when only normal is batch + return BatchParam(0f0) + else + return BatchParam(map((bbi)->$op(a, bbi), bb.batch)) + end + end + end + elseif bb.batch isa Number + let b = bb.batch + if b ≈ 0 + return BatchParam(0f0) + else + return BatchParam(map((bai)->$op(bai, b), ba.batch)) + end + end + elseif length(ba.batch) == length(bb.batch) + return BatchParam(map((bai,bbi)->$op(bai, bbi), ba.batch, bb.batch)) + else error("Cannot perform operation $($op) with two non-scalar BatchParams of differing lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).") end - return BatchParam(map((x,y)->$op(x, y), ba.batch, bb.batch)) end + end end diff --git a/src/kernel.jl b/src/kernel.jl index 3fba566..b664a5f 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -9,7 +9,12 @@ blank_kernel!(args...) = nothing kernel::K = blank_kernel! args::A = () function KernelCall(kernel, args) - _args = map(t->time_lower(t), args) + if args != () + @show args + @show batch_lower(args) + error("") + end + _args = map(t->time_lower(batch_lower(t)), args) new{typeof(kernel),typeof(_args)}(kernel, _args) end end @@ -55,16 +60,29 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref) @unroll function __generic_kernel!(i, coords::Coords, chain, ref) @unroll for kcall in chain - args = process_args(i, coords, kcall.args, ref) + bargs = process_batch_args(i, kcall.args) + args = process_time_args(i, coords, bargs, ref) (kcall.kernel)(i, coords, args...) end return nothing end -function process_args(i, coords, args, ref) +function process_batch_args(i, args) + @show args + error("") + if static_batchcheck(args) + # beval(args, i) + #error("") + return beval(args, i) + else + return args + end +end + +function process_time_args(i, coords, args, ref) if !isnothing(ref) && static_timecheck(args) let t = compute_time(coords.v[i,ZI], coords.v[i,PZI], ref) - return map(arg->teval(arg, t), args) + return teval(args, t) end else return args From 95bcadf3c023d183411d78d0620be2342728a823 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 14:26:52 -0500 Subject: [PATCH 05/30] make enums --- Project.toml | 3 +- ext/BeamTrackingBeamlinesExt/exact.jl | 4 +- ext/BeamTrackingBeamlinesExt/utils.jl | 18 ++ ext/BeamTrackingBeamlinesExt/yoshida.jl | 10 +- src/BeamTracking.jl | 5 +- src/kernels/yoshida.jl | 232 ++++++++++++------------ src/tracking_methods.jl | 15 +- test/BeamlinesExt_test.jl | 28 +-- test/IntegrationTracking_test.jl | 4 +- 9 files changed, 172 insertions(+), 147 deletions(-) diff --git a/Project.toml b/Project.toml index 6ee4eba..5c924d5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,12 +1,13 @@ name = "BeamTracking" uuid = "8ef5c10a-4ca3-437f-8af5-b84d8af36df0" authors = ["mattsignorelli and contributors"] -version = "0.5.1" +version = "0.5.5" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" AtomicAndPhysicalConstants = "5c0d271c-5419-4163-b387-496237733d8b" +EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8" HostCPUFeatures = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" diff --git a/ext/BeamTrackingBeamlinesExt/exact.jl b/ext/BeamTrackingBeamlinesExt/exact.jl index 42ac898..1c09d8e 100644 --- a/ext/BeamTrackingBeamlinesExt/exact.jl +++ b/ext/BeamTrackingBeamlinesExt/exact.jl @@ -76,12 +76,12 @@ end @inline function thick_bend_pure_bdipole(tm::Exact, bunch, bendparams, bm1, L) g = bendparams.g_ref tilt = bendparams.tilt_ref - if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == EntranceEnd + if tm.fringe_at == Fringe.BothEnds || tm.fringe_at == Fringe.EntranceEnd e1 = bendparams.e1 else e1 = 0 end - if typeof(tm.fringe_at) == BothEnds || typeof(tm.fringe_at) == ExitEnd + if tm.fringe_at == Fringe.BothEnds || tm.fringe_at == Fringe.ExitEnd e2 = bendparams.e2 else e2 = 0 diff --git a/ext/BeamTrackingBeamlinesExt/utils.jl b/ext/BeamTrackingBeamlinesExt/utils.jl index 0072bd6..f47bfa3 100644 --- a/ext/BeamTrackingBeamlinesExt/utils.jl +++ b/ext/BeamTrackingBeamlinesExt/utils.jl @@ -169,4 +169,22 @@ function rf_phi0(rfparams) else error("RF parameter zero_phase value not set correctly.") end +end + +#--------------------------------------------------------------------------------------------------- + +function fringe_in(f::Fringe.T) + if f == Fringe.BothEnds || f == Fringe.EntranceEnd + return Val{true}() + else + return Val{false}() + end +end + +function fringe_out(f::Fringe.T) + if f == Fringe.BothEnds || f == Fringe.ExitEnd + return Val{true}() + else + return Val{false}() + end end \ No newline at end of file diff --git a/ext/BeamTrackingBeamlinesExt/yoshida.jl b/ext/BeamTrackingBeamlinesExt/yoshida.jl index 79c63df..76e4efc 100644 --- a/ext/BeamTrackingBeamlinesExt/yoshida.jl +++ b/ext/BeamTrackingBeamlinesExt/yoshida.jl @@ -9,14 +9,16 @@ num_steps = Int(ceil(L / ds_step)) ds_step = L / num_steps end + fin = fringe_in(tm.fringe_at) + fout = fringe_out(tm.fringe_at) if order == 2 - return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) + return KernelCall(BeamTracking.order_two_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L)) elseif order == 4 - return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) + return KernelCall(BeamTracking.order_four_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L)) elseif order == 6 - return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) + return KernelCall(BeamTracking.order_six_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L)) elseif order == 8 - return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, tm.fringe_at, L)) + return KernelCall(BeamTracking.order_eight_integrator!, (ker, params, photon_params, ds_step, num_steps, edge_params, fin, fout, L)) end end diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl index 51edb88..07117b0 100644 --- a/src/BeamTracking.jl +++ b/src/BeamTracking.jl @@ -11,7 +11,8 @@ using GTPSA, Accessors, SpecialFunctions, AtomicAndPhysicalConstants, - Random + Random, + EnumX using KernelAbstractions @@ -19,7 +20,7 @@ import GTPSA: sincu, sinhcu export Bunch, State, ParticleView, Time, TimeDependentParam export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact -export NoEnd, BothEnds, EntranceEnd, ExitEnd +export Fringe export track! diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl index e1e60fa..4bed375 100644 --- a/src/kernels/yoshida.jl +++ b/src/kernels/yoshida.jl @@ -2,133 +2,141 @@ # =============== I N T E G R A T O R S =============== # -@makekernel fastgtpsa=true function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - for step in 1:num_steps - ker(i, coords, params..., ds_step) - if !isnothing(photon_params) && (step < num_steps) - stochastic_radiation!(i, coords, photon_params..., ds_step) +@inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} + @inbounds begin + if !isnothing(edge_params) && fringe_in + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + for step in 1:num_steps + ker(i, coords, params..., ds_step) + if !isnothing(photon_params) && (step < num_steps) + stochastic_radiation!(i, coords, photon_params..., ds_step) + end + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + if !isnothing(edge_params) && fringe_out + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) - w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step - w1 = 1.3512071919596577718181151794851757586002349853515625*ds_step - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - for step in 1:num_steps - ker(i, coords, params..., w1) - ker(i, coords, params..., w0) - ker(i, coords, params..., w1) - if !isnothing(photon_params) && (step < num_steps) - stochastic_radiation!(i, coords, photon_params..., ds_step) +@inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} + @inbounds begin + w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step + w1 = 1.3512071919596577718181151794851757586002349853515625*ds_step + if !isnothing(edge_params) && fringe_in + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + for step in 1:num_steps + ker(i, coords, params..., w1) + ker(i, coords, params..., w0) + ker(i, coords, params..., w1) + if !isnothing(photon_params) && (step < num_steps) + stochastic_radiation!(i, coords, photon_params..., ds_step) + end + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + if !isnothing(edge_params) && fringe_out + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) - w0 = 1.315186320683911169737712043570355*ds_step - w1 = -1.17767998417887100694641568096432*ds_step - w2 = 0.235573213359358133684793182978535*ds_step - w3 = 0.784513610477557263819497633866351*ds_step - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - for step in 1:num_steps - ker(i, coords, params..., w3) - ker(i, coords, params..., w2) - ker(i, coords, params..., w1) - ker(i, coords, params..., w0) - ker(i, coords, params..., w1) - ker(i, coords, params..., w2) - ker(i, coords, params..., w3) - if !isnothing(photon_params) && (step < num_steps) - stochastic_radiation!(i, coords, photon_params..., ds_step) +@inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} + @inbounds begin + w0 = 1.315186320683911169737712043570355*ds_step + w1 = -1.17767998417887100694641568096432*ds_step + w2 = 0.235573213359358133684793182978535*ds_step + w3 = 0.784513610477557263819497633866351*ds_step + if !isnothing(edge_params) && fringe_in + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + for step in 1:num_steps + ker(i, coords, params..., w3) + ker(i, coords, params..., w2) + ker(i, coords, params..., w1) + ker(i, coords, params..., w0) + ker(i, coords, params..., w1) + ker(i, coords, params..., w2) + ker(i, coords, params..., w3) + if !isnothing(photon_params) && (step < num_steps) + stochastic_radiation!(i, coords, photon_params..., ds_step) + end + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + if !isnothing(edge_params) && fringe_out + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end -@makekernel fastgtpsa=true function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, fringe_at, L) - w0 = 1.7084530707869978*ds_step - w1 = 0.102799849391985*ds_step - w2 = -1.96061023297549*ds_step - w3 = 1.93813913762276*ds_step - w4 = -0.158240635368243*ds_step - w5 = -1.44485223686048*ds_step - w6 = 0.253693336566229*ds_step - w7 = 0.914844246229740*ds_step - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == EntranceEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - for step in 1:num_steps - ker(i, coords, params..., w7) - ker(i, coords, params..., w6) - ker(i, coords, params..., w5) - ker(i, coords, params..., w4) - ker(i, coords, params..., w3) - ker(i, coords, params..., w2) - ker(i, coords, params..., w1) - ker(i, coords, params..., w0) - ker(i, coords, params..., w1) - ker(i, coords, params..., w2) - ker(i, coords, params..., w3) - ker(i, coords, params..., w4) - ker(i, coords, params..., w5) - ker(i, coords, params..., w6) - ker(i, coords, params..., w7) - if !isnothing(photon_params) && (step < num_steps) - stochastic_radiation!(i, coords, photon_params..., ds_step) +@inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} + @inbounds begin + w0 = 1.7084530707869978*ds_step + w1 = 0.102799849391985*ds_step + w2 = -1.96061023297549*ds_step + w3 = 1.93813913762276*ds_step + w4 = -0.158240635368243*ds_step + w5 = -1.44485223686048*ds_step + w6 = 0.253693336566229*ds_step + w7 = 0.914844246229740*ds_step + if !isnothing(edge_params) && fringe_in + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + for step in 1:num_steps + ker(i, coords, params..., w7) + ker(i, coords, params..., w6) + ker(i, coords, params..., w5) + ker(i, coords, params..., w4) + ker(i, coords, params..., w3) + ker(i, coords, params..., w2) + ker(i, coords, params..., w1) + ker(i, coords, params..., w0) + ker(i, coords, params..., w1) + ker(i, coords, params..., w2) + ker(i, coords, params..., w3) + ker(i, coords, params..., w4) + ker(i, coords, params..., w5) + ker(i, coords, params..., w6) + ker(i, coords, params..., w7) + if !isnothing(photon_params) && (step < num_steps) + stochastic_radiation!(i, coords, photon_params..., ds_step) + end + end + if !isnothing(photon_params) + stochastic_radiation!(i, coords, photon_params..., ds_step / 2) + end + if !isnothing(edge_params) && fringe_out + a, tilde_m, Ksol, Kn0, e1, e2 = edge_params + linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end - if !isnothing(photon_params) - stochastic_radiation!(i, coords, photon_params..., ds_step / 2) - end - if !isnothing(edge_params) && (typeof(fringe_at) == BothEnds || typeof(fringe_at) == ExitEnd) - a, tilde_m, Ksol, Kn0, e1, e2 = edge_params - linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end end \ No newline at end of file diff --git a/src/tracking_methods.jl b/src/tracking_methods.jl index 8fa06a6..b3d7034 100644 --- a/src/tracking_methods.jl +++ b/src/tracking_methods.jl @@ -1,10 +1,5 @@ # ========== Fringe =========================== -abstract type AbstractFringeAt end -struct NoEnd <: AbstractFringeAt end -struct BothEnds <: AbstractFringeAt end -struct EntranceEnd <: AbstractFringeAt end -struct ExitEnd <: AbstractFringeAt end - +@enumx Fringe NoEnd BothEnds EntranceEnd ExitEnd # ========== Yoshida =========================== abstract type AbstractYoshida end @@ -16,9 +11,9 @@ macro def_integrator_struct(name) ds_step::Float64 radiation_damping_on::Bool radiation_fluctuations_on::Bool - fringe_at::AbstractFringeAt + fringe_at::Fringe.T - function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::AbstractFringeAt=BothEnds()) + function $(esc(name))(; order::Int=4, num_steps::Int=-1, ds_step::Float64=-1.0, radiation_damping_on::Bool=false, radiation_fluctuations_on::Bool=false, fringe_at::Fringe.T=Fringe.BothEnds) _order = order _num_steps = num_steps _ds_step = ds_step @@ -50,9 +45,9 @@ end # ========== Exact =========================== struct Exact - fringe_at::AbstractFringeAt + fringe_at::Fringe.T - function Exact(; fringe_at::AbstractFringeAt=BothEnds()) + function Exact(; fringe_at::Fringe.T=Fringe.BothEnds) return new(fringe_at) end end \ No newline at end of file diff --git a/test/BeamlinesExt_test.jl b/test/BeamlinesExt_test.jl index 1941210..4abd74f 100644 --- a/test/BeamlinesExt_test.jl +++ b/test/BeamlinesExt_test.jl @@ -421,7 +421,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-14) # Pure bend: - ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd())) + ele = LineElement(L=2.0, g=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=Fringe.NoEnd)) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -432,7 +432,7 @@ || b0.coords.q ≈ -[0.99999555887473 0.00000197685011 0.00297918168991 0.00008187412527]) # Pure solenoid: - ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=NoEnd())) + ele = LineElement(L=1.0, Ksol=2.0, tracking_method=Yoshida(order=2, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -444,7 +444,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Solenoid with quadrupole: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=2, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -456,7 +456,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # SK multiple steps: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, num_steps=2, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -468,7 +468,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Step size: - ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Ksol=0.1, Kn1=0.1, tracking_method=SolenoidKick(order=4, ds_step=1.0, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -480,7 +480,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight pure dipole (DK): - ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, tilt0=pi/3, tracking_method=DriftKick(order=2, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -492,7 +492,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight pure dipole (BK): - ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -504,7 +504,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6) # Straight dipole with quadrupole (DK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.03, tracking_method=DriftKick(order=2, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -516,7 +516,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 6e-9) # Straight dipole with quadrupole (BK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=BendKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -528,7 +528,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-6) # Straight dipole with quadrupole (MK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.1, tracking_method=Yoshida(order=6, num_steps=10, fringe_at=Fringe.NoEnd)) v = collect(transpose(@vars(D10))) q = TPS64{D10}[1 0 0 0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -612,7 +612,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 2e-7) # Quadrupole with dipole and sextupole (MK): - ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=NoEnd())) + ele = LineElement(L=2.0, Kn0=0.1, Kn1=0.2, Kn2=0.3, tracking_method=MatrixKick(order=6, num_steps=10, fringe_at=Fringe.NoEnd)) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -745,7 +745,7 @@ @test quaternion_coeffs_approx_equal(q_expected, q_z, 1e-7) # With solenoid (RK4): - ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=NoEnd())) + ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=0.6, tracking_method=Yoshida(order=6, num_steps=2, fringe_at=Fringe.NoEnd)) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -769,7 +769,7 @@ @test b0.coords.q ≈ q_expected || b0.coords.q ≈ -q_expected # With solenoid and quadrupole: - ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=NoEnd())) + ele = LineElement(L=4.01667, voltage=3321.0942126011, rf_frequency=591142.68014977, Ksol=-0.3, Kn1=0.15, tracking_method=Yoshida(order=6, num_steps=20, fringe_at=Fringe.NoEnd)) v = [0.01 0.02 0.03 0.04 0.05 0.06] q = [1.0 0.0 0.0 0.0] b0 = Bunch(v, q, p_over_q_ref=p_over_q_ref, species=Species("electron")) @@ -859,7 +859,7 @@ b0 = Bunch([0.4 0.4 0.4 0.4 0.4 -0.5], [1.0 0.0 0.0 0.0], p_over_q_ref=p_over_q_ref, species=Species("electron")) v_init = copy(b0.coords.v) q_init = copy(b0.coords.q) - ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=NoEnd())) + ele_dipole = LineElement(L=1.0, Kn0=1e-8, Kn1=1e-8, tracking_method=BendKick(fringe_at=Fringe.NoEnd)) track!(b0, Beamline([ele_dipole], p_over_q_ref=p_over_q_ref)) @test b0.coords.state[1] == STATE_LOST @test v_init == b0.coords.v diff --git a/test/IntegrationTracking_test.jl b/test/IntegrationTracking_test.jl index 90e4262..f07dcba 100644 --- a/test/IntegrationTracking_test.jl +++ b/test/IntegrationTracking_test.jl @@ -83,7 +83,7 @@ ker = BeamTracking.bkb_multipole! num_steps = 10 ds_step = T(0.2) - return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L + return ker, params, nothing, ds_step, num_steps, nothing, Val{false}(), Val{false}(), L end function integrator_args(::Type{T}) where {T} @@ -105,7 +105,7 @@ ker = BeamTracking.dkd_multipole! num_steps = 1 ds_step = T(2) - return ker, params, nothing, ds_step, num_steps, nothing, NoEnd(), L + return ker, params, nothing, ds_step, num_steps, nothing, Val{false}(), Val{false}(), L end function cavity_args(::Type{T}) where {T} From df489c236fb7d50044591bcc362daa3cfb160608 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 14:32:19 -0500 Subject: [PATCH 06/30] @eles->@elements --- test/lattices/alignment_lat.jl | 2 +- test/lattices/aperture_lat.jl | 2 +- test/lattices/esr.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/lattices/alignment_lat.jl b/test/lattices/alignment_lat.jl index 356c104..45eb861 100644 --- a/test/lattices/alignment_lat.jl +++ b/test/lattices/alignment_lat.jl @@ -1,7 +1,7 @@ using BeamTracking using Beamlines -@eles begin +@elements begin drift1 = Drift(L = 2.0, x_offset = 0.1, y_offset = 0.2, z_offset = 0.3, x_rot = 0.04, y_rot = 0.05, tilt = 0.06, tracking_method = Exact()) diff --git a/test/lattices/aperture_lat.jl b/test/lattices/aperture_lat.jl index 557bee1..8814784 100644 --- a/test/lattices/aperture_lat.jl +++ b/test/lattices/aperture_lat.jl @@ -1,7 +1,7 @@ using BeamTracking using Beamlines -@eles begin +@elements begin d_error = LineElement(L=1, x1_limit=1, dx=5) d1_rect = Drift(L=1, x1_limit = 1, x2_limit = 2, y1_limit = 3, y2_limit = 5, aperture_shape = ApertureShape.Rectangular, aperture_active = false) diff --git a/test/lattices/esr.jl b/test/lattices/esr.jl index a9d3622..01f6444 100644 --- a/test/lattices/esr.jl +++ b/test/lattices/esr.jl @@ -1,6 +1,6 @@ using BeamTracking, Beamlines -@eles begin +@elements begin IP6__1 = Marker() D000001__1 = Drift( L = 5.3) Q1ER_6 = Quadrupole( L = 1.8, Kn1 = -0.2291420342) From cc685cf86bd1560b2d73aef2603370ab6dbe0916 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 15:33:17 -0500 Subject: [PATCH 07/30] CUDA ext --- Project.toml | 3 +++ ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 13 +++++++++++++ src/kernels/radiation.jl | 2 +- src/utils/math_simd.jl | 10 +++------- 4 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl diff --git a/Project.toml b/Project.toml index 5c924d5..2ddae00 100644 --- a/Project.toml +++ b/Project.toml @@ -22,15 +22,18 @@ Unrolled = "9602ed7d-8fef-5bc8-8597-8f21381861e8" [weakdeps] Beamlines = "5bb90b03-0719-46b8-8ce4-1ef3afd3cd4b" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [extensions] BeamTrackingBeamlinesExt = "Beamlines" +BeamTrackingCUDAExt = "CUDA" [compat] Accessors = "0.1.42" Adapt = "4.3.0" AtomicAndPhysicalConstants = "0.8.0" Beamlines = "0.8.0" +CUDA = "5.9" GTPSA = "1.5.3" HostCPUFeatures = "0.1" KernelAbstractions = "0.9.35" diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl new file mode 100644 index 0000000..7ab9873 --- /dev/null +++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl @@ -0,0 +1,13 @@ +module BeamTrackingCUDAExt +using CUDA: CuArray +import BeamTracking: gaussian_random + +function gaussian_random(::CuArray, sigma1, sigma2) + s, c = sincospi(2 * rand()) + t = sqrt(-2 * log(rand())) + z0 = c*t*sigma1 + z1 = s*t*sigma2 + return z0, z1 +end + +end \ No newline at end of file diff --git a/src/kernels/radiation.jl b/src/kernels/radiation.jl index f9bcd20..acbead4 100644 --- a/src/kernels/radiation.jl +++ b/src/kernels/radiation.jl @@ -160,7 +160,7 @@ end sigma2_1 = one(sigma2) sigma = sqrt(vifelse(alive, sigma2, sigma2_1)) - dpz, theta = gaussian_random(sigma, sqrt(13/55)/gamma) + dpz, theta = gaussian_random(v, sigma, sqrt(13/55)/gamma) s, c = sincos(theta) b_perp_hat_x = vifelse(b_perp > 0, b_perp_x / b_perp, 0) diff --git a/src/utils/math_simd.jl b/src/utils/math_simd.jl index 6ccbf19..211985c 100644 --- a/src/utils/math_simd.jl +++ b/src/utils/math_simd.jl @@ -165,18 +165,14 @@ compiler bug, but CUDA.rand seems to be ok. Nonetheless this may also give CPU performance benefits with radiation because we already need to compute two randn's anyways. """ -function gaussian_random(sigma1, sigma2) - s, c = sincospi(2 * rand()) - t = sqrt(-2 * log(rand())) - z0 = c*t*sigma1 - z1 = s*t*sigma2 - return z0, z1 +function gaussian_random(::Matrix, sigma1, sigma2) + return randn()*sigma1, randn()*sigma2 end """ See gaussian_random, but for SIMD vectors. """ -function gaussian_random(sigma1::SIMD.Vec, sigma2::SIMD.Vec) +function gaussian_random(::Matrix, sigma1::SIMD.Vec, sigma2::SIMD.Vec) return SIMDMathFunctions.vmap(gaussian_random, sigma1, sigma2) end From aefac4b0f7603eb395559740a699b210293e5a45 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Wed, 4 Feb 2026 16:45:44 -0500 Subject: [PATCH 08/30] tests --- .../BeamlinesExt/beamlines_stochastic_test.jl | 69 +------------------ 1 file changed, 1 insertion(+), 68 deletions(-) diff --git a/test/BeamlinesExt/beamlines_stochastic_test.jl b/test/BeamlinesExt/beamlines_stochastic_test.jl index 9bf854b..e96a96c 100644 --- a/test/BeamlinesExt/beamlines_stochastic_test.jl +++ b/test/BeamlinesExt/beamlines_stochastic_test.jl @@ -3,6 +3,7 @@ using Random @testset "Stochastic radiation" begin Random.seed!(0) + # Here just check that they don't bug out p_over_q_ref = BeamTracking.E_to_R(Species("electron"), 18e9) bend = SBend(g = 0.01, L = 2.0, tracking_method = Yoshida(order = 2, num_steps = 1, @@ -13,109 +14,41 @@ using Random b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.04889165969752571 - 0.021174046860678353 - 0.10556186702264685 - 0.03999912146491459 - 0.04761050793826471 - 0.05997673560385752]' - bend.tracking_method = Yoshida(order = 2, num_steps = 2, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.04889193258818915 - 0.021174302497216673 - 0.10556186827683302 - 0.03999908434104034 - 0.04761050068397177 - 0.059975733285604405]' - bend.tracking_method = Yoshida(order = 4, num_steps = 1, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.048891974265822244 - 0.02117431265208611 - 0.10556186796033079 - 0.03999910506374448 - 0.047610499467786394 - 0.059976284409328576]' - bend.tracking_method = Yoshida(order = 4, num_steps = 2, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.0488920016565342 - 0.021174218561770715 - 0.10556186797663822 - 0.039998878356961864 - 0.04761049873848289 - 0.05997027467880636]' - - bend.tracking_method = Yoshida(order = 6, num_steps = 1, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.04889226281541181 - 0.021174668413673003 - 0.10556186827012498 - 0.03999920072349538 - 0.04761049187637356 - 0.059978810393760046]' - - bend.tracking_method = Yoshida(order = 6, num_steps = 2, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - - @test b0.coords.v ≈ - [0.04889189599231092 - 0.02117408337833361 - 0.10556186775984797 - 0.03999890918412512 - 0.04761050140381129 - 0.059971097099417885]' - bend.tracking_method = Yoshida(order = 8, num_steps = 1, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.04889209188518841 - 0.02117482934639319 - 0.10556186796145814 - 0.039999846102478705 - 0.047610496377937794 - 0.059995919985280796]' - - bend.tracking_method = Yoshida(order = 8, num_steps = 2, radiation_damping_on = true, radiation_fluctuations_on = true) b0 = Bunch(copy(v0), species = line.species_ref, p_over_q_ref = line.p_over_q_ref) track!(b0, line) - @test b0.coords.v ≈ - [0.04889219169051286 - 0.02117499046271834 - 0.10556186830041747 - 0.039999857154632036 - 0.04761049389108041 - 0.05999621385138163]' - # Now just check SIMD , if it doesn't bug out bend.tracking_method = Yoshida(order = 8, num_steps = 2, radiation_damping_on = true, radiation_fluctuations_on = true) From 00273a15aa427ae5bab2f439f4fb10dfe9b796b1 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 06:09:29 -0500 Subject: [PATCH 09/30] fix --- src/kernel.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index b664a5f..1ca4698 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -9,11 +9,11 @@ blank_kernel!(args...) = nothing kernel::K = blank_kernel! args::A = () function KernelCall(kernel, args) - if args != () + #=if args != () @show args @show batch_lower(args) error("") - end + end=# _args = map(t->time_lower(batch_lower(t)), args) new{typeof(kernel),typeof(_args)}(kernel, _args) end @@ -68,8 +68,8 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref) end function process_batch_args(i, args) - @show args - error("") + #= @show args + error("")=# if static_batchcheck(args) # beval(args, i) #error("") From f7d839887789cf1505a07d441cb611680dd6d178 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 07:11:51 -0500 Subject: [PATCH 10/30] fix --- ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl index 7ab9873..6e1a168 100644 --- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl +++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl @@ -1,8 +1,10 @@ module BeamTrackingCUDAExt -using CUDA: CuArray +using CUDA: CuDeviceArray import BeamTracking: gaussian_random -function gaussian_random(::CuArray, sigma1, sigma2) +# CUDA.jl has horrifying error message with randn, but not rand +# Box-Muller transform let's us get around that +function gaussian_random(::CuDeviceArray, sigma1, sigma2) s, c = sincospi(2 * rand()) t = sqrt(-2 * log(rand())) z0 = c*t*sigma1 From ba752c9ab287a706e72295ac9fb4f329e4c32b87 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 07:20:14 -0500 Subject: [PATCH 11/30] fix SIMD --- ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl | 12 ++++++++++-- src/utils/math_simd.jl | 10 ++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl index 6e1a168..007017a 100644 --- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl +++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl @@ -2,8 +2,16 @@ module BeamTrackingCUDAExt using CUDA: CuDeviceArray import BeamTracking: gaussian_random -# CUDA.jl has horrifying error message with randn, but not rand -# Box-Muller transform let's us get around that +""" +This function returns two Gaussian random numbers with +mean 0 and standard deviations sigma1, sigma2 using a +Box-Muller transform. + +This was implemented because CUDA.randn has some horrible +compiler bug, but CUDA.rand seems to be ok. Nonetheless +this may also give CPU performance benefits with radiation +because we already need to compute two randn's anyways. +""" function gaussian_random(::CuDeviceArray, sigma1, sigma2) s, c = sincospi(2 * rand()) t = sqrt(-2 * log(rand())) diff --git a/src/utils/math_simd.jl b/src/utils/math_simd.jl index 211985c..2941601 100644 --- a/src/utils/math_simd.jl +++ b/src/utils/math_simd.jl @@ -157,13 +157,7 @@ end """ This function returns two Gaussian random numbers with -mean 0 and standard deviations sigma1, sigma2 using a -Box-Muller transform. - -This was implemented because CUDA.randn has some horrible -compiler bug, but CUDA.rand seems to be ok. Nonetheless -this may also give CPU performance benefits with radiation -because we already need to compute two randn's anyways. +mean 0 and standard deviations sigma1, sigma2. """ function gaussian_random(::Matrix, sigma1, sigma2) return randn()*sigma1, randn()*sigma2 @@ -174,5 +168,5 @@ end See gaussian_random, but for SIMD vectors. """ function gaussian_random(::Matrix, sigma1::SIMD.Vec, sigma2::SIMD.Vec) - return SIMDMathFunctions.vmap(gaussian_random, sigma1, sigma2) + return SIMDMathFunctions.vmap((s1,s2)->(randn()*s1, randn()*s2), sigma1, sigma2) end From 750bfc983a39b57fce0d6437061c212fc29fe298 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 10:05:30 -0500 Subject: [PATCH 12/30] improve unpacking type stability --- .ipynb_checkpoints/Untitled-checkpoint.ipynb | 6 + Untitled.ipynb | 855 ++++++++++++++++++ .../BeamTrackingBeamlinesExt.jl | 2 +- ext/BeamTrackingBeamlinesExt/unpack.jl | 2 +- ext/BeamTrackingBeamlinesExt/utils.jl | 66 +- .../BeamTrackingCUDAExt.jl | 4 +- src/BeamTracking.jl | 2 +- src/batch.jl | 161 +++- src/kernel.jl | 3 + 9 files changed, 1034 insertions(+), 67 deletions(-) create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb create mode 100644 Untitled.ipynb diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..a2c867f --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,855 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 51, + "id": "becfe72f-f151-4c6f-91cd-4e60b4b1acab", + "metadata": {}, + "outputs": [], + "source": [ + "using Revise\n", + "using ProfileView\n", + "using BenchmarkTools" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477", + "metadata": {}, + "outputs": [], + "source": [ + "using BeamTracking, Beamlines" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "2da06e85-ae4e-4742-9b6e-60f3d68ca6f6", + "metadata": {}, + "outputs": [], + "source": [ + "using BeamTracking: batch_lower" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Beamline:\n", + " species_ref = electron\n", + " E_ref = 1.8e10\n", + "\n", + " \u001b[1m Index \u001b[0m \u001b[1m Name \u001b[0m \u001b[1m Kind \u001b[0m \u001b[1m s [m] \u001b[0m\n", + " 1 qf Quadrupole 0\n", + " 2 d Drift 0.5\n", + " 3 qd Quadrupole 1.7\n", + " 4 d Drift 2.2\n" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@elements begin\n", + " qf = Quadrupole(Kn1=0.36, L=0.5)\n", + " d = Drift(L=1.2)\n", + " qd = Quadrupole(Kn1=-0.36, L=0.5)\n", + "end\n", + "fodo = Beamline([qf, d, qd, d], species_ref=Species(\"electron\"), E_ref=18e9)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "e739ea1c-147a-447b-9a98-8585a51f9d24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Setting bunch.species = Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ) (reference species from the Beamline)\n", + "Setting bunch.p_over_q_ref = -60.04153711147289 (reference p_over_q_ref from the Beamline)\n" + ] + }, + { + "data": { + "text/plain": [ + "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.1341179241307242e-8, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "N = 1000\n", + "b0 = Bunch(zeros(N,6), zeros(N,4))\n", + "track!(b0, fodo)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.1341179241307242e-8, 35225.12124230655))" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "kc = track!(b0, fodo.line[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6", + "metadata": {}, + "outputs": [], + "source": [ + "kn, ks = get_m" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5))" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kc.chain[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args= kc.chain[1].args" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "6c89cfe5-5b3e-46e7-a6e7-b713a54930fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "false" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1e-7≈0" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "b762b051-237e-4917-b0ff-8f35fb25beb1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "702fa820-feb2-46a1-b84d-a763de300a8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.0" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batch_lower(args[2][9])[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "3f49a56d-fd3b-4a50-9419-bfa5db328b51", + "metadata": {}, + "outputs": [], + "source": [ + "foreach(x->x.Bn0=1e-7, fodo.line)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "3e7a06b6-1f48-474a-807e-eb9515488897", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2.309 ms (409 allocations: 19.30 KiB)\n" + ] + }, + { + "data": { + "text/plain": [ + "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.9552193012013305e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; … ; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@btime track!($b0, $fodo)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4", + "metadata": {}, + "outputs": [], + "source": [ + "foreach(x->x.Kn0=BatchParam([2e-7, 1e-7]), fodo.line)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "21a8efb7-db32-4465-93fb-1d0e56b9a3e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2.349 ms (923 allocations: 29.17 KiB)\n" + ] + }, + { + "data": { + "text/plain": [ + "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 4.0692151117807276e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [-5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; … ; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@btime track!($b0, $fodo)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "9f5d074b-31ca-420d-943d-67ca4fbe4804", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element Vector{BatchParam}:\n", + " BatchParam(-0.0)\n", + " BatchParam(0.0)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t[1][end]" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "213e0113-edba-427d-b8cc-1cf18352fc57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BMultipoleParams\n", + " Bn0 = BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n", + " Kn1 = BatchParam(0.36)\n" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bm = qf.BMultipoleParams" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "bef5d1b8-9aaa-4be9-abe6-d17c8f886aef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BeamTrackingBeamlinesExt" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const BTBL = Base.get_extension(BeamTracking, :BeamTrackingBeamlinesExt)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "2efbe468-28af-4e0c-b7ec-ee6aa4e980bd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "n = BatchParam[BatchParam([-1.2008307422294578e-5]), BatchParam(0.36)]\n" + ] + }, + { + "data": { + "text/plain": [ + "(BatchParam[BatchParam(0.0f0), BatchParam(0.36)], -60.04153711147289, Bool[0, 1])" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np, p_over_q_ref, normalized = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "808e9a36-b6a9-4cca-9db9-1f5ff3c8c2e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.000219 seconds (176 allocations: 4.984 KiB)\n" + ] + }, + { + "data": { + "text/plain": [ + "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@time kn, ks = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "d666c09a-0d3c-4adf-a88f-229ab6b0d52a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BatchParam" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "L=qf.L\n", + "promote_type(reduce(promote_type, typeof.(bm.n)), \n", + " reduce(promote_type, typeof.(bm.s)),\n", + " reduce(promote_type, typeof.(bm.tilt)),\n", + " typeof(L), typeof(p_over_q_ref)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "f1c249b6-f18d-49a9-ac92-ac931862d027", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BatchParam" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "promote_type(eltype(bm.n), typeof(qf.L), typeof(p_over_q_ref))" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "9598c822-1f59-42eb-959e-50a273355b66", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}} with indices SOneTo(2):\n", + " BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n", + " BatchParam(0.36)" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "getfield(bm, :n)" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MethodInstance for BeamTrackingBeamlinesExt.get_strengths(::BMultipoleParams{BatchParam, 2}, ::Float64, ::Float64)\n", + " from get_strengths(\u001b[90mbm\u001b[39m, \u001b[90mL\u001b[39m, \u001b[90mp_over_q_ref\u001b[39m)\u001b[90m @\u001b[39m \u001b[90mBeamTrackingBeamlinesExt\u001b[39m \u001b[90m~/.julia/dev/BeamTracking/ext/BeamTrackingBeamlinesExt/\u001b[39m\u001b[90m\u001b[4mutils.jl:79\u001b[24m\u001b[39m\n", + "Arguments\n", + " #self#\u001b[36m::Core.Const(BeamTrackingBeamlinesExt.get_strengths)\u001b[39m\n", + " bm\u001b[36m::BMultipoleParams{BatchParam, 2}\u001b[39m\n", + " L\u001b[36m::Float64\u001b[39m\n", + " p_over_q_ref\u001b[36m::Float64\u001b[39m\n", + "Locals\n", + " sp\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + " np\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + " integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", + " normalized\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", + " order\u001b[36m::StaticArraysCore.SVector{2, Int64}\u001b[39m\n", + " tilt\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + " s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + " n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + " T\u001b[36m::Type{BatchParam}\u001b[39m\n", + " bmtilt\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", + " bms\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", + " bmn\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", + "Body\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n", + "\u001b[90m1 ─\u001b[39m nothing\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(sp))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(np))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(integrated))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(normalized))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(order))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(tilt))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(s))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(n))\n", + "\u001b[90m│ \u001b[39m Core.NewvarNode(:(T))\n", + "\u001b[90m│ \u001b[39m (bmn = BeamTrackingBeamlinesExt.getfield(bm, :n))\n", + "\u001b[90m│ \u001b[39m (bms = BeamTrackingBeamlinesExt.getfield(bm, :s))\n", + "\u001b[90m│ \u001b[39m (bmtilt = BeamTrackingBeamlinesExt.getfield(bm, :tilt))\n", + "\u001b[90m│ \u001b[39m %14 = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %15 = BeamTrackingBeamlinesExt.isconcretetype(%14)\u001b[36m::Core.Const(true)\u001b[39m\n", + "\u001b[90m└──\u001b[39m goto #3 if not %15\n", + "\u001b[90m2 ─\u001b[39m %17 = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %18 = BeamTrackingBeamlinesExt.typeof(L)\u001b[36m::Core.Const(Float64)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %19 = BeamTrackingBeamlinesExt.typeof(p_over_q_ref)\u001b[36m::Core.Const(Float64)\u001b[39m\n", + "\u001b[90m│ \u001b[39m (T = BeamTrackingBeamlinesExt.promote_type(%17, %18, %19))\n", + "\u001b[90m└──\u001b[39m goto #4\n", + "\u001b[90m3 ─\u001b[39m Core.Const(:(bmn isa BeamTrackingBeamlinesExt.AbstractArray))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(goto %40 if not %22))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmn)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%25)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%24, %26)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bms)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%29)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%28, %30)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmtilt)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%33)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%32, %34)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%27, %31, %35, %36, %37)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(goto %46))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmn)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bms)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmtilt)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n", + "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n", + "\u001b[90m└──\u001b[39m Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%40, %41, %42, %43, %44)))\n", + "\u001b[90m4 ┄\u001b[39m %46 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %47 = BeamTrackingBeamlinesExt.make_static(bmn)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %48 = Base.broadcasted(%46, %47)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", + "\u001b[90m│ \u001b[39m (n = Base.materialize(%48))\n", + "\u001b[90m│ \u001b[39m %50 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %51 = BeamTrackingBeamlinesExt.make_static(bms)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %52 = Base.broadcasted(%50, %51)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", + "\u001b[90m│ \u001b[39m (s = Base.materialize(%52))\n", + "\u001b[90m│ \u001b[39m %54 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %55 = BeamTrackingBeamlinesExt.make_static(bmtilt)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %56 = Base.broadcasted(%54, %55)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", + "\u001b[90m│ \u001b[39m (tilt = Base.materialize(%56))\n", + "\u001b[90m│ \u001b[39m (order = BeamTrackingBeamlinesExt.getfield(bm, :order))\n", + "\u001b[90m│ \u001b[39m (normalized = BeamTrackingBeamlinesExt.getfield(bm, :normalized))\n", + "\u001b[90m│ \u001b[39m (integrated = BeamTrackingBeamlinesExt.getfield(bm, :integrated))\n", + "\u001b[90m│ \u001b[39m %61 = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %62 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %63 = n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %64 = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %65 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %66 = Base.broadcasted(%64, %65)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %67 = Base.broadcasted(%62, %63, %66)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %68 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %69 = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %70 = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %71 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %72 = Base.broadcasted(%70, %71)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %73 = Base.broadcasted(%68, %69, %72)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %74 = Base.broadcasted(%61, %67, %73)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (np = Base.materialize(%74))\n", + "\u001b[90m│ \u001b[39m %76 = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %77 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %78 = Base.broadcasted(BeamTrackingBeamlinesExt.:-, n)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %79 = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %80 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %81 = Base.broadcasted(%79, %80)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %82 = Base.broadcasted(%77, %78, %81)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %83 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %84 = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %85 = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %86 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %87 = Base.broadcasted(%85, %86)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %88 = Base.broadcasted(%83, %84, %87)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %89 = Base.broadcasted(%76, %82, %88)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (sp = Base.materialize(%89))\n", + "\u001b[90m│ \u001b[39m %91 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %92 = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %93 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %94 = Base.broadcasted(%91, %92, %93, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (np = Base.materialize(%94))\n", + "\u001b[90m│ \u001b[39m %96 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %97 = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %98 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %99 = Base.broadcasted(%96, %97, %98, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (sp = Base.materialize(%99))\n", + "\u001b[90m│ \u001b[39m %101 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %102 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %103 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %104 = Base.broadcasted(%101, %102, %103, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (np = Base.materialize(%104))\n", + "\u001b[90m│ \u001b[39m %106 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", + "\u001b[90m│ \u001b[39m %107 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %108 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m %109 = Base.broadcasted(%106, %107, %108, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", + "\u001b[90m│ \u001b[39m (sp = Base.materialize(%109))\n", + "\u001b[90m│ \u001b[39m %111 = Core.tuple(np, sp)\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n", + "\u001b[90m└──\u001b[39m return %111\n", + "\n" + ] + } + ], + "source": [ + "@code_warntype BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "e93f6ce2-9383-49b5-b798-3d33380c8214", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", + " BatchParam([2.0e-7, 1.0e-7])\n", + " BatchParam(0.36)" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kn" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "fb0ca353-c7a2-4c90-bd93-4e74cbb2d981", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", + " BatchParam(-0.0)\n", + " BatchParam(0.0)" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ks" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "63312df7-bd24-4dc6-9369-5323c2121925", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", + " BatchParam(-0.0)\n", + " BatchParam(0.36)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kn" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "9bf348a6-928d-459c-96dc-54ce19162426", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", + " BatchParam(-0.0)\n", + " BatchParam(0.0)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ks" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "930fb9e4-ef96-42d9-8d0d-4bcec336f585", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "search: \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1my\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22md\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mize \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer Unde\u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mI\u001b[22m\u001b[0m\u001b[1mn\u001b[22miti\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer\n", + "\n" + ] + }, + { + "data": { + "text/latex": [ + "\\begin{verbatim}\n", + "finally\n", + "\\end{verbatim}\n", + "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n", + "\n", + "\\begin{verbatim}\n", + "f = open(\"file\")\n", + "try\n", + " operate_on_file(f)\n", + "finally\n", + " close(f)\n", + "end\n", + "\\end{verbatim}\n", + "When control leaves the \\href{@ref}{\\texttt{try}} block (for example, due to a \\href{@ref}{\\texttt{return}}, or just finishing normally), \\href{@ref}{\\texttt{close(f)}} will be executed. If the \\texttt{try} block exits due to an exception, the exception will continue propagating. A \\texttt{catch} block may be combined with \\texttt{try} and \\texttt{finally} as well. In this case the \\texttt{finally} block will run after \\texttt{catch} has handled the error.\n", + "\n" + ], + "text/markdown": [ + "```\n", + "finally\n", + "```\n", + "\n", + "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n", + "\n", + "```julia\n", + "f = open(\"file\")\n", + "try\n", + " operate_on_file(f)\n", + "finally\n", + " close(f)\n", + "end\n", + "```\n", + "\n", + "When control leaves the [`try`](@ref) block (for example, due to a [`return`](@ref), or just finishing normally), [`close(f)`](@ref) will be executed. If the `try` block exits due to an exception, the exception will continue propagating. A `catch` block may be combined with `try` and `finally` as well. In this case the `finally` block will run after `catch` has handled the error.\n" + ], + "text/plain": [ + "\u001b[36m finally\u001b[39m\n", + "\n", + " Run some code when a given block of code exits, regardless of how it exits.\n", + " For example, here is how we can guarantee that an opened file is closed:\n", + "\n", + "\u001b[36m f = open(\"file\")\u001b[39m\n", + "\u001b[36m try\u001b[39m\n", + "\u001b[36m operate_on_file(f)\u001b[39m\n", + "\u001b[36m finally\u001b[39m\n", + "\u001b[36m close(f)\u001b[39m\n", + "\u001b[36m end\u001b[39m\n", + "\n", + " When control leaves the \u001b[36mtry\u001b[39m block (for example, due to a \u001b[36mreturn\u001b[39m, or just\n", + " finishing normally), \u001b[36mclose(f)\u001b[39m will be executed. If the \u001b[36mtry\u001b[39m block exits due\n", + " to an exception, the exception will continue propagating. A \u001b[36mcatch\u001b[39m block may\n", + " be combined with \u001b[36mtry\u001b[39m and \u001b[36mfinally\u001b[39m as well. In this case the \u001b[36mfinally\u001b[39m block\n", + " will run after \u001b[36mcatch\u001b[39m has handled the error." + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "?finally" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2606c31-0971-4091-a110-b22588873197", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia Global (10 threads) 1.10", + "language": "julia", + "name": "julia-global-_10-threads_-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl index 5c67483..9883a6b 100644 --- a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl +++ b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl @@ -16,7 +16,7 @@ function track!( kwargs... ) coords = bunch.coords - @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...) + return @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...) return bunch end diff --git a/ext/BeamTrackingBeamlinesExt/unpack.jl b/ext/BeamTrackingBeamlinesExt/unpack.jl index e9bb656..06a2eef 100644 --- a/ext/BeamTrackingBeamlinesExt/unpack.jl +++ b/ext/BeamTrackingBeamlinesExt/unpack.jl @@ -246,7 +246,7 @@ function universal!( # Evolve time through whole element bunch.t_ref += L/beta_gamma_to_v(beta_gamma_ref) - + return kc # noinline necessary here for small binaries and faster execution @noinline launch!(coords, kc; kwargs...) return nothing diff --git a/ext/BeamTrackingBeamlinesExt/utils.jl b/ext/BeamTrackingBeamlinesExt/utils.jl index f47bfa3..0482138 100644 --- a/ext/BeamTrackingBeamlinesExt/utils.jl +++ b/ext/BeamTrackingBeamlinesExt/utils.jl @@ -77,31 +77,34 @@ This works for both BMultipole and BMultipoleParams. Branchless bc SIMD -> basic no loss in computing both but benefit of branchless. """ @inline function get_strengths(bm, L, p_over_q_ref) - if isconcretetype(eltype(bm.n)) - T = promote_type(eltype(bm.n), + bmn = getfield(bm, :n) + bms = getfield(bm, :s) + bmtilt = getfield(bm, :tilt) + if isconcretetype(eltype(bmn)) + T = promote_type(eltype(bmn), typeof(L), typeof(p_over_q_ref) ) else - if bm.n isa AbstractArray - T = promote_type(reduce(promote_type, typeof.(bm.n)), - reduce(promote_type, typeof.(bm.s)), - reduce(promote_type, typeof.(bm.tilt)), + if bmn isa AbstractArray + T = promote_type(reduce(promote_type, typeof.(bmn)), + reduce(promote_type, typeof.(bms)), + reduce(promote_type, typeof.(bmtilt)), typeof(L), typeof(p_over_q_ref) ) else - T = promote_type(typeof(bm.n), - typeof(bm.s), - typeof(bm.tilt), + T = promote_type(typeof(bmn), + typeof(bms), + typeof(bmtilt), typeof(L), typeof(p_over_q_ref) ) end end - n = T.(make_static(bm.n)) - s = T.(make_static(bm.s)) - tilt = T.(make_static(bm.tilt)) - order = bm.order - normalized = bm.normalized - integrated = bm.integrated + n = T.(make_static(bmn)) + s = T.(make_static(bms)) + tilt = T.(make_static(bmtilt)) + order = getfield(bm, :order) + normalized = getfield(bm, :normalized) + integrated = getfield(bm, :integrated) np = @. n*cos(order*tilt) + s*sin(order*tilt) sp = @. -n*sin(order*tilt) + s*cos(order*tilt) np = @. ifelse(!normalized, np/p_over_q_ref, np) @@ -112,31 +115,34 @@ no loss in computing both but benefit of branchless. end @inline function get_integrated_strengths(bm, L, p_over_q_ref) - if isconcretetype(eltype(bm.n)) - T = promote_type(eltype(bm.n), + bmn = getfield(bm, :n) + bms = getfield(bm, :s) + bmtilt = getfield(bm, :tilt) + if isconcretetype(eltype(bmn)) + T = promote_type(eltype(bmn), typeof(L), typeof(p_over_q_ref) ) else - if bm.n isa AbstractArray - T = promote_type(reduce(promote_type, typeof.(bm.n)), - reduce(promote_type, typeof.(bm.s)), - reduce(promote_type, typeof.(bm.tilt)), + if bmn isa AbstractArray + T = promote_type(reduce(promote_type, typeof.(bmn)), + reduce(promote_type, typeof.(bms)), + reduce(promote_type, typeof.(bmtilt)), typeof(L), typeof(p_over_q_ref) ) else - T = promote_type(typeof(bm.n), - typeof(bm.s), - typeof(bm.tilt), + T = promote_type(typeof(bmn), + typeof(bms), + typeof(bmtilt), typeof(L), typeof(p_over_q_ref) ) end end - n = T.(make_static(bm.n)) - s = T.(make_static(bm.s)) - tilt = T.(make_static(bm.tilt)) - order = bm.order - normalized = bm.normalized - integrated = bm.integrated + n = T.(make_static(bmn)) + s = T.(make_static(bms)) + tilt = T.(make_static(bmtilt)) + order = getfield(bm, :order) + normalized = getfield(bm, :normalized) + integrated = getfield(bm, :integrated) np = @. n*cos(order*tilt) + s*sin(order*tilt) sp = @. -n*sin(order*tilt) + s*cos(order*tilt) np = @. ifelse(!normalized, np/p_over_q_ref, np) diff --git a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl index 007017a..fd14bba 100644 --- a/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl +++ b/ext/BeamTrackingCUDAExt/BeamTrackingCUDAExt.jl @@ -8,9 +8,7 @@ mean 0 and standard deviations sigma1, sigma2 using a Box-Muller transform. This was implemented because CUDA.randn has some horrible -compiler bug, but CUDA.rand seems to be ok. Nonetheless -this may also give CPU performance benefits with radiation -because we already need to compute two randn's anyways. +compiler bug, but CUDA.rand seems to be ok. """ function gaussian_random(::CuDeviceArray, sigma1, sigma2) s, c = sincospi(2 * rand()) diff --git a/src/BeamTracking.jl b/src/BeamTracking.jl index 2e21520..ac941f8 100644 --- a/src/BeamTracking.jl +++ b/src/BeamTracking.jl @@ -18,7 +18,7 @@ using KernelAbstractions import GTPSA: sincu, sinhcu -export Bunch, State, ParticleView, Time, TimeDependentParam +export Bunch, State, ParticleView, Time, TimeDependentParam, BatchParam export Yoshida, Yoshida, MatrixKick, BendKick, SolenoidKick, DriftKick, Exact export Fringe export track! diff --git a/src/batch.jl b/src/batch.jl index 6375732..2d63e26 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -74,49 +74,148 @@ Base.zero(b::BatchParam) = BatchParam(zero(first(b.batch))) Base.one(b::BatchParam) = BatchParam(one(first(b.batch))) # Now define the math operations: -for op in (:+,:-,:*,:/,:^) - @eval begin - Base.$op(ba::BatchParam, b::Number) = (let b = b; return BatchParam(map(x->$op(x, b), ba.batch)); end) - Base.$op(a::Number, bb::BatchParam) = (let a = a; return BatchParam(map(x->$op(a, x), bb.batch)); end) - - function Base.$op(ba::BatchParam, bb::BatchParam) - if ba.batch isa Number - if bb.batch isa Number - return BatchParam($op(ba.batch, bb.batch)) - else - let a = ba.batch - # WE HAVE TO WRITE THIS BY HAND FOR EACH OP!!! - if a ≈ 0 # So that e.g. big arrays aren't made for skew strengths when only normal is batch - return BatchParam(0f0) - else - return BatchParam(map((bbi)->$op(a, bbi), bb.batch)) - end - end - end - elseif bb.batch isa Number - let b = bb.batch - if b ≈ 0 - return BatchParam(0f0) - else - return BatchParam(map((bai)->$op(bai, b), ba.batch)) - end +# The operations are individually-specialized for each operator, assuming that the +# most expensive step is creating temporary arrays, not type instability. As such, +# each are defined in a way as to minimize number of temporary arrays during unpacking, +# checking for e.g. 0's and 1's. +function _batch_addsub(batch_a, batch_b, op::T) where {T<:Union{typeof(+),typeof(-)}} + if batch_a isa Number + if batch_b isa Number + return BatchParam(op(batch_a, batch_b)) + else + if batch_a ≈ 0 # add/sub by zero gives identity + return BatchParam(batch_b) + else + let a = batch_a + return BatchParam(map((bi)->op(a, bi), batch_b)) end - elseif length(ba.batch) == length(bb.batch) - return BatchParam(map((bai,bbi)->$op(bai, bbi), ba.batch, bb.batch)) + end + end + elseif batch_b isa Number + if batch_b ≈ 0 # add/sub by zero gives identity + return BatchParam(batch_a) + else + let b = batch_b + return BatchParam(map((ai)->op(ai, b), batch_a)) + end + end + elseif length(batch_a) == length(batch_b) + return BatchParam(map((ai,bi)->op(ai, bi), batch_a, batch_b)) + else + error("Cannot perform operation $(op) with two non-scalar BatchParams of differing + lengths (received lengths $(length(batch_a)) and $(length(batch_b))).") + end +end + +Base.:+(ba::BatchParam, n::Number) = _batch_addsub(ba.batch, n, +) +Base.:+(n::Number, bb::BatchParam) = _batch_addsub(n, bb.batch, +) +Base.:+(ba::BatchParam, bb::BatchParam) = _batch_addsub(ba.batch, bb.batch, +) + +Base.:-(ba::BatchParam, n::Number) = _batch_addsub(ba.batch, n, -) +Base.:-(n::Number, bb::BatchParam) = _batch_addsub(n, bb.batch, -) +Base.:-(ba::BatchParam, bb::BatchParam) = _batch_addsub(ba.batch, bb.batch, -) + +function _batch_mul(batch_a, batch_b) + if batch_a isa Number + if batch_b isa Number + return BatchParam(*(batch_a, batch_b)) + else + if batch_a ≈ 0 # mul by 0 gives 0 -> make scalar + return BatchParam(0f0) + elseif batch_a ≈ 1 # mul by 1 gives identity + return BatchParam(batch_b) else - error("Cannot perform operation $($op) with two non-scalar BatchParams of differing - lengths (received lengths $(length(ba.batch)) and $(length(bb.batch))).") + let a = batch_a + return BatchParam(map((bi)->*(a, bi), batch_b)) + end + end + end + elseif batch_b isa Number + if batch_b ≈ 0 # mul by 0 gives 0 -> make scalar + return BatchParam(0f0) + elseif batch_b ≈ 1 # mul by 1 gives identity + return BatchParam(batch_a) + else + let b = batch_b + return BatchParam(map((ai)->*(ai, b), batch_a)) + end + end + elseif length(batch_a) == length(batch_b) + return BatchParam(map((ai,bi)->*(ai, bi), batch_a, batch_b)) + else + error("Cannot perform operation * with two non-scalar BatchParams of differing + lengths (received lengths $(length(batch_a)) and $(length(batch_b))).") + end +end + +Base.:*(ba::BatchParam, n::Number) = _batch_mul(ba.batch, n) +Base.:*(n::Number, bb::BatchParam) = _batch_mul(n, bb.batch) +Base.:*(ba::BatchParam, bb::BatchParam) = _batch_mul(ba.batch, bb.batch) + +function _batch_div(batch_a, batch_b) + if batch_a isa Number + if batch_b isa Number + return BatchParam(/(batch_a, batch_b)) + else + let a = batch_a + return BatchParam(map((bi)->/(a, bi), batch_b)) + end + end + elseif batch_b isa Number + if batch_b ≈ 0 # div by 0 gives Inf -> make scalar + return BatchParam(Inf32) + elseif batch_b ≈ 1 # div by 1 gives identity + return BatchParam(batch_a) + else + let b = batch_b + return BatchParam(map((ai)->/(ai, b), batch_a)) end end + elseif length(batch_a) == length(batch_b) + return BatchParam(map((ai,bi)->/(ai, bi), batch_a, batch_b)) + else + error("Cannot perform operation / with two non-scalar BatchParams of differing + lengths (received lengths $(length(batch_a)) and $(length(batch_b))).") + end +end + +Base.:/(ba::BatchParam, n::Number) = _batch_div(ba.batch, n) +Base.:/(n::Number, bb::BatchParam) = _batch_div(n, bb.batch) +Base.:/(ba::BatchParam, bb::BatchParam) = _batch_div(ba.batch, bb.batch) +# for now no special things for pow, unsure if called anywhere. +function _batch_pow(batch_a, batch_b) + if batch_a isa Number + if batch_b isa Number + return BatchParam(^(batch_a, batch_b)) + else + let a = batch_a + return BatchParam(map((bi)->^(a, bi), batch_b)) + end + end + elseif batch_b isa Number + let b = batch_b + return BatchParam(map((ai)->^(ai, b), batch_a)) + end + elseif length(batch_a) == length(batch_b) + return BatchParam(map((ai,bi)->^(ai, bi), batch_a, batch_b)) + else + error("Cannot perform operation ^ with two non-scalar BatchParams of differing + lengths (received lengths $(length(batch_a)) and $(length(batch_b))).") end end +Base.:^(ba::BatchParam, n::Number) = _batch_pow(ba.batch, n) +Base.:^(n::Number, bb::BatchParam) = _batch_pow(n, bb.batch) +Base.:^(ba::BatchParam, bb::BatchParam) = _batch_pow(ba.batch, bb.batch) + function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N} return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch)) end -for t = (:+, :-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv, +Base.:+(b::BatchParam) = b # identity + +for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv, :coth, :asin, :acos, :atan, :acot, :asinh, :acosh, :atanh, :acoth, :sinc, :csc, :float, :csch, :acsc, :acsch, :sec, :sech, :asec, :asech, :conj, :log10, :isnan, :sign, :abs) @eval begin diff --git a/src/kernel.jl b/src/kernel.jl index 1ca4698..c943c4f 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -9,11 +9,14 @@ blank_kernel!(args...) = nothing kernel::K = blank_kernel! args::A = () function KernelCall(kernel, args) + #=if args != () @show args @show batch_lower(args) + @show typeof(batch_lower(args)) error("") end=# + return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args)) _args = map(t->time_lower(batch_lower(t)), args) new{typeof(kernel),typeof(_args)}(kernel, _args) end From ebba96d2bcca2bb11cb7caed29d3b3ad26d4ccbb Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 10:40:25 -0500 Subject: [PATCH 13/30] kind of working --- Untitled.ipynb | 271 ++++++------------ .../BeamTrackingBeamlinesExt.jl | 2 +- ext/BeamTrackingBeamlinesExt/unpack.jl | 2 +- src/batch.jl | 12 +- src/kernel.jl | 8 +- 5 files changed, 108 insertions(+), 187 deletions(-) diff --git a/Untitled.ipynb b/Untitled.ipynb index a2c867f..811310e 100644 --- a/Untitled.ipynb +++ b/Untitled.ipynb @@ -14,12 +14,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 206, "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477", "metadata": {}, "outputs": [], "source": [ - "using BeamTracking, Beamlines" + "using BeamTracking, Beamlines, StaticArrays, SIMD" ] }, { @@ -68,29 +68,20 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, + "id": "c977d76f-afc1-4387-93fe-43efcf565893", + "metadata": {}, + "outputs": [], + "source": [ + "a = SA[1,2]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "e739ea1c-147a-447b-9a98-8585a51f9d24", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Setting bunch.species = Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ) (reference species from the Beamline)\n", - "Setting bunch.p_over_q_ref = -60.04153711147289 (reference p_over_q_ref from the Beamline)\n" - ] - }, - { - "data": { - "text/plain": [ - "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.1341179241307242e-8, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "N = 1000\n", "b0 = Bunch(zeros(N,6), zeros(N,4))\n", @@ -99,49 +90,69 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 180, "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.1341179241307242e-8, 35225.12124230655))" + "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.8012461147958558e-8, 35225.12124230655))" ] }, - "execution_count": 81, + "execution_count": 180, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "\n", "kc = track!(b0, fodo.line[1])" ] }, { "cell_type": "code", "execution_count": null, - "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6", + "id": "2e9aac96-d73e-4d21-a10a-0034262b9ab3", "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 202, + "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (1.0e-7, 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, 1.0e-7, 0, 0), Val{true}(), Val{true}(), 0.5)" + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "kn, ks = get_m" + "args = kc.chain[1].args\n", + "#@time BeamTracking.static_batchcheck(args)\n", + "@inbounds BeamTracking.beval(args, 2)" ] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 174, "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5))" + "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, SVector{2, BatchParam}, SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5))" ] }, - "execution_count": 95, + "execution_count": 174, "metadata": {}, "output_type": "execute_result" } @@ -152,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 175, "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158", "metadata": { "scrolled": true @@ -161,10 +172,10 @@ { "data": { "text/plain": [ - "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam(-0.0), 0, 0), Val{true}(), Val{true}(), 0.5)" + "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)" ] }, - "execution_count": 82, + "execution_count": 175, "metadata": {}, "output_type": "execute_result" } @@ -175,44 +186,65 @@ }, { "cell_type": "code", - "execution_count": 93, - "id": "6c89cfe5-5b3e-46e7-a6e7-b713a54930fd", + "execution_count": 176, + "id": "5f17a22d-aae7-4a1d-a7db-d5ec80fd1c7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "true" + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "args[end-3][4].batch === args[2][9][1].batch" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "ecf2a0dd-c119-4d0e-87ab-18d31cdba961", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "false" + "BatchParam([2.0e-7, 1.0e-7])" ] }, - "execution_count": 93, + "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "1e-7≈0" + "args[2][9][1]" ] }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 178, "id": "b762b051-237e-4917-b0ff-8f35fb25beb1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam(-0.0), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" + "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)" ] }, - "execution_count": 90, + "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "args[2]" + "batch_lower(args)" ] }, { @@ -276,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 207, "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4", "metadata": {}, "outputs": [], @@ -507,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 153, "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd", "metadata": {}, "outputs": [ @@ -515,145 +547,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "MethodInstance for BeamTrackingBeamlinesExt.get_strengths(::BMultipoleParams{BatchParam, 2}, ::Float64, ::Float64)\n", - " from get_strengths(\u001b[90mbm\u001b[39m, \u001b[90mL\u001b[39m, \u001b[90mp_over_q_ref\u001b[39m)\u001b[90m @\u001b[39m \u001b[90mBeamTrackingBeamlinesExt\u001b[39m \u001b[90m~/.julia/dev/BeamTracking/ext/BeamTrackingBeamlinesExt/\u001b[39m\u001b[90m\u001b[4mutils.jl:79\u001b[24m\u001b[39m\n", - "Arguments\n", - " #self#\u001b[36m::Core.Const(BeamTrackingBeamlinesExt.get_strengths)\u001b[39m\n", - " bm\u001b[36m::BMultipoleParams{BatchParam, 2}\u001b[39m\n", - " L\u001b[36m::Float64\u001b[39m\n", - " p_over_q_ref\u001b[36m::Float64\u001b[39m\n", - "Locals\n", - " sp\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - " np\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - " integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", - " normalized\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", - " order\u001b[36m::StaticArraysCore.SVector{2, Int64}\u001b[39m\n", - " tilt\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - " s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - " n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - " T\u001b[36m::Type{BatchParam}\u001b[39m\n", - " bmtilt\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", - " bms\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", - " bmn\u001b[36m::StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}}\u001b[39m\n", - "Body\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n", - "\u001b[90m1 ─\u001b[39m nothing\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(sp))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(np))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(integrated))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(normalized))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(order))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(tilt))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(s))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(n))\n", - "\u001b[90m│ \u001b[39m Core.NewvarNode(:(T))\n", - "\u001b[90m│ \u001b[39m (bmn = BeamTrackingBeamlinesExt.getfield(bm, :n))\n", - "\u001b[90m│ \u001b[39m (bms = BeamTrackingBeamlinesExt.getfield(bm, :s))\n", - "\u001b[90m│ \u001b[39m (bmtilt = BeamTrackingBeamlinesExt.getfield(bm, :tilt))\n", - "\u001b[90m│ \u001b[39m %14 = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %15 = BeamTrackingBeamlinesExt.isconcretetype(%14)\u001b[36m::Core.Const(true)\u001b[39m\n", - "\u001b[90m└──\u001b[39m goto #3 if not %15\n", - "\u001b[90m2 ─\u001b[39m %17 = BeamTrackingBeamlinesExt.eltype(bmn)\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %18 = BeamTrackingBeamlinesExt.typeof(L)\u001b[36m::Core.Const(Float64)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %19 = BeamTrackingBeamlinesExt.typeof(p_over_q_ref)\u001b[36m::Core.Const(Float64)\u001b[39m\n", - "\u001b[90m│ \u001b[39m (T = BeamTrackingBeamlinesExt.promote_type(%17, %18, %19))\n", - "\u001b[90m└──\u001b[39m goto #4\n", - "\u001b[90m3 ─\u001b[39m Core.Const(:(bmn isa BeamTrackingBeamlinesExt.AbstractArray))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(goto %40 if not %22))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmn)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%25)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%24, %26)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bms)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%29)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%28, %30)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.promote_type))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.broadcasted(BeamTrackingBeamlinesExt.typeof, bmtilt)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(Base.materialize(%33)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.reduce(%32, %34)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%27, %31, %35, %36, %37)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(goto %46))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmn)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bms)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(bmtilt)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(L)))\n", - "\u001b[90m│ \u001b[39m Core.Const(:(BeamTrackingBeamlinesExt.typeof(p_over_q_ref)))\n", - "\u001b[90m└──\u001b[39m Core.Const(:(T = BeamTrackingBeamlinesExt.promote_type(%40, %41, %42, %43, %44)))\n", - "\u001b[90m4 ┄\u001b[39m %46 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %47 = BeamTrackingBeamlinesExt.make_static(bmn)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %48 = Base.broadcasted(%46, %47)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", - "\u001b[90m│ \u001b[39m (n = Base.materialize(%48))\n", - "\u001b[90m│ \u001b[39m %50 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %51 = BeamTrackingBeamlinesExt.make_static(bms)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %52 = Base.broadcasted(%50, %51)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", - "\u001b[90m│ \u001b[39m (s = Base.materialize(%52))\n", - "\u001b[90m│ \u001b[39m %54 = T\u001b[36m::Core.Const(BatchParam)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %55 = BeamTrackingBeamlinesExt.make_static(bmtilt)\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %56 = Base.broadcasted(%54, %55)\u001b[36m::Core.PartialStruct(Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, Type{BatchParam}, Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Any[Core.Const(StaticArraysCore.StaticArrayStyle{1}()), Core.Const(BatchParam), Tuple{StaticArraysCore.SVector{2, BatchParam}}, Core.Const(nothing)])\u001b[39m\n", - "\u001b[90m│ \u001b[39m (tilt = Base.materialize(%56))\n", - "\u001b[90m│ \u001b[39m (order = BeamTrackingBeamlinesExt.getfield(bm, :order))\n", - "\u001b[90m│ \u001b[39m (normalized = BeamTrackingBeamlinesExt.getfield(bm, :normalized))\n", - "\u001b[90m│ \u001b[39m (integrated = BeamTrackingBeamlinesExt.getfield(bm, :integrated))\n", - "\u001b[90m│ \u001b[39m %61 = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %62 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %63 = n\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %64 = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %65 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %66 = Base.broadcasted(%64, %65)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %67 = Base.broadcasted(%62, %63, %66)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %68 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %69 = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %70 = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %71 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %72 = Base.broadcasted(%70, %71)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %73 = Base.broadcasted(%68, %69, %72)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %74 = Base.broadcasted(%61, %67, %73)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (np = Base.materialize(%74))\n", - "\u001b[90m│ \u001b[39m %76 = BeamTrackingBeamlinesExt.:+\u001b[36m::Core.Const(+)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %77 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %78 = Base.broadcasted(BeamTrackingBeamlinesExt.:-, n)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %79 = BeamTrackingBeamlinesExt.sin\u001b[36m::Core.Const(sin)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %80 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %81 = Base.broadcasted(%79, %80)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %82 = Base.broadcasted(%77, %78, %81)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %83 = BeamTrackingBeamlinesExt.:*\u001b[36m::Core.Const(*)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %84 = s\u001b[36m::StaticArraysCore.SVector{2, BatchParam}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %85 = BeamTrackingBeamlinesExt.cos\u001b[36m::Core.Const(cos)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %86 = Base.broadcasted(BeamTrackingBeamlinesExt.:*, order, tilt)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %87 = Base.broadcasted(%85, %86)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %88 = Base.broadcasted(%83, %84, %87)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %89 = Base.broadcasted(%76, %82, %88)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(-), Tuple{StaticArraysCore.SVector{2, BatchParam}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(sin), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, BatchParam}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(cos), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(*), Tuple{StaticArraysCore.SVector{2, Int64}, StaticArraysCore.SVector{2, BatchParam}}}}}}}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (sp = Base.materialize(%89))\n", - "\u001b[90m│ \u001b[39m %91 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %92 = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %93 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %94 = Base.broadcasted(%91, %92, %93, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (np = Base.materialize(%94))\n", - "\u001b[90m│ \u001b[39m %96 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %97 = Base.broadcasted(BeamTrackingBeamlinesExt.:!, normalized)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %98 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, p_over_q_ref)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %99 = Base.broadcasted(%96, %97, %98, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(!), Tuple{StaticArraysCore.SVector{2, Bool}}}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (sp = Base.materialize(%99))\n", - "\u001b[90m│ \u001b[39m %101 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %102 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %103 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, np, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %104 = Base.broadcasted(%101, %102, %103, np)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (np = Base.materialize(%104))\n", - "\u001b[90m│ \u001b[39m %106 = BeamTrackingBeamlinesExt.ifelse\u001b[36m::Core.Const(ifelse)\u001b[39m\n", - "\u001b[90m│ \u001b[39m %107 = integrated\u001b[36m::StaticArraysCore.SVector{2, Bool}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %108 = Base.broadcasted(BeamTrackingBeamlinesExt.:/, sp, L)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m %109 = Base.broadcasted(%106, %107, %108, sp)\u001b[36m::Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(ifelse), Tuple{StaticArraysCore.SVector{2, Bool}, Base.Broadcast.Broadcasted{StaticArraysCore.StaticArrayStyle{1}, Nothing, typeof(/), Tuple{StaticArraysCore.SVector{2, BatchParam}, Float64}}, StaticArraysCore.SVector{2, BatchParam}}}\u001b[39m\n", - "\u001b[90m│ \u001b[39m (sp = Base.materialize(%109))\n", - "\u001b[90m│ \u001b[39m %111 = Core.tuple(np, sp)\u001b[36m::Tuple{StaticArraysCore.SVector{2, BatchParam}, StaticArraysCore.SVector{2, BatchParam}}\u001b[39m\n", - "\u001b[90m└──\u001b[39m return %111\n", - "\n" + " 0.000089 seconds (104 allocations: 1.906 KiB)\n" ] + }, + { + "data": { + "text/plain": [ + "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "@code_warntype BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" + "@time BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" ] }, { diff --git a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl index 9883a6b..5c67483 100644 --- a/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl +++ b/ext/BeamTrackingBeamlinesExt/BeamTrackingBeamlinesExt.jl @@ -16,7 +16,7 @@ function track!( kwargs... ) coords = bunch.coords - return @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...) + @noinline _track!(coords, bunch, ele, ele.tracking_method, scalar_params, ramp_particle_energy_without_rf; kwargs...) return bunch end diff --git a/ext/BeamTrackingBeamlinesExt/unpack.jl b/ext/BeamTrackingBeamlinesExt/unpack.jl index 06a2eef..e9bb656 100644 --- a/ext/BeamTrackingBeamlinesExt/unpack.jl +++ b/ext/BeamTrackingBeamlinesExt/unpack.jl @@ -246,7 +246,7 @@ function universal!( # Evolve time through whole element bunch.t_ref += L/beta_gamma_to_v(beta_gamma_ref) - return kc + # noinline necessary here for small binaries and faster execution @noinline launch!(coords, kc; kwargs...) return nothing diff --git a/src/batch.jl b/src/batch.jl index 2d63e26..cd4aaa8 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -262,7 +262,7 @@ batch_lower(bp::T) where {T<:Tuple} = map(bi->batch_lower(bi), bp) batch_lower(bp::SArray{N,BatchParam}) where {N} = batch_lower(Tuple(bp)) static_batchcheck(bp) = false -static_batchcheck(::BatchParam) = true +static_batchcheck(::_LoweredBatchParam) = true @unroll function static_batchcheck(t::Tuple) @unroll for ti in t if static_batchcheck(ti) @@ -272,6 +272,7 @@ static_batchcheck(::BatchParam) = true return false end +#= """ lane2vec(lane::SIMD.VecRange{N}) @@ -289,10 +290,15 @@ function lane2vec(lane::SIMD.VecRange{N}) where {N} return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}())) end end +=# + +@inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] -lane2vec(i) = i +@inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} + modlane = SIMD.VecRange{N}(mod1(lane.i, B)) # mod start around batch + return b.batch[modlane] +end -@inline beval(b::_LoweredBatchParam, i) = b.batch[lane2vec(i)] @inline beval(b, i) = b # === THIS BLOCK WAS WRITTEN BY CLAUDE === diff --git a/src/kernel.jl b/src/kernel.jl index c943c4f..4bdf407 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -16,12 +16,15 @@ blank_kernel!(args...) = nothing @show typeof(batch_lower(args)) error("") end=# - return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args)) + #return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args)) _args = map(t->time_lower(batch_lower(t)), args) new{typeof(kernel),typeof(_args)}(kernel, _args) end end +# In case KernelCall contains batch GPU array +Adapt.@adapt_structure KernelCall + # Store the state of the reference coordinate system # Needed for time-dependent parameters struct RefState{T,U} @@ -36,6 +39,9 @@ struct KernelChain{C<:Tuple{Vararg{<:KernelCall}}, S<:Union{Nothing,RefState}} KernelChain(chain, ref=nothing) = new{typeof(chain), typeof(ref)}(chain, ref) end +# In case KernelChain contains batch GPU array +Adapt.@adapt_structure KernelChain + KernelChain(::Val{N}, ref=nothing) where {N} = KernelChain(ntuple(t->KernelCall(), Val{N}()), ref) push(kc::KernelChain, kcall::Nothing) = kc From 00b9bf39bdbfb1fd3a34a3e2b6bfb808c99549f9 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 11:28:47 -0500 Subject: [PATCH 14/30] fix Adapt for GPU batch param --- Untitled.ipynb | 58 ++++++++++++++++---------------------------------- src/batch.jl | 2 +- 2 files changed, 19 insertions(+), 41 deletions(-) diff --git a/Untitled.ipynb b/Untitled.ipynb index 811310e..82d9d15 100644 --- a/Untitled.ipynb +++ b/Untitled.ipynb @@ -34,79 +34,57 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 225, "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Beamline:\n", - " species_ref = electron\n", - " E_ref = 1.8e10\n", - "\n", - " \u001b[1m Index \u001b[0m \u001b[1m Name \u001b[0m \u001b[1m Kind \u001b[0m \u001b[1m s [m] \u001b[0m\n", - " 1 qf Quadrupole 0\n", - " 2 d Drift 0.5\n", - " 3 qd Quadrupole 1.7\n", - " 4 d Drift 2.2\n" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "@elements begin\n", " qf = Quadrupole(Kn1=0.36, L=0.5)\n", " d = Drift(L=1.2)\n", " qd = Quadrupole(Kn1=-0.36, L=0.5)\n", "end\n", - "fodo = Beamline([qf, d, qd, d], species_ref=Species(\"electron\"), E_ref=18e9)" + "fodo = Beamline(repeat([qf, d, qd, d], 10000), species_ref=Species(\"electron\"), E_ref=18e9);" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 226, "id": "c977d76f-afc1-4387-93fe-43efcf565893", "metadata": {}, "outputs": [], "source": [ - "a = SA[1,2]\n" + "foreach(x->x.Bn0=1e-7, fodo.line)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 227, "id": "e739ea1c-147a-447b-9a98-8585a51f9d24", "metadata": {}, - "outputs": [], - "source": [ - "N = 1000\n", - "b0 = Bunch(zeros(N,6), zeros(N,4))\n", - "track!(b0, fodo)" - ] - }, - { - "cell_type": "code", - "execution_count": 180, - "id": "f41dc24b-8aac-4f70-ab9d-1a9a7eac0d19", - "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 12.356829 seconds (2.74 M allocations: 130.921 MiB, 0.41% gc time)\n" + ] + }, { "data": { "text/plain": [ - "BeamTracking.KernelChain{Tuple{BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}, Vararg{BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}, 5}}, BeamTracking.RefState{Float64, Float64}}((BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, Tuple{BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Float64}, Tuple{Float64, Float64}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ()), BeamTracking.KernelCall{typeof(BeamTracking.blank_kernel!), Tuple{}}(BeamTracking.blank_kernel!, ())), BeamTracking.RefState{Float64, Float64}(1.8012461147958558e-8, 35225.12124230655))" + "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 0.00011341179241308246, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; … ; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" ] }, - "execution_count": 180, + "execution_count": 227, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "kc = track!(b0, fodo.line[1])" + "N = 200\n", + "b0 = Bunch(zeros(N,6), zeros(N,4), species=fodo.species_ref, p_over_q_ref=fodo.p_over_q_ref)\n", + "@time track!(b0, fodo)" ] }, { diff --git a/src/batch.jl b/src/batch.jl index cd4aaa8..3a5e1e1 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -59,7 +59,7 @@ end # Necessary for GPU compatibility if batch is a GPU array function Adapt.adapt_structure(to, lbp::_LoweredBatchParam{N}) where {N} batch = Adapt.adapt_structure(to, lbp.batch) - return _LoweredBatchParam{N,typeof(batch)}(batch) + return _LoweredBatchParam{N}(batch) end # BatchParam will act like a number From 4d2d0240fb1ee1f5b436af1de4da5bc8251777ca Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 14:34:49 -0500 Subject: [PATCH 15/30] done --- Untitled.ipynb | 742 --------------------------------------------- src/batch.jl | 52 ++-- src/kernel.jl | 12 - test/batch_test.jl | 167 ++++++++++ test/runtests.jl | 3 +- 5 files changed, 199 insertions(+), 777 deletions(-) delete mode 100644 Untitled.ipynb create mode 100644 test/batch_test.jl diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index 82d9d15..0000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,742 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 51, - "id": "becfe72f-f151-4c6f-91cd-4e60b4b1acab", - "metadata": {}, - "outputs": [], - "source": [ - "using Revise\n", - "using ProfileView\n", - "using BenchmarkTools" - ] - }, - { - "cell_type": "code", - "execution_count": 206, - "id": "ca8e5ad0-3b8a-4ef5-9e34-0b9ee1c91477", - "metadata": {}, - "outputs": [], - "source": [ - "using BeamTracking, Beamlines, StaticArrays, SIMD" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "2da06e85-ae4e-4742-9b6e-60f3d68ca6f6", - "metadata": {}, - "outputs": [], - "source": [ - "using BeamTracking: batch_lower" - ] - }, - { - "cell_type": "code", - "execution_count": 225, - "id": "4a3e6cf7-13ff-423b-a15d-f180a4aed64d", - "metadata": {}, - "outputs": [], - "source": [ - "@elements begin\n", - " qf = Quadrupole(Kn1=0.36, L=0.5)\n", - " d = Drift(L=1.2)\n", - " qd = Quadrupole(Kn1=-0.36, L=0.5)\n", - "end\n", - "fodo = Beamline(repeat([qf, d, qd, d], 10000), species_ref=Species(\"electron\"), E_ref=18e9);" - ] - }, - { - "cell_type": "code", - "execution_count": 226, - "id": "c977d76f-afc1-4387-93fe-43efcf565893", - "metadata": {}, - "outputs": [], - "source": [ - "foreach(x->x.Bn0=1e-7, fodo.line)" - ] - }, - { - "cell_type": "code", - "execution_count": 227, - "id": "e739ea1c-147a-447b-9a98-8585a51f9d24", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 12.356829 seconds (2.74 M allocations: 130.921 MiB, 0.41% gc time)\n" - ] - }, - { - "data": { - "text/plain": [ - "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 0.00011341179241308246, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; … ; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0; 5.330798569349469e-7 4.314649197259394e-8 … -1.4509402652506234e-11 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" - ] - }, - "execution_count": 227, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "N = 200\n", - "b0 = Bunch(zeros(N,6), zeros(N,4), species=fodo.species_ref, p_over_q_ref=fodo.p_over_q_ref)\n", - "@time track!(b0, fodo)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2e9aac96-d73e-4d21-a10a-0034262b9ab3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 202, - "id": "8ee3fc38-d335-4a74-ada7-b9db39406fe6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (1.0e-7, 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, 1.0e-7, 0, 0), Val{true}(), Val{true}(), 0.5)" - ] - }, - "execution_count": 202, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "args = kc.chain[1].args\n", - "#@time BeamTracking.static_batchcheck(args)\n", - "@inbounds BeamTracking.beval(args, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "9ba053b1-968f-4c5f-84a1-d96837ef06de", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BeamTracking.KernelCall{typeof(BeamTracking.order_four_integrator!), Tuple{typeof(BeamTracking.dkd_multipole!), Tuple{Float64, Float64, Bool, Float64, Float64, Float64, Float64, SVector{2, Int64}, SVector{2, BatchParam}, SVector{2, BatchParam}}, Nothing, Float64, Int64, Tuple{Float64, Float64, Int64, BatchParam, Int64, Int64}, Val{true}, Val{true}, Float64}}(BeamTracking.order_four_integrator!, (BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5))" - ] - }, - "execution_count": 174, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kc.chain[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "ee21d4ec-c7b3-4371-9f79-b5a146913158", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)]), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BatchParam([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)" - ] - }, - "execution_count": 175, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "args= kc.chain[1].args" - ] - }, - { - "cell_type": "code", - "execution_count": 176, - "id": "5f17a22d-aae7-4a1d-a7db-d5ec80fd1c7c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "true" - ] - }, - "execution_count": 176, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "args[end-3][4].batch === args[2][9][1].batch" - ] - }, - { - "cell_type": "code", - "execution_count": 177, - "id": "ecf2a0dd-c119-4d0e-87ab-18d31cdba961", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BatchParam([2.0e-7, 1.0e-7])" - ] - }, - "execution_count": 177, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "args[2][9][1]" - ] - }, - { - "cell_type": "code", - "execution_count": 178, - "id": "b762b051-237e-4917-b0ff-8f35fb25beb1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(BeamTracking.dkd_multipole!, (-1.0, 510998.95069, false, 0.9999999995970371, 1.2408091675351963e9, 2.8388830605328518e-5, 0.0011596521804599913, [1, 2], (BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0.36), (-0.0, 0.0)), nothing, 0.5, 1, (0.0011596521804599913, 2.8388830605328518e-5, 0, BeamTracking._LoweredBatchParam{2, SVector{2, Float64}}([2.0e-7, 1.0e-7]), 0, 0), Val{true}(), Val{true}(), 0.5)" - ] - }, - "execution_count": 178, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "batch_lower(args)" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "id": "702fa820-feb2-46a1-b84d-a763de300a8c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-0.0" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "batch_lower(args[2][9])[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "3f49a56d-fd3b-4a50-9419-bfa5db328b51", - "metadata": {}, - "outputs": [], - "source": [ - "foreach(x->x.Bn0=1e-7, fodo.line)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "3e7a06b6-1f48-474a-807e-eb9515488897", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 2.309 ms (409 allocations: 19.30 KiB)\n" - ] - }, - { - "data": { - "text/plain": [ - "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 1.9552193012013305e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; … ; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0; 4.7582316649897195e-7 5.5059121732740164e-8 … -2.4967911429221368e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@btime track!($b0, $fodo)" - ] - }, - { - "cell_type": "code", - "execution_count": 207, - "id": "73d3f135-ece0-4743-954b-8abb2ee4cdd4", - "metadata": {}, - "outputs": [], - "source": [ - "foreach(x->x.Kn0=BatchParam([2e-7, 1e-7]), fodo.line)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "21a8efb7-db32-4465-93fb-1d0e56b9a3e2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 2.349 ms (923 allocations: 29.17 KiB)\n" - ] - }, - { - "data": { - "text/plain": [ - "Bunch{Float64, Float64, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}}(Species(electron, charge=-1.0e, mass=510998.95069 eV/c², spin=0.5ħ), -60.04153711147289, 4.0692151117807276e-5, BeamTracking.Coords{Vector{UInt8}, Matrix{Float64}, Matrix{Float64}}(UInt8[0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 … 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01], [-5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; … ; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0; -5.065523823532325e-7 -4.503556187786561e-8 … -7.2681535442629006e-12 0.0], [0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; … ; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]))" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@btime track!($b0, $fodo)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "9f5d074b-31ca-420d-943d-67ca4fbe4804", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element Vector{BatchParam}:\n", - " BatchParam(-0.0)\n", - " BatchParam(0.0)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "t[1][end]" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "id": "213e0113-edba-427d-b8cc-1cf18352fc57", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BMultipoleParams\n", - " Bn0 = BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n", - " Kn1 = BatchParam(0.36)\n" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bm = qf.BMultipoleParams" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "bef5d1b8-9aaa-4be9-abe6-d17c8f886aef", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BeamTrackingBeamlinesExt" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "const BTBL = Base.get_extension(BeamTracking, :BeamTrackingBeamlinesExt)" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "id": "2efbe468-28af-4e0c-b7ec-ee6aa4e980bd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "n = BatchParam[BatchParam([-1.2008307422294578e-5]), BatchParam(0.36)]\n" - ] - }, - { - "data": { - "text/plain": [ - "(BatchParam[BatchParam(0.0f0), BatchParam(0.36)], -60.04153711147289, Bool[0, 1])" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np, p_over_q_ref, normalized = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "808e9a36-b6a9-4cca-9db9-1f5ff3c8c2e8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.000219 seconds (176 allocations: 4.984 KiB)\n" - ] - }, - { - "data": { - "text/plain": [ - "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" - ] - }, - "execution_count": 133, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@time kn, ks = BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "id": "d666c09a-0d3c-4adf-a88f-229ab6b0d52a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BatchParam" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "L=qf.L\n", - "promote_type(reduce(promote_type, typeof.(bm.n)), \n", - " reduce(promote_type, typeof.(bm.s)),\n", - " reduce(promote_type, typeof.(bm.tilt)),\n", - " typeof(L), typeof(p_over_q_ref)\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "id": "f1c249b6-f18d-49a9-ac92-ac931862d027", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BatchParam" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "promote_type(eltype(bm.n), typeof(qf.L), typeof(p_over_q_ref))" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "9598c822-1f59-42eb-959e-50a273355b66", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element StaticArraysCore.SizedVector{2, BatchParam, Vector{BatchParam}} with indices SOneTo(2):\n", - " BatchParam([-1.2008307422294578e-5, -6.004153711147289e-6])\n", - " BatchParam(0.36)" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "getfield(bm, :n)" - ] - }, - { - "cell_type": "code", - "execution_count": 153, - "id": "4bf816aa-169a-43cc-8ced-3f9e899d2ebd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.000089 seconds (104 allocations: 1.906 KiB)\n" - ] - }, - { - "data": { - "text/plain": [ - "(BatchParam[BatchParam([2.0e-7, 1.0e-7]), BatchParam(0.36)], BatchParam[BatchParam(-0.0), BatchParam(0.0)])" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "@time BTBL.get_strengths(bm, qf.L, qf.p_over_q_ref)" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "id": "e93f6ce2-9383-49b5-b798-3d33380c8214", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", - " BatchParam([2.0e-7, 1.0e-7])\n", - " BatchParam(0.36)" - ] - }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kn" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "fb0ca353-c7a2-4c90-bd93-4e74cbb2d981", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", - " BatchParam(-0.0)\n", - " BatchParam(0.0)" - ] - }, - "execution_count": 131, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ks" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "63312df7-bd24-4dc6-9369-5323c2121925", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", - " BatchParam(-0.0)\n", - " BatchParam(0.36)" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "kn" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "9bf348a6-928d-459c-96dc-54ce19162426", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2-element StaticArraysCore.SVector{2, BatchParam} with indices SOneTo(2):\n", - " BatchParam(-0.0)\n", - " BatchParam(0.0)" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ks" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "930fb9e4-ef96-42d9-8d0d-4bcec336f585", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "search: \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1my\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22md\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22m\u001b[0m\u001b[1ml\u001b[22m \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mize \u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mi\u001b[22m\u001b[0m\u001b[1mn\u001b[22m\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer Unde\u001b[0m\u001b[1mf\u001b[22m\u001b[0m\u001b[1mI\u001b[22m\u001b[0m\u001b[1mn\u001b[22miti\u001b[0m\u001b[1ma\u001b[22m\u001b[0m\u001b[1ml\u001b[22mizer\n", - "\n" - ] - }, - { - "data": { - "text/latex": [ - "\\begin{verbatim}\n", - "finally\n", - "\\end{verbatim}\n", - "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n", - "\n", - "\\begin{verbatim}\n", - "f = open(\"file\")\n", - "try\n", - " operate_on_file(f)\n", - "finally\n", - " close(f)\n", - "end\n", - "\\end{verbatim}\n", - "When control leaves the \\href{@ref}{\\texttt{try}} block (for example, due to a \\href{@ref}{\\texttt{return}}, or just finishing normally), \\href{@ref}{\\texttt{close(f)}} will be executed. If the \\texttt{try} block exits due to an exception, the exception will continue propagating. A \\texttt{catch} block may be combined with \\texttt{try} and \\texttt{finally} as well. In this case the \\texttt{finally} block will run after \\texttt{catch} has handled the error.\n", - "\n" - ], - "text/markdown": [ - "```\n", - "finally\n", - "```\n", - "\n", - "Run some code when a given block of code exits, regardless of how it exits. For example, here is how we can guarantee that an opened file is closed:\n", - "\n", - "```julia\n", - "f = open(\"file\")\n", - "try\n", - " operate_on_file(f)\n", - "finally\n", - " close(f)\n", - "end\n", - "```\n", - "\n", - "When control leaves the [`try`](@ref) block (for example, due to a [`return`](@ref), or just finishing normally), [`close(f)`](@ref) will be executed. If the `try` block exits due to an exception, the exception will continue propagating. A `catch` block may be combined with `try` and `finally` as well. In this case the `finally` block will run after `catch` has handled the error.\n" - ], - "text/plain": [ - "\u001b[36m finally\u001b[39m\n", - "\n", - " Run some code when a given block of code exits, regardless of how it exits.\n", - " For example, here is how we can guarantee that an opened file is closed:\n", - "\n", - "\u001b[36m f = open(\"file\")\u001b[39m\n", - "\u001b[36m try\u001b[39m\n", - "\u001b[36m operate_on_file(f)\u001b[39m\n", - "\u001b[36m finally\u001b[39m\n", - "\u001b[36m close(f)\u001b[39m\n", - "\u001b[36m end\u001b[39m\n", - "\n", - " When control leaves the \u001b[36mtry\u001b[39m block (for example, due to a \u001b[36mreturn\u001b[39m, or just\n", - " finishing normally), \u001b[36mclose(f)\u001b[39m will be executed. If the \u001b[36mtry\u001b[39m block exits due\n", - " to an exception, the exception will continue propagating. A \u001b[36mcatch\u001b[39m block may\n", - " be combined with \u001b[36mtry\u001b[39m and \u001b[36mfinally\u001b[39m as well. In this case the \u001b[36mfinally\u001b[39m block\n", - " will run after \u001b[36mcatch\u001b[39m has handled the error." - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "?finally" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b2606c31-0971-4091-a110-b22588873197", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia Global (10 threads) 1.10", - "language": "julia", - "name": "julia-global-_10-threads_-1.10" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.10.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/batch.jl b/src/batch.jl index 3a5e1e1..b9ce3c6 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -48,6 +48,13 @@ struct BatchParam separate processes where each process has its own lattice with its own TimeDependentParams. =# + function BatchParam(batch::AbstractArray) + if length(batch) == 1 + error("Cannot make BatchParam with array of length 1") + end + return new(batch) + end + BatchParam(n::Number) = new(n) end struct _LoweredBatchParam{N,V<:AbstractArray} @@ -213,6 +220,29 @@ function Base.literal_pow(::typeof(^), ba::BatchParam, ::Val{N}) where {N} return BatchParam(map(x->Base.literal_pow(^, x, Val{N}()), ba.batch)) end +atan2(bpa::BatchParam, bpb::BatchParam) = _batch_atan2(bpa.batch, bpb.batch) + +function _batch_atan2(batch_a, batch_b) + if batch_a isa Number + if batch_b isa Number + return BatchParam(atan2(batch_a, batch_b)) + else + let a = batch_a + return BatchParam(map((bi)->atan2(a, bi), batch_b)) + end + end + elseif batch_b isa Number + let b = batch_b + return BatchParam(map((ai)->atan2(ai, b), batch_a)) + end + elseif length(batch_a) == length(batch_b) + return BatchParam(map((ai,bi)->atan2(ai, bi), batch_a, batch_b)) + else + error("Cannot perform operation ^ with two non-scalar BatchParams of differing + lengths (received lengths $(length(batch_a)) and $(length(batch_b))).") + end +end + Base.:+(b::BatchParam) = b # identity for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :inv, @@ -223,8 +253,6 @@ for t = (:-, :sqrt, :exp, :log, :sin, :cos, :tan, :cot, :sinh, :cosh, :tanh, :in end end -atan2(b1::BatchParam, b2::BatchParam) = BatchParam(map((x,y)->atan2(x,y), b1.batch, b2.batch)) - for t = (:unit, :sincu, :sinhc, :sinhcu, :asinc, :asincu, :asinhc, :asinhcu, :erf, :erfc, :erfcx, :erfi, :wf, :rect) @eval begin @@ -272,26 +300,6 @@ static_batchcheck(::_LoweredBatchParam) = true return false end -#= -""" - lane2vec(lane::SIMD.VecRange{N}) - -Given a SIMD.VecRange, will return an equivalent SIMD.Vec that -can be used in arithmetic operations for mapping integer indices -of particles to a given element in a batch. -""" -function lane2vec(lane::SIMD.VecRange{N}) where {N} - # Try to match with vector register size, but - # only up to UInt32 -> ~4.3 billion particles, - # probably max on CPU... - if Int(pick_vector_width(UInt32)) == N - return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}())) - else - return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}())) - end -end -=# - @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} diff --git a/src/kernel.jl b/src/kernel.jl index 4bdf407..1ab8522 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -9,14 +9,6 @@ blank_kernel!(args...) = nothing kernel::K = blank_kernel! args::A = () function KernelCall(kernel, args) - - #=if args != () - @show args - @show batch_lower(args) - @show typeof(batch_lower(args)) - error("") - end=# - #return new{typeof(kernel),typeof(map(t->time_lower(t), args))}(kernel, map(t->time_lower(t), args)) _args = map(t->time_lower(batch_lower(t)), args) new{typeof(kernel),typeof(_args)}(kernel, _args) end @@ -77,11 +69,7 @@ _generic_kernel!(i, coords, kc) = __generic_kernel!(i, coords, kc.chain, kc.ref) end function process_batch_args(i, args) - #= @show args - error("")=# if static_batchcheck(args) - # beval(args, i) - #error("") return beval(args, i) else return args diff --git a/test/batch_test.jl b/test/batch_test.jl new file mode 100644 index 0000000..1c20b2d --- /dev/null +++ b/test/batch_test.jl @@ -0,0 +1,167 @@ +using Beamlines +using BeamTracking +using Test +using LinearAlgebra + +# Pushes 4 equal particles with 4 batched parameters +# compares when particle is pushed through 4 different elements +function test_batch( + batch_params, + nonbatch_params=(;); + v=nothing, + q=nothing, + extra_tests=(), +) + + if isnothing(v) + v = repeat((rand(1,6).-0.5)*1e-4, 4, 1) + end + if isnothing(q) + q = repeat(mapslices(normalize, rand(1,4), dims=2), 4, 1) + end + + E_ref = 5e9 + species = Species("electron") + + ele_1 = LineElement() + ele_2 = LineElement() + ele_3 = LineElement() + ele_4 = LineElement() + ele_batch = LineElement() + for (k,val) in pairs(batch_params) + val_1 = -val + val_2 = -0.5*val + val_3 = 0.5*val + val_4 = val + setproperty!(ele_1, k, val_1) + setproperty!(ele_2, k, val_2) + setproperty!(ele_3, k, val_3) + setproperty!(ele_4, k, val_4) + setproperty!(ele_batch, k, BatchParam([val_1, val_2, val_3, val_4])) + end + for (k,val) in pairs(nonbatch_params) + setproperty!(ele_1, k, val) + setproperty!(ele_2, k, val) + setproperty!(ele_3, k, val) + setproperty!(ele_4, k, val) + setproperty!(ele_batch, k, val) + end + + bl_1 = Beamline([ele_1], E_ref=E_ref, species_ref=species) + bl_2 = Beamline([ele_2], E_ref=E_ref, species_ref=species) + bl_3 = Beamline([ele_3], E_ref=E_ref, species_ref=species) + bl_4 = Beamline([ele_4], E_ref=E_ref, species_ref=species) + bl_batch = Beamline([ele_batch], E_ref=E_ref, species_ref=species) + + b0_1 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_1.p_over_q_ref, species=bl_1.species_ref) + b0_2 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_2.p_over_q_ref, species=bl_2.species_ref) + b0_3 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_3.p_over_q_ref, species=bl_3.species_ref) + b0_4 = Bunch(v[1,:]', q[1,:]'; p_over_q_ref=bl_4.p_over_q_ref, species=bl_4.species_ref) + + b0_batch = Bunch(v, q; p_over_q_ref=bl_batch.p_over_q_ref, species=bl_batch.species_ref) + + track!(b0_1, bl_1) + track!(b0_2, bl_2) + track!(b0_3, bl_3) + track!(b0_4, bl_4) + + # Ensure branchlessness of parameters with explicit SIMD + track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) + + @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v + @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v + @test b0_batch.coords.v[3,:]' ≈ b0_3.coords.v + @test b0_batch.coords.v[4,:]' ≈ b0_4.coords.v + + @test b0_batch.coords.q[1,:]' ≈ b0_1.coords.q + @test b0_batch.coords.q[2,:]' ≈ b0_2.coords.q + @test b0_batch.coords.q[3,:]' ≈ b0_3.coords.q + @test b0_batch.coords.q[4,:]' ≈ b0_4.coords.q + + for extra_test in extra_tests + @test extra_test(b0_plus, b0_minus, b0_time) + end +end + +@testset "Batch" begin + # Test each of the splits: DKD, MKM, SKS, BKB + # MKM: + test_batch((;Kn1=0.36), (;L=0.5)) + test_batch((;Kn1=0.36, Ks2=-1.2, Kn12=105.), (;L=3.4)) + + # SKS: + test_batch((;Ksol=1.2), (;L=3.4)) + test_batch((;Ksol=0.23, Kn1=0.36, Ks2=-1.2, Kn12=105.), (;L=3.4)) + + # DKD: + test_batch((;Kn0=1e-2, Kn1=-2, Kn3=14), (;L=5.1)) + + # BKB not working at the moment because bend multipoles not + # implemented and Ks0 (also stored) becomes a TimeDependentParam + # test_batch((;Kn0=1e-4, e1=2e-2, e2=3e-2), (;L=1.4)) + + Kn1 = 0.36 + Ks2 = -1.2 + L = 3.4 + Kn12 = 105. + Kn3 = 14 + Kn0 = 1e-2 + Ksol = -0.23 + test_batch((;Kn1=Kn1), (;L=L)) + test_batch((;Kn1=Kn1, Ks2=Ks2, Kn12=Kn12), (;L=L)) + + # SKS: + test_batch((;Ksol=Ksol), (;L=L)) + test_batch((;Ksol=Ksol, Kn1=Kn1, Ks2=Ks2, Kn12=Kn12), (;L=L)) + + # DKD: + test_batch((;Kn0=Kn0, Kn1=Kn1, Kn3=Kn3), (;L=L)) + + # Now with different types of multipoles entered: + test_batch((;Bn1=Kn1), (;L=0.5)) + test_batch((;Bn1L=Kn1, Ks2L=Ks2, Bn12=Kn12), (;L=L)) + + # SKS: + test_batch((;Bsol=Ksol), (;L=L)) + test_batch((;BsolL=Ksol), (;L=L)) + test_batch((;KsolL=Ksol, Bn1L=Kn1, Bs2=Ks2, Kn12L=Kn12), (;L=L)) + + # DKD: + test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=L)) + test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=L)) + test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=0)) + test_batch((;Bn0L=Kn0, Kn1L=Kn1, Bn3=Kn3), (;L=0)) + + #= + # Aperture: + # let's make a time-dependent aperture which oscillates but will allow both + # particles through + function check_state(p, m, t, state=BeamTracking.STATE_ALIVE) + if !all(p.coords.state .== m.coords.state .== t.coords.state .== state) + error("Test failed: p.coords.state = $(p.coords.state), m.coords.state = $(m.coords.state), + t.coords.state = $(t.coords.state), state = $state") + else + return true + end + end + + test_batch( + (;x2_limit=1), + (;aperture_shape = ApertureShape.Rectangular); + v=[0.5 0 0 0 0 0], + ft=(v)->v*cos(2*Time()), + extra_tests=(check_state,) + ) + + # now one where all particle should die + test_batch( + (;x2_limit=1), + (;aperture_shape = ApertureShape.Rectangular); + v=[0.5 0 0 0 0 0], + ft=(v)->v*sin(2*Time()), + extra_tests=((p,m,t)->check_state(p,m,t,BeamTracking.STATE_LOST_POS_X),) + ) + +=# + +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7fefaea..9584a71 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -200,4 +200,5 @@ include("alignment_tracking_test.jl") include("aperture_tracking_test.jl") include("ExactTracking_test.jl") include("IntegrationTracking_test.jl") -include("time_test.jl") \ No newline at end of file +include("time_test.jl") +include("batch_test.jl") \ No newline at end of file From 2b433ab0dfcabf8e86ae1ca1d1ab678acf6f9f41 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 14:35:05 -0500 Subject: [PATCH 16/30] bump --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2ddae00..3e0f888 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BeamTracking" uuid = "8ef5c10a-4ca3-437f-8af5-b84d8af36df0" authors = ["mattsignorelli and contributors"] -version = "0.5.5" +version = "0.5.6" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" From 06d85ee092205401854ddd5289925c62c57e580e Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 14:58:49 -0500 Subject: [PATCH 17/30] linear algebra --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3e0f888..ee70594 100644 --- a/Project.toml +++ b/Project.toml @@ -52,8 +52,9 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Distributions", "JET", "GTPSA", "BenchmarkTools", "Beamlines", "StaticArrays"] +test = ["Test", "Distributions", "JET", "GTPSA", "BenchmarkTools", "Beamlines", "StaticArrays", "LinearAlgebra"] From 0b75697bf7b11c92a03b83a0518917e6305ca914 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:06:37 -0500 Subject: [PATCH 18/30] maybe fix --- src/batch.jl | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/batch.jl b/src/batch.jl index b9ce3c6..b4b30a3 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -303,8 +303,26 @@ end @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} - modlane = SIMD.VecRange{N}(mod1(lane.i, B)) # mod start around batch - return b.batch[modlane] + m = rem(lane2vec(lane), B) + return b.batch[vifelse(m == 0, B, m)] +end + +""" + lane2vec(lane::SIMD.VecRange{N}) + +Given a SIMD.VecRange, will return an equivalent SIMD.Vec that +can be used in arithmetic operations for mapping integer indices +of particles to a given element in a batch. +""" +function lane2vec(lane::SIMD.VecRange{N}) where {N} + # Try to match with vector register size, but + # only up to UInt32 -> ~4.3 billion particles, + # probably max on CPU... + if Int(pick_vector_width(UInt32)) == N + return SIMD.Vec{N,UInt32}(ntuple(i->lane.i+i-1, Val{N}())) + else + return SIMD.Vec{N,UInt64}(ntuple(i->lane.i+i-1, Val{N}())) + end end @inline beval(b, i) = b From 34317bfd92ae3bfc0baf3f5c2d80a9db5cc1daba Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:30:47 -0500 Subject: [PATCH 19/30] remove inbounds --- src/kernel.jl | 12 ++++++------ src/kernels/rfcavity_kick.jl | 4 ++-- src/kernels/yoshida.jl | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index 1ab8522..3c321e4 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -203,9 +203,9 @@ macro makekernel(args...) if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds return quote @inline function $(fcn_name)($(args...)) - @inbounds begin + #@inbounds begin $(body) - end + #end end end else # no inbounds @@ -219,17 +219,17 @@ macro makekernel(args...) if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds return quote @inline function $(fcn_name)($(args...)) - @inbounds begin @FastGTPSA begin + #@inbounds begin @FastGTPSA begin $(body) - end end + #end end end end else # no inbounds return quote @inline function $(fcn_name)($(args...)) - @FastGTPSA begin + #@FastGTPSA begin $(body) - end + #end end end diff --git a/src/kernels/rfcavity_kick.jl b/src/kernels/rfcavity_kick.jl index 9e22898..dd56281 100644 --- a/src/kernels/rfcavity_kick.jl +++ b/src/kernels/rfcavity_kick.jl @@ -107,7 +107,7 @@ end function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm, kn, ks, L) - @FastGTPSA begin @inbounds begin + #@FastGTPSA begin @inbounds begin v = coords.v alive = (coords.state[i] == STATE_ALIVE) #r2 = v[i,XI]*v[i,XI] + v[i,YI]*v[i,YI] @@ -152,7 +152,7 @@ function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm else omega = (ox, oy, oz) end - end end + #end end return omega end diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl index 4bed375..a6f77fb 100644 --- a/src/kernels/yoshida.jl +++ b/src/kernels/yoshida.jl @@ -3,7 +3,7 @@ # @inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - @inbounds begin + #@inbounds begin if !isnothing(edge_params) && fringe_in a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) @@ -24,12 +24,12 @@ a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end + #end end @inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - @inbounds begin + #@inbounds begin w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step w1 = 1.3512071919596577718181151794851757586002349853515625*ds_step if !isnothing(edge_params) && fringe_in @@ -54,12 +54,12 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end + #end end @inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - @inbounds begin + #@inbounds begin w0 = 1.315186320683911169737712043570355*ds_step w1 = -1.17767998417887100694641568096432*ds_step w2 = 0.235573213359358133684793182978535*ds_step @@ -90,12 +90,12 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end + #end end @inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - @inbounds begin + #@inbounds begin w0 = 1.7084530707869978*ds_step w1 = 0.102799849391985*ds_step w2 = -1.96061023297549*ds_step @@ -138,5 +138,5 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - end + #end end \ No newline at end of file From 00b75f0d1313ce21ad53f88254cb824471c64d8e Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:31:23 -0500 Subject: [PATCH 20/30] make first test --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9584a71..3f791cb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -195,10 +195,10 @@ function quaternion_coeffs_approx_equal(q_expected, q_calculated, ϵ) return all_ok end +include("batch_test.jl") include("BeamlinesExt_test.jl") include("alignment_tracking_test.jl") include("aperture_tracking_test.jl") include("ExactTracking_test.jl") include("IntegrationTracking_test.jl") include("time_test.jl") -include("batch_test.jl") \ No newline at end of file From 9edd6ae2310451a151e10be9ef361f39fcced745 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:37:09 -0500 Subject: [PATCH 21/30] try no explicit SIMD --- test/batch_test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/batch_test.jl b/test/batch_test.jl index 1c20b2d..e96220b 100644 --- a/test/batch_test.jl +++ b/test/batch_test.jl @@ -66,7 +66,7 @@ function test_batch( track!(b0_4, bl_4) # Ensure branchlessness of parameters with explicit SIMD - track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) + track!(b0_batch, bl_batch; use_explicit_SIMD=false, use_KA=true) @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v From 99317cd50215021e4fc77d711305a77cb32026a4 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:52:52 -0500 Subject: [PATCH 22/30] put inbounds bck in --- src/kernel.jl | 12 ++++++------ src/kernels/yoshida.jl | 16 ++++++++-------- test/runtests.jl | 2 ++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/kernel.jl b/src/kernel.jl index 3c321e4..1ab8522 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -203,9 +203,9 @@ macro makekernel(args...) if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds return quote @inline function $(fcn_name)($(args...)) - #@inbounds begin + @inbounds begin $(body) - #end + end end end else # no inbounds @@ -219,17 +219,17 @@ macro makekernel(args...) if isnothing(idx_inbounds) || kwargvals[idx_inbounds] # inbounds return quote @inline function $(fcn_name)($(args...)) - #@inbounds begin @FastGTPSA begin + @inbounds begin @FastGTPSA begin $(body) - #end end + end end end end else # no inbounds return quote @inline function $(fcn_name)($(args...)) - #@FastGTPSA begin + @FastGTPSA begin $(body) - #end + end end end diff --git a/src/kernels/yoshida.jl b/src/kernels/yoshida.jl index a6f77fb..4bed375 100644 --- a/src/kernels/yoshida.jl +++ b/src/kernels/yoshida.jl @@ -3,7 +3,7 @@ # @inline function order_two_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - #@inbounds begin + @inbounds begin if !isnothing(edge_params) && fringe_in a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e1, 1) @@ -24,12 +24,12 @@ a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - #end + end end @inline function order_four_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - #@inbounds begin + @inbounds begin w0 = -1.7024143839193153215916254339390434324741363525390625*ds_step w1 = 1.3512071919596577718181151794851757586002349853515625*ds_step if !isnothing(edge_params) && fringe_in @@ -54,12 +54,12 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - #end + end end @inline function order_six_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - #@inbounds begin + @inbounds begin w0 = 1.315186320683911169737712043570355*ds_step w1 = -1.17767998417887100694641568096432*ds_step w2 = 0.235573213359358133684793182978535*ds_step @@ -90,12 +90,12 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - #end + end end @inline function order_eight_integrator!(i, coords::Coords, ker, params, photon_params, ds_step, num_steps, edge_params, ::Val{fringe_in}, ::Val{fringe_out}, L) where {fringe_in,fringe_out} - #@inbounds begin + @inbounds begin w0 = 1.7084530707869978*ds_step w1 = 0.102799849391985*ds_step w2 = -1.96061023297549*ds_step @@ -138,5 +138,5 @@ end a, tilde_m, Ksol, Kn0, e1, e2 = edge_params linear_bend_fringe!(i, coords, a, tilde_m, Ksol, Kn0, e2, -1) end - #end + end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 3f791cb..5082bae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,6 +15,8 @@ using BeamTracking: Coords, KernelCall, Q0, QX, QY, QZ, STATE_ALIVE, STATE_LOST, quat_mul, quat_rotate, gaussian_random using Beamlines: isactive +@show BeamTracking.REGISTER_SIZE + BenchmarkTools.DEFAULT_PARAMETERS.gctrial = false BenchmarkTools.DEFAULT_PARAMETERS.evals = 2 From 98dbce80f0ed77d80c0c634a6affa19f30b367b6 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 16:53:14 -0500 Subject: [PATCH 23/30] put inbounds back in --- src/kernels/rfcavity_kick.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/rfcavity_kick.jl b/src/kernels/rfcavity_kick.jl index dd56281..9e22898 100644 --- a/src/kernels/rfcavity_kick.jl +++ b/src/kernels/rfcavity_kick.jl @@ -107,7 +107,7 @@ end function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm, kn, ks, L) - #@FastGTPSA begin @inbounds begin + @FastGTPSA begin @inbounds begin v = coords.v alive = (coords.state[i] == STATE_ALIVE) #r2 = v[i,XI]*v[i,XI] + v[i,YI]*v[i,YI] @@ -152,7 +152,7 @@ function omega_cavity(i, coords::Coords, a, tilde_m, omega, t0, E0_over_Rref, mm else omega = (ox, oy, oz) end - #end end + end end return omega end From 006125c2c5468510507ae8e4298af390be235976 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 19:57:11 -0500 Subject: [PATCH 24/30] comments fix compiler bug >:( --- src/batch.jl | 5 ++++- test/batch_test.jl | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/batch.jl b/src/batch.jl index b4b30a3..619886d 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -304,7 +304,10 @@ end @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} m = rem(lane2vec(lane), B) - return b.batch[vifelse(m == 0, B, m)] + i = vifelse(m == 0, B, m) + #@show i + #@show b.batch[i] + return b.batch[i] end """ diff --git a/test/batch_test.jl b/test/batch_test.jl index e96220b..1c20b2d 100644 --- a/test/batch_test.jl +++ b/test/batch_test.jl @@ -66,7 +66,7 @@ function test_batch( track!(b0_4, bl_4) # Ensure branchlessness of parameters with explicit SIMD - track!(b0_batch, bl_batch; use_explicit_SIMD=false, use_KA=true) + track!(b0_batch, bl_batch; use_explicit_SIMD=true, use_KA=false) @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v From bf597bd0acf57f4f6a9d5e344021aa971b9ce132 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Thu, 5 Feb 2026 20:07:01 -0500 Subject: [PATCH 25/30] error if called on Linux or Windows Date: Fri, 6 Feb 2026 09:50:20 -0500 Subject: [PATCH 26/30] check if x64 or aarch problem with lts --- .github/workflows/CI.yml | 12 ++++++++++++ src/batch.jl | 6 +++--- test/batch_test.jl | 12 +++++------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b86cccf..109317c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -40,6 +40,18 @@ jobs: arch: aarch64 - os: macos-latest arch: x64 + include: + - os: ubuntu-24.04-arm + version: 'lts' + arch: aarch64 + include: + - os: ubuntu-24.04-arm + version: '1.11' + arch: aarch64 + include: + - os: ubuntu-24.04-arm + version: '1.12' + arch: aarch64 steps: - uses: actions/checkout@v6 - uses: julia-actions/setup-julia@v2 diff --git a/src/batch.jl b/src/batch.jl index 4cf2a46..e75700f 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -303,11 +303,11 @@ end @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} - @static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) + #=@static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) error("LLVM has a compiler bug with batch parameter explicit SIMD on Julia versions < 1.11 AND a Windows or Linux operating system. To get around - this, specify the track! flags: use_explicit_SIMD=false, use_KA=true.") - end + this, specify the `track!` keyword argument: `use_explicit_SIMD=false`") + end=# m = rem(lane2vec(lane), B) i = vifelse(m == 0, B, m) #@show i diff --git a/test/batch_test.jl b/test/batch_test.jl index cf48b9f..e7eb35c 100644 --- a/test/batch_test.jl +++ b/test/batch_test.jl @@ -66,14 +66,12 @@ function test_batch( track!(b0_4, bl_4) # Ensure branchlessness of parameters with explicit SIMD - if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) - use_explicit_SIMD=false - use_KA=true - else + #if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) + # use_explicit_SIMD=false + # else use_explicit_SIMD=true - use_KA=false - end - track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD, use_KA=use_KA) + #end + track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD) @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v @test b0_batch.coords.v[2,:]' ≈ b0_2.coords.v From 453e4370987205f47c9985fa4439045adbe7b640 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Fri, 6 Feb 2026 09:56:44 -0500 Subject: [PATCH 27/30] fix CI --- .github/workflows/CI.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 109317c..a3e09da 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -44,11 +44,9 @@ jobs: - os: ubuntu-24.04-arm version: 'lts' arch: aarch64 - include: - os: ubuntu-24.04-arm version: '1.11' arch: aarch64 - include: - os: ubuntu-24.04-arm version: '1.12' arch: aarch64 From 2bf74e03df8830fe388bed83d2c062c7325e7adc Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Fri, 6 Feb 2026 10:02:43 -0500 Subject: [PATCH 28/30] check time test --- src/batch.jl | 13 ++++++------- test/batch_test.jl | 8 ++++---- test/runtests.jl | 3 ++- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/batch.jl b/src/batch.jl index e75700f..c04fb59 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -303,15 +303,14 @@ end @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} - #=@static if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) - error("LLVM has a compiler bug with batch parameter explicit SIMD on Julia - versions < 1.11 AND a Windows or Linux operating system. To get around - this, specify the `track!` keyword argument: `use_explicit_SIMD=false`") - end=# + @static if (VERSION < v"1.11" && Sys.ARCH == :x64) + error("Julia's explicit SIMD.jl has a compiler bug that appears with batch + parameters on versions < 1.11 AND an x64 bit architecture, which we + detected that you have. To get around this, specify the `track!` + keyword argument `use_explicit_SIMD=false`") + end m = rem(lane2vec(lane), B) i = vifelse(m == 0, B, m) - #@show i - #@show b.batch[i] return b.batch[i] end diff --git a/test/batch_test.jl b/test/batch_test.jl index e7eb35c..e76685f 100644 --- a/test/batch_test.jl +++ b/test/batch_test.jl @@ -66,11 +66,11 @@ function test_batch( track!(b0_4, bl_4) # Ensure branchlessness of parameters with explicit SIMD - #if (VERSION < v"1.11" && (Sys.iswindows() || Sys.islinux())) - # use_explicit_SIMD=false - # else + if (VERSION < v"1.11" && Sys.ARCH == :x64) + use_explicit_SIMD=false + else use_explicit_SIMD=true - #end + end track!(b0_batch, bl_batch; use_explicit_SIMD=use_explicit_SIMD) @test b0_batch.coords.v[1,:]' ≈ b0_1.coords.v diff --git a/test/runtests.jl b/test/runtests.jl index 5082bae..b72231d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -198,9 +198,10 @@ function quaternion_coeffs_approx_equal(q_expected, q_calculated, ϵ) end include("batch_test.jl") +include("time_test.jl") include("BeamlinesExt_test.jl") include("alignment_tracking_test.jl") include("aperture_tracking_test.jl") include("ExactTracking_test.jl") include("IntegrationTracking_test.jl") -include("time_test.jl") + From eee00739dfc792ac8a4cfde26c1af5cddca265d6 Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Fri, 6 Feb 2026 10:24:11 -0500 Subject: [PATCH 29/30] show arch in tests --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index b72231d..46935da 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,6 +16,7 @@ using BeamTracking: Coords, KernelCall, Q0, QX, QY, QZ, STATE_ALIVE, STATE_LOST, using Beamlines: isactive @show BeamTracking.REGISTER_SIZE +@show Sys.ARCH BenchmarkTools.DEFAULT_PARAMETERS.gctrial = false BenchmarkTools.DEFAULT_PARAMETERS.evals = 2 From 86d5b3d8e16d33f15c34b7caa890e3739599191a Mon Sep 17 00:00:00 2001 From: mattsignorelli Date: Fri, 6 Feb 2026 13:56:47 -0500 Subject: [PATCH 30/30] check if fixed --- src/batch.jl | 4 ++-- src/kernel.jl | 2 +- test/batch_test.jl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/batch.jl b/src/batch.jl index c04fb59..c9331b4 100644 --- a/src/batch.jl +++ b/src/batch.jl @@ -303,9 +303,9 @@ end @inline beval(b::_LoweredBatchParam{B}, i) where {B} = b.batch[mod1(i, B)] @inline function beval(b::_LoweredBatchParam{B}, lane::SIMD.VecRange{N}) where {B,N} - @static if (VERSION < v"1.11" && Sys.ARCH == :x64) + @static if (VERSION < v"1.11" && Sys.ARCH == :x86_64) error("Julia's explicit SIMD.jl has a compiler bug that appears with batch - parameters on versions < 1.11 AND an x64 bit architecture, which we + parameters on versions < 1.11 AND an x86_64 bit architecture, which we detected that you have. To get around this, specify the `track!` keyword argument `use_explicit_SIMD=false`") end diff --git a/src/kernel.jl b/src/kernel.jl index 1ab8522..3898b9b 100644 --- a/src/kernel.jl +++ b/src/kernel.jl @@ -95,7 +95,7 @@ end groupsize::Union{Nothing,Integer}=nothing, #backend isa CPU ? floor(Int,REGISTER_SIZE/sizeof(eltype(v))) : 256 multithread_threshold::Integer=Threads.nthreads() > 1 ? 1750*Threads.nthreads() : typemax(Int), use_KA::Bool=!(get_backend(coords.v) isa CPU && isnothing(groupsize)), - use_explicit_SIMD::Bool=!use_KA && (@static VERSION < v"1.11" || Sys.ARCH != :aarch64) # Default to use explicit SIMD on CPU, excepts for Macs above LTS bc SIMD.jl bug + use_explicit_SIMD::Bool=!use_KA #&& (@static VERSION < v"1.11" || Sys.ARCH != :aarch64) # Default to use explicit SIMD on CPU, excepts for Macs above LTS bc SIMD.jl bug ) where {V} v = coords.v N_particle = size(v, 1) diff --git a/test/batch_test.jl b/test/batch_test.jl index e76685f..ee4b71c 100644 --- a/test/batch_test.jl +++ b/test/batch_test.jl @@ -66,7 +66,7 @@ function test_batch( track!(b0_4, bl_4) # Ensure branchlessness of parameters with explicit SIMD - if (VERSION < v"1.11" && Sys.ARCH == :x64) + if (VERSION < v"1.11" && Sys.ARCH == :x86_64) use_explicit_SIMD=false else use_explicit_SIMD=true