diff --git a/.gitignore b/.gitignore
index 3bbdd8b3..2c8da963 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
 *~
 *.swp
 .DS_Store
+*.png
+examples/mpi/06-scatterv.jl
 
diff --git a/examples/mpi/mpi_example.jl b/examples/mpi/mpi_example.jl
new file mode 100644
index 00000000..fceb9744
--- /dev/null
+++ b/examples/mpi/mpi_example.jl
@@ -0,0 +1,99 @@
+using BeamTracking, Beamlines, MPI, BenchmarkTools, Plots, LaTeXStrings, Unitful,
+      PhysicalConstants, Random
+# Read in the Electron Storage Ring of the Electron-Ion Collider
+include("../../test/lattices/esr.jl") # Beamline symbol is "ring"
+# Currently only Linear tracking is supported, enable it for each element
+foreach(t -> t.tracking_method = Linear(), ring.line)
+n_particles = parse(Int, ARGS[1])
+
+MPI.Init()
+comm = MPI.COMM_WORLD
+rank = MPI.Comm_rank(comm)
+comm_size = MPI.Comm_size(comm)
+root = 0
+
+if rank == 0
+    start_time = time()
+end
+
+# block distribution: the first `block_remainder` ranks take one extra particle
+block_size, block_remainder = divrem(n_particles, comm_size)
+
+if rank < block_remainder
+    block_size = block_size + 1
+    offset = rank * block_size
+else
+    offset = block_remainder * (block_size + 1) + (rank - block_remainder) * block_size
+end
+
+rank_indices = offset+1 : offset+block_size
+block_size = length(rank_indices)
+println("Rank $rank has indices $rank_indices")
+
+# collect number of particles on each rank
+counts = MPI.Gather(block_size, root, comm)
+if rank == root
+    counts = counts .* 6 # for 6 phase space elements
+end
+
+Random.seed!(rank)
+bunch = Bunch(block_size)
+# Track the bunch through the ESR
+track!(bunch, ring)
+# Also can track! individual elements
+# track!(bunch, ring; outer_particle_loop=true)
+
+"""
+Test before flatten and communication - matches Tracking_examples.jl when comm_size == 1 and n_particles == 100. 
+"""
+# plot(
+# scatter(bunch.v[:,1], bunch.v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(bunch.v[:,3], bunch.v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(bunch.v[:,5], bunch.v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+# savefig("mpi_example_plot_no_flatten.png")
+
+# vectorize bunch states for communication
+flattened_v = Vector{Float64}(vec(transpose(bunch.v)))
+
+# allocate buffer
+if rank == root
+    result_data = zeros(sum(counts))
+    recv_buffer = VBuffer(result_data, counts)
+else
+    recv_buffer = VBuffer(nothing)
+end
+
+# collect data from ranks
+MPI.Gatherv!(flattened_v, recv_buffer, root, comm)
+end_time = time()
+
+# finalize on every rank, then exit the non-root ranks
+MPI.Finalize()
+if rank != root
+    exit(0)
+end
+
+
+# # decompress states vector
+# b0v = reshape(result_data, 6, :)'
+
+
+# # plot dim vs. momentum
+# plot(
+# scatter(b0v[:,1], b0v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(b0v[:,3], b0v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(b0v[:,5], b0v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+
+# savefig("mpi_example_plot.png")
+
+elapsed_time = end_time - start_time
+
+println("Run time: $elapsed_time seconds")
+
+exit(0)
\ No newline at end of file
diff --git a/examples/mpi/mpi_example_no_plot.jl b/examples/mpi/mpi_example_no_plot.jl
new file mode 100644
index 00000000..00e3a9b8
--- /dev/null
+++ b/examples/mpi/mpi_example_no_plot.jl
@@ -0,0 +1,83 @@
+using BeamTracking, Beamlines, MPI, BenchmarkTools, Plots, LaTeXStrings, Unitful,
+      PhysicalConstants, Random
+# Read in the Electron Storage Ring of the Electron-Ion Collider
+include("../../test/lattices/esr.jl") # Beamline symbol is "ring"
+# Currently only Linear tracking is supported, enable it for each element
+foreach(t -> t.tracking_method = Linear(), ring.line) 
+n_particles = parse(Int, ARGS[1])
+
+MPI.Init()
+comm = MPI.COMM_WORLD
+rank = MPI.Comm_rank(comm)
+comm_size = MPI.Comm_size(comm)
+root = 0
+
+if rank == 0
+    start_time = time()
+end
+
+# block distribution: the first `block_remainder` ranks take one extra particle
+block_size, block_remainder = divrem(n_particles, comm_size)
+
+if rank < block_remainder
+    block_size = block_size + 1
+    offset = rank * block_size
+else
+    offset = block_remainder * (block_size + 1) + (rank - block_remainder) * block_size
+end
+
+rank_indices = offset+1 : offset+block_size
+block_size = length(rank_indices)
+
+# collect number of particles on each rank
+counts = MPI.Gather(block_size, root, comm)
+if rank == root
+    counts = counts .* 6 # for 6 phase space elements
+end
+
+Random.seed!(rank)
+bunch = Bunch(block_size)
+# Track the bunch through the ESR
+track!(bunch, ring)
+# Also can track! individual elements
+# track!(bunch, ring; outer_particle_loop=true)
+
+"""
+Test before flatten and communication - matches Tracking_examples.jl when comm_size == 1 and n_particles == 100. 
+"""
+# plot(
+# scatter(bunch.v[:,1], bunch.v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(bunch.v[:,3], bunch.v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(bunch.v[:,5], bunch.v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+# savefig("mpi_example_plot_no_flatten.png")
+
+# vectorize bunch states for communication
+flattened_v = Vector{Float64}(vec(transpose(bunch.v)))
+
+# allocate buffer
+if rank == root
+    result_data = zeros(sum(counts))
+    recv_buffer = VBuffer(result_data, counts)
+else
+    recv_buffer = VBuffer(nothing)
+end
+
+# collect data from ranks
+MPI.Gatherv!(flattened_v, recv_buffer, root, comm)
+end_time = time()
+
+# finalize on every rank, then exit the non-root ranks
+MPI.Finalize()
+if rank != root
+    exit(0)
+end
+
+elapsed_time = end_time - start_time
+
+println("Run time: $elapsed_time seconds")
+
+exit(0)
+
diff --git a/examples/mpi/scaling_plots.jl b/examples/mpi/scaling_plots.jl
new file mode 100644
index 00000000..6acb19cb
--- /dev/null
+++ b/examples/mpi/scaling_plots.jl
@@ -0,0 +1,34 @@
+using Plots
+
+threads = [32, 64, 128, 256]
+
+# n2_100k = [3.1540510654449463, 2.9544689655303955, 3.976494789123535, 4.019366979598999]
+# n4_100k = [2.9284520149230957, 2.8642098903656006, 3.396040201187134, 3.907201051712036]
+# n8_100k = [2.8360190391540527, 2.814426898956299, 3.7818939685821533, 3.8674139976501465]
+# n16_100k = [2.877640962600708, 2.836548089981079, 3.3386030197143555, 4.1602020263671875]
+# n32_100k = [2.9536869525909424, 3.1786229610443115, 3.973673105239868, 3.831165075302124]
+
+# # undef was 58.003605127334595
+# n2_200k = [NaN, 3.096147060394287, 3.6735999584198, 4.08241605758667]
+# n4_200k = [3.224229097366333, 2.9790329933166504, 3.459972858428955, 3.8737339973449707]
+# n8_200k = [2.913079023361206, 2.8580141067504883, 3.3868350982666016, 3.807229995727539]
+# n16_200k = [2.9674999713897705, 2.8917369842529297, 3.900191068649292, 
3.9239768981933594] +# n32_200k = [2.922312021255493, 2.9081051349639893, 3.365617036819458, 3.7916769981384277] + +n2_100k_local_work = [2.2434370517730713, 2.2717459201812744, 2.394063949584961, 2.376012086868286] +n4_100k_local_work = [2.544693946838379, 2.551508903503418, 2.7010128498077393, 2.570918083190918] +n8_100k_local_work = [3.8339920043945312, 3.8162360191345215, 4.066386938095093, 4.288684129714966] + +n2_100k_work_array = [2.3870041370391846, 2.432455062866211, 2.3610990047454834, 2.398801803588867] +n4_100k_work_array = [2.5696849822998047, 2.6017680168151855, 2.8834068775177, 2.7176730632781982] +n8_100k_work_array = [3.9740920066833496, 4.187608957290649, 4.045736074447632, 4.10685396194458] + +p1 = plot(threads, n2_100k_local_work / 2, label="n=2", xlabel="Threads", ylabel="Time per node (s)", title="Local Work Variable, 100k Particles", legend=:topleft) +plot!(threads, n4_100k_local_work / 4, label="n=4") +plot!(threads, n8_100k_local_work / 8, label="n=8") + +p2 = plot(threads, n2_100k_work_array / 2, label="n=2", xlabel="Threads", ylabel="Time per node (s)", title="Old Work Array, 100k Particles", legend=:topleft) +plot!(threads, n4_100k_work_array / 4, label="n=4") +plot!(threads, n8_100k_work_array / 8, label="n=8") + +plot(p1, p2, layout=(1,2), size=(1000, 500), margin=10Plots.mm) \ No newline at end of file diff --git a/examples/notebooks/Tracking_examples.jl b/examples/notebooks/Tracking_examples.jl index 6d48f744..31c130a4 100644 --- a/examples/notebooks/Tracking_examples.jl +++ b/examples/notebooks/Tracking_examples.jl @@ -5,7 +5,7 @@ using Markdown using InteractiveUtils # ╔═╡ e4c10317-3276-4a7a-a270-06c03ab6edd5 -using BeamTracking, Beamlines, BenchmarkTools, Plots, LaTeXStrings, Unitful, PhysicalConstants +using BeamTracking, Beamlines, BenchmarkTools, Plots, LaTeXStrings, Unitful, PhysicalConstants, Random # ╔═╡ 1c785b23-60c1-4f22-9056-b9896082b52c begin @@ -53,6 +53,8 @@ md"""Contruct a particle bunch and track it trough ESR""" # 
╔═╡ 6a084ffd-4f22-4df9-b81f-e347522c0779 begin # Construct a bunch: +Random.seed!(357357) + N_particle = 100 b0 = Bunch(N_particle) @@ -87,12 +89,19 @@ plot( md"""For performance and storage optimization, espessialy on GPU's it is recommended to use a bits representation of a lattice. Ignore a warning message for now, it simply states that there is a way make things even faster.""" # ╔═╡ cca8aa2f-fbfe-4189-be63-3a53ed0824c9 +# ╠═╡ disabled = true +#=╠═╡ begin bitsring = BitsBeamline(ring) track!(b0, bitsring) end + ╠═╡ =# + +# ╔═╡ ec3a3159-ca83-4773-b503-9b31032fdfff +md"""Another usefull package is GTPSA.jl. Let's use it to create a map""" # ╔═╡ cdd63014-8bd7-4bc8-b000-9ba54bd97f11 +#=╠═╡ begin using GTPSA # GTPSA map: @@ -103,9 +112,7 @@ begin track!(b0_tpsa, ring) track!(b0_tpsa, bitsring) end - -# ╔═╡ ec3a3159-ca83-4773-b503-9b31032fdfff -md"""Another usefull package is GTPSA.jl. Let's use it to create a map""" + ╠═╡ =# # ╔═╡ 00000000-0000-0000-0000-000000000001 PLUTO_PROJECT_TOML_CONTENTS = """ @@ -117,6 +124,7 @@ GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" PhysicalConstants = "5ad8b20f-a522-5ce9-bfc9-ddf1d5bda6ab" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] @@ -136,7 +144,7 @@ PLUTO_MANIFEST_TOML_CONTENTS = """ julia_version = "1.11.5" manifest_format = "2.0" -project_hash = "559f772a926270691d018758cb7e491b6f0634da" +project_hash = "b2ad30206e9c93c2533c3b14ff91d8ff6386ea74" [[deps.Accessors]] deps = ["CompositionsBase", "ConstructionBase", "Dates", "InverseFunctions", "MacroTools"] diff --git a/test/scaling_test.sh b/test/scaling_test.sh new file mode 100644 index 00000000..d97cbd21 --- /dev/null +++ b/test/scaling_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Loop over process counts (-n) and thread counts (-t) +for n in 2 4 8; do + for t in 32 64 128 256; do + echo "Running with -n $n and -t $t" 
+        # srun -n $n --cpu-bind=cores julia -t $t mpi_example_no_plot.jl 100000
+        mpiexecjl -n $n julia -t $t examples/mpi/mpi_example_no_plot.jl 100000
+    done
+done
\ No newline at end of file