diff --git a/.gitignore b/.gitignore
index 3bbdd8b3..2c8da963 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
 *~
 *.swp
 .DS_Store
+*.png
+examples/mpi/06-scatterv.jl
 
diff --git a/examples/mpi/mpi_example.jl b/examples/mpi/mpi_example.jl
new file mode 100644
index 00000000..fceb9744
--- /dev/null
+++ b/examples/mpi/mpi_example.jl
@@ -0,0 +1,99 @@
+using BeamTracking, Beamlines, MPI, BenchmarkTools, Plots, LaTeXStrings, Unitful,
+      PhysicalConstants, Random
+# Read in the Electron Storage Ring of the Electron-Ion Collider
+include("../../test/lattices/esr.jl") # Beamline symbol is "ring"
+# Currently only Linear tracking is supported, enable it for each element
+foreach(t -> t.tracking_method = Linear(), ring.line)
+n_particles = parse(Int, ARGS[1])
+
+MPI.Init()
+comm = MPI.COMM_WORLD
+rank = MPI.Comm_rank(comm)
+comm_size = MPI.Comm_size(comm)
+root = 0
+
+if rank == 0
+    start_time = time()
+end
+
+# block distribution: the first `block_remainder` ranks take one extra particle
+block_size, block_remainder = divrem(n_particles, comm_size)
+
+if rank < block_remainder
+    block_size = block_size + 1
+    offset = rank * block_size
+else
+    offset = block_remainder * (block_size + 1) + (rank - block_remainder) * block_size
+end
+
+rank_indices = offset+1 : offset+block_size
+block_size = length(rank_indices)
+println("Rank $rank has indices $rank_indices")
+
+# collect number of particles on each rank
+counts = MPI.Gather(block_size, root, comm)
+if rank == root
+    counts = counts .* 6 # for 6 phase space elements
+end
+
+Random.seed!(rank)
+bunch = Bunch(block_size)
+# Track the bunch through the ESR
+track!(bunch, ring)
+# Also can track! individual elements
+# track!(bunch, ring; outer_particle_loop=true)
+
+"""
+Test before flatten and communication - matches Tracking_examples.jl when comm_size == 1 and n_particles == 100. 
+"""
+# plot(
+# scatter(bunch.v[:,1], bunch.v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(bunch.v[:,3], bunch.v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(bunch.v[:,5], bunch.v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+# savefig("mpi_example_plot_no_flatten.png")
+
+# vectorize bunch states for communication
+flattened_v = Vector{Float64}(vec(transpose(bunch.v)))
+
+# allocate buffer
+if rank == root
+    result_data = zeros(sum(counts))
+    recv_buffer = VBuffer(result_data, counts)
+else
+    recv_buffer = VBuffer(nothing)
+end
+
+# collect data from ranks
+MPI.Gatherv!(flattened_v, recv_buffer, root, comm)
+end_time = time()
+
+# finalize on every rank, then exit the non-root ranks
+MPI.Finalize()
+if rank != root
+    exit(0)
+end
+
+
+# # decompress states vector
+# b0v = reshape(result_data, 6, :)'
+
+
+# # plot dim vs. momentum
+# plot(
+# scatter(b0v[:,1], b0v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(b0v[:,3], b0v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(b0v[:,5], b0v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+
+# savefig("mpi_example_plot.png")
+
+elapsed_time = end_time - start_time
+
+println("Run time: $elapsed_time seconds")
+
+exit(0)
\ No newline at end of file
diff --git a/examples/mpi/mpi_example_no_plot.jl b/examples/mpi/mpi_example_no_plot.jl
new file mode 100644
index 00000000..00e3a9b8
--- /dev/null
+++ b/examples/mpi/mpi_example_no_plot.jl
@@ -0,0 +1,83 @@
+using BeamTracking, Beamlines, MPI, BenchmarkTools, Plots, LaTeXStrings, Unitful,
+      PhysicalConstants, Random
+# Read in the Electron Storage Ring of the Electron-Ion Collider
+include("../../test/lattices/esr.jl") # Beamline symbol is "ring"
+# Currently only Linear tracking is supported, enable it for each element
+foreach(t -> t.tracking_method = Linear(), ring.line) 
+n_particles = parse(Int, ARGS[1])
+
+MPI.Init()
+comm = MPI.COMM_WORLD
+rank = MPI.Comm_rank(comm)
+comm_size = MPI.Comm_size(comm)
+root = 0
+
+if rank == 0
+    start_time = time()
+end
+
+# block distribution: the first `block_remainder` ranks take one extra particle
+block_size, block_remainder = divrem(n_particles, comm_size)
+
+if rank < block_remainder
+    block_size = block_size + 1
+    offset = rank * block_size
+else
+    offset = block_remainder * (block_size + 1) + (rank - block_remainder) * block_size
+end
+
+rank_indices = offset+1 : offset+block_size
+block_size = length(rank_indices)
+
+# collect number of particles on each rank
+counts = MPI.Gather(block_size, root, comm)
+if rank == root
+    counts = counts .* 6 # for 6 phase space elements
+end
+
+Random.seed!(rank)
+bunch = Bunch(block_size)
+# Track the bunch through the ESR
+track!(bunch, ring)
+# Also can track! individual elements
+# track!(bunch, ring; outer_particle_loop=true)
+
+"""
+Test before flatten and communication - matches Tracking_examples.jl when comm_size == 1 and n_particles == 100. 
+"""
+# plot(
+# scatter(bunch.v[:,1], bunch.v[:,2], label ="", xlabel = L"x", ylabel = L"p_x", markersize = 1),
+# scatter(bunch.v[:,3], bunch.v[:,4], label ="", xlabel = L"y", ylabel = L"p_y", markersize = 1),
+# scatter(bunch.v[:,5], bunch.v[:,6], label ="", xlabel = L"z", ylabel = L"p_z",
+# markersize = 1),
+# layout=(1,3), size=(600,300)
+# )
+# savefig("mpi_example_plot_no_flatten.png")
+
+# vectorize bunch states for communication
+flattened_v = Vector{Float64}(vec(transpose(bunch.v)))
+
+# allocate buffer
+if rank == root
+    result_data = zeros(sum(counts))
+    recv_buffer = VBuffer(result_data, counts)
+else
+    recv_buffer = VBuffer(nothing)
+end
+
+# collect data from ranks
+MPI.Gatherv!(flattened_v, recv_buffer, root, comm)
+end_time = time()
+
+# finalize on every rank, then exit the non-root ranks
+MPI.Finalize()
+if rank != root
+    exit(0)
+end
+
+elapsed_time = end_time - start_time
+
+println("Run time: $elapsed_time seconds")
+
+exit(0)
+
diff --git a/examples/mpi/scaling_plots.jl b/examples/mpi/scaling_plots.jl
new file mode 100644
index 00000000..6acb19cb
--- /dev/null
+++ b/examples/mpi/scaling_plots.jl
@@ -0,0 +1,34 @@
+using Plots
+
+threads = [32, 64, 128, 256]
+
+# n2_100k = [3.1540510654449463, 2.9544689655303955, 3.976494789123535, 4.019366979598999]
+# n4_100k = [2.9284520149230957, 2.8642098903656006, 3.396040201187134, 3.907201051712036]
+# n8_100k = [2.8360190391540527, 2.814426898956299, 3.7818939685821533, 3.8674139976501465]
+# n16_100k = [2.877640962600708, 2.836548089981079, 3.3386030197143555, 4.1602020263671875]
+# n32_100k = [2.9536869525909424, 3.1786229610443115, 3.973673105239868, 3.831165075302124]
+
+# # undef was 58.003605127334595
+# n2_200k = [NaN, 3.096147060394287, 3.6735999584198, 4.08241605758667]
+# n4_200k = [3.224229097366333, 2.9790329933166504, 3.459972858428955, 3.8737339973449707]
+# n8_200k = [2.913079023361206, 2.8580141067504883, 3.3868350982666016, 3.807229995727539]
+# n16_200k = [2.9674999713897705, 2.8917369842529297, 3.900191068649292, 
3.9239768981933594] +# n32_200k = [2.922312021255493, 2.9081051349639893, 3.365617036819458, 3.7916769981384277] + +n2_100k_local_work = [2.2434370517730713, 2.2717459201812744, 2.394063949584961, 2.376012086868286] +n4_100k_local_work = [2.544693946838379, 2.551508903503418, 2.7010128498077393, 2.570918083190918] +n8_100k_local_work = [3.8339920043945312, 3.8162360191345215, 4.066386938095093, 4.288684129714966] + +n2_100k_work_array = [2.3870041370391846, 2.432455062866211, 2.3610990047454834, 2.398801803588867] +n4_100k_work_array = [2.5696849822998047, 2.6017680168151855, 2.8834068775177, 2.7176730632781982] +n8_100k_work_array = [3.9740920066833496, 4.187608957290649, 4.045736074447632, 4.10685396194458] + +p1 = plot(threads, n2_100k_local_work / 2, label="n=2", xlabel="Threads", ylabel="Time per node (s)", title="Local Work Variable, 100k Particles", legend=:topleft) +plot!(threads, n4_100k_local_work / 4, label="n=4") +plot!(threads, n8_100k_local_work / 8, label="n=8") + +p2 = plot(threads, n2_100k_work_array / 2, label="n=2", xlabel="Threads", ylabel="Time per node (s)", title="Old Work Array, 100k Particles", legend=:topleft) +plot!(threads, n4_100k_work_array / 4, label="n=4") +plot!(threads, n8_100k_work_array / 8, label="n=8") + +plot(p1, p2, layout=(1,2), size=(1000, 500), margin=10Plots.mm) \ No newline at end of file diff --git a/examples/notebooks/Tracking_examples.jl b/examples/notebooks/Tracking_examples.jl index 6d48f744..31c130a4 100644 --- a/examples/notebooks/Tracking_examples.jl +++ b/examples/notebooks/Tracking_examples.jl @@ -5,7 +5,7 @@ using Markdown using InteractiveUtils # ╔═╡ e4c10317-3276-4a7a-a270-06c03ab6edd5 -using BeamTracking, Beamlines, BenchmarkTools, Plots, LaTeXStrings, Unitful, PhysicalConstants +using BeamTracking, Beamlines, BenchmarkTools, Plots, LaTeXStrings, Unitful, PhysicalConstants, Random # ╔═╡ 1c785b23-60c1-4f22-9056-b9896082b52c begin @@ -53,6 +53,8 @@ md"""Contruct a particle bunch and track it trough ESR""" # 
╔═╡ 6a084ffd-4f22-4df9-b81f-e347522c0779 begin # Construct a bunch: +Random.seed!(357357) + N_particle = 100 b0 = Bunch(N_particle) @@ -87,12 +89,19 @@ plot( md"""For performance and storage optimization, espessialy on GPU's it is recommended to use a bits representation of a lattice. Ignore a warning message for now, it simply states that there is a way make things even faster.""" # ╔═╡ cca8aa2f-fbfe-4189-be63-3a53ed0824c9 +# ╠═╡ disabled = true +#=╠═╡ begin bitsring = BitsBeamline(ring) track!(b0, bitsring) end + ╠═╡ =# + +# ╔═╡ ec3a3159-ca83-4773-b503-9b31032fdfff +md"""Another usefull package is GTPSA.jl. Let's use it to create a map""" # ╔═╡ cdd63014-8bd7-4bc8-b000-9ba54bd97f11 +#=╠═╡ begin using GTPSA # GTPSA map: @@ -103,9 +112,7 @@ begin track!(b0_tpsa, ring) track!(b0_tpsa, bitsring) end - -# ╔═╡ ec3a3159-ca83-4773-b503-9b31032fdfff -md"""Another usefull package is GTPSA.jl. Let's use it to create a map""" + ╠═╡ =# # ╔═╡ 00000000-0000-0000-0000-000000000001 PLUTO_PROJECT_TOML_CONTENTS = """ @@ -117,6 +124,7 @@ GTPSA = "b27dd330-f138-47c5-815b-40db9dd9b6e8" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" PhysicalConstants = "5ad8b20f-a522-5ce9-bfc9-ddf1d5bda6ab" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] @@ -136,7 +144,7 @@ PLUTO_MANIFEST_TOML_CONTENTS = """ julia_version = "1.11.5" manifest_format = "2.0" -project_hash = "559f772a926270691d018758cb7e491b6f0634da" +project_hash = "b2ad30206e9c93c2533c3b14ff91d8ff6386ea74" [[deps.Accessors]] deps = ["CompositionsBase", "ConstructionBase", "Dates", "InverseFunctions", "MacroTools"] diff --git a/test/scaling_test.sh b/test/scaling_test.sh new file mode 100644 index 00000000..d97cbd21 --- /dev/null +++ b/test/scaling_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Loop over process counts (-n) and thread counts (-t) +for n in 2 4 8; do + for t in 32 64 128 256; do + echo "Running with -n $n and -t $t" 
+        # srun -n $n --cpu-bind=cores julia -t $t mpi_example_no_plot.jl 100000
+        mpiexecjl -n $n julia -t $t examples/mpi/mpi_example_no_plot.jl 100000
+    done
+done
\ No newline at end of file