diff --git a/.github/actions/spelling/allow/terms.txt b/.github/actions/spelling/allow/terms.txt index c0c1562b..042ca7e1 100644 --- a/.github/actions/spelling/allow/terms.txt +++ b/.github/actions/spelling/allow/terms.txt @@ -48,14 +48,17 @@ backpropagation biodynamo bioinformatics blogs +chrono cms codegen consteval +cout cplusplus cppyy cytokine cytokines doxygen +endl gitlab gpu gridlay @@ -70,6 +73,7 @@ llm llvm meetinglist microenvironments +nomarkdown omp openmp oop @@ -82,6 +86,7 @@ rntuple samtools samtoramntuple sbo +setprecision sitemap softsusy superbuilds @@ -129,6 +134,7 @@ MAMODE meetup metaprogramming Miapb +milli multilanguage omnidisciplinary optimisation diff --git a/_posts/2026-03-01-Creating teaching materials for C++ and CUDA with xeus-cpp.md b/_posts/2026-03-01-Creating teaching materials for C++ and CUDA with xeus-cpp.md index a9bb6cba..f96b8a7d 100644 --- a/_posts/2026-03-01-Creating teaching materials for C++ and CUDA with xeus-cpp.md +++ b/_posts/2026-03-01-Creating teaching materials for C++ and CUDA with xeus-cpp.md @@ -6,7 +6,8 @@ sitemap: false author: Hristiyan Shterev permalink: blogs/xeus_cpp_Hristiyan_Shterev_blog/ date: 2026-03-01 -tags: xeus-cpp cuda jupyter c++ xeus +tags: xeus-cpp cuda jupyter c++ xeus internship high-school systems-programming +custom_css: jupyter --- {% include dual-banner.html @@ -56,11 +57,140 @@ More specific goals include: ## Example -**CPU - std::sort vs GPU - Merge sort speed test** - -The example below shows a C++ benchmark comparing the performance of sorting a large array on a CPU versus a GPU. It provides a clear visual of how parallel processing can drastically outperform traditional sequential execution for data-heavy tasks. - - +{::nomarkdown} + +
+
+
+
+
+
+

CPU - std::sort vs GPU - Merge sort speed test

+

+ The example below shows a C++ benchmark comparing the performance of sorting a large array on a CPU versus a GPU. It provides a clear visual of how parallel processing can drastically outperform traditional sequential execution for data-heavy tasks. +

+ +

+ In the first cell we create the unsorted data that is going to be sorted by the CPU and the GPU. A compiled CUDA shared library (.so file) has been loaded beforehand. +

+
+
+
+ +
+
+
In [1]:
+
+
+
+
+unsigned int N_bench = 1048576;
+std::vector<unsigned int> data_cpu(N_bench);
+std::vector<unsigned int> data_gpu(N_bench);
+
+for (unsigned int i = 0; i < N_bench; i++) {
+    unsigned int val = N_bench - i;
+    data_cpu[i] = val;
+    data_gpu[i] = val;
+}
+
+
+
+
+
+
+
+
+
+
+

CPU and GPU sorting

+

+ Next we use std::sort and the merge_sort_gpu_full function from the loaded CUDA code and measure how long the CPU and the GPU each take to sort the data. +

+
+
+
+
+
+
In [2]:
+
+
+
+
+auto start_cpu = std::chrono::high_resolution_clock::now();
+std::sort(data_cpu.begin(), data_cpu.end());
+auto end_cpu = std::chrono::high_resolution_clock::now();
+
+std::chrono::duration<double, std::milli> cpu_ms = end_cpu - start_cpu;
+
+auto start_gpu = std::chrono::high_resolution_clock::now();
+merge_sort_gpu_full(data_gpu.data(), N_bench);
+auto end_gpu = std::chrono::high_resolution_clock::now();
+
+std::chrono::duration<double, std::milli> gpu_ms = end_gpu - start_gpu;
+
+
+
+
+
+
+
+
+
+
+

Printing the times and comparing them

+

+ Finally we print both times and compare them to see how much faster parallel processing is. +

+
+
+
+
+
+
In [3]:
+
+
+
+
+double speedup = cpu_ms.count() / gpu_ms.count();
+
+std::cout << "CPU (std::sort) took: " << std::fixed << std::setprecision(4) << cpu_ms.count() << " ms" << std::endl;
+std::cout << "GPU (Merge Sort) took: " << gpu_ms.count() << " ms" << std::endl;
+
+std::cout << std::endl;
+
+std::cout << "GPU Speedup: " << speedup << " times faster than CPU" << std::endl;
+
+
+
+
+
+
+
+
+
+
Out[3]:
+
+
+CPU (std::sort) took: 145.3539 ms
+GPU (Merge Sort) took: 9.6199 ms
+
+GPU Speedup: 15.1097 times faster than CPU
+
+
+
+
+
+
+
+ +


+ +{:/} ## Related links diff --git a/images/blog/MergeSortTest.png b/images/blog/MergeSortTest.png deleted file mode 100644 index 9b06a036..00000000 Binary files a/images/blog/MergeSortTest.png and /dev/null differ