3535
3636#include < atomic>
3737#include < ctime>
38+ #include < format>
39+ #include < iostream>
3840#include < string>
3941
4042#ifndef _WIN32
@@ -373,3 +375,107 @@ void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, cons
373375 WriteToConstantMemory ((char *)&processors ()->param .occupancyMap - (char *)processors (), &tmp, sizeof (tmp), stream, ev);
374376 }
375377}
378+
379+ GPUReconstructionCPU::debugWriter::debugWriter (std::string filenameCSV, bool markdown, uint32_t statNEvents) : mMarkdown{markdown}, mStatNEvents {statNEvents}
380+ {
381+ if (!filenameCSV.empty ()) {
382+ streamCSV.open (filenameCSV, std::ios::out | std::ios::app);
383+ }
384+ }
385+
386+ void GPUReconstructionCPU::debugWriter::header ()
387+ {
388+ if (streamCSV.is_open () && !streamCSV.tellp ()) {
389+ streamCSV << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
390+ }
391+
392+ if (mMarkdown ) {
393+ std::cout << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
394+ std::cout << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
395+ }
396+ }
397+
// Emits one entry of the timing report.
//
// Sinks:
//  - CSV: one comma-separated line, if streamCSV is open.
//  - Markdown: one table row on std::cout, if mMarkdown is set.
//  - Otherwise: a printf line whose layout is selected by `name`.
//
// Parameters:
//   type          single character printed in the first column
//   count         call count; 0 leaves the count column blank
//   name          entry name; also selects the plain-text layout
//   gpu_time,
//   cpu_time,
//   total_time    timings; the value -1.0 leaves the corresponding column blank in the
//                 CSV/markdown output. Printed values are multiplied by 1e6 / mStatNEvents
//                 and shown under "us" headings (presumably the inputs are seconds - confirm).
//   memSize       byte count used for the GB/s, bytes and bytes/call columns
//   nEventReport  text appended to the "Wall" lines of the plain-text output
//
// NOTE(review): string literals are reproduced exactly as they appear in this view; the blanks
// right after the opening quote and after "\n" look like an extraction artifact - confirm
// against the repository before merging. As shown, the three-character prefix comparisons
// against " GPU" / " TPC" cannot match; their semantics are preserved here, not guessed at.
void GPUReconstructionCPU::debugWriter::row(char type, uint32_t count, std::string name, double gpu_time, double cpu_time, double total_time, std::size_t memSize, std::string nEventReport)
{
  const double scale = 1000000.0 / mStatNEvents;

  const bool hasGpu = gpu_time != -1.0;
  const bool hasCpu = cpu_time != -1.0;
  const bool hasTotal = total_time != -1.0;

  // memSize / mStatNEvents and (...) / count are integer divisions: a zero divisor is undefined
  // behaviour (typically a crash). Compute them once with explicit guards and only report the
  // memory columns when every divisor is non-zero.
  const std::size_t bytesPerEvent = mStatNEvents != 0 ? memSize / mStatNEvents : 0;
  const std::size_t bytesPerCall = count != 0 ? bytesPerEvent / count : 0;
  const bool hasMem = memSize != 0 && count != 0 && mStatNEvents != 0;

  if (streamCSV.is_open()) {
    streamCSV << type << " ,";
    if (count != 0) {
      streamCSV << count;
    }
    streamCSV << " ," << name << " ,";
    if (hasGpu) {
      streamCSV << std::format(" {:.0f}", gpu_time * scale);
    }
    streamCSV << " ,";
    if (hasCpu) {
      streamCSV << std::format(" {:.0f}", cpu_time * scale);
    }
    streamCSV << " ,";
    if (hasCpu && hasTotal) {
      streamCSV << std::format(" {:.2f}", cpu_time / total_time);
    }
    streamCSV << " ,";
    if (hasTotal) {
      streamCSV << std::format(" {:.0f}", total_time * scale);
    }
    streamCSV << " ,";
    if (hasMem) {
      streamCSV << std::format(" {:.3f},{},{}", memSize / gpu_time * 1e-9, bytesPerEvent, bytesPerCall);
    } else {
      streamCSV << " ,,"; // keep the column count constant
    }
    streamCSV << std::endl;
  }

  if (mMarkdown) {
    std::cout << " | " << type << " | ";
    if (count != 0) {
      std::cout << std::format(" {:6} |", count);
    } else {
      std::cout << " |";
    }
    std::cout << std::format(" {:42}|", name);
    if (hasGpu) {
      std::cout << std::format(" {:10.0f} |", gpu_time * scale);
    } else {
      std::cout << " |";
    }
    if (hasCpu) {
      std::cout << std::format(" {:10.0f} |", cpu_time * scale);
    } else {
      std::cout << " |";
    }
    if (hasCpu && hasTotal) {
      std::cout << std::format(" {:8.2f} |", cpu_time / total_time);
    } else {
      std::cout << " |";
    }
    if (hasTotal) {
      std::cout << std::format(" {:10.0f} |", total_time * scale);
    } else {
      std::cout << " |";
    }
    if (hasMem) {
      std::cout << std::format(" {:10.3f} |{:14} |{:14} |", memSize / gpu_time * 1e-9, bytesPerEvent, bytesPerCall);
    } else {
      std::cout << " | | |";
    }
    std::cout << std::endl;
    return;
  }

  // Plain-text output: the layout depends on the entry name.
  // compare(0, 3, s) compares the first three characters of name with the whole of s, i.e. the
  // same test as name.substr(0, 3) == s without building a temporary string.
  if (name.compare(0, 3, " GPU") == 0) {
    char bandwidth[256] = " ";
    if (hasMem && gpu_time != 0.0) {
      snprintf(bandwidth, sizeof(bandwidth), " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", memSize / gpu_time * 1e-9, bytesPerEvent, bytesPerCall);
    }
    printf(" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n ", type, count, name.c_str(), gpu_time * scale, bandwidth);
  } else if (name.compare(0, 3, " TPC") == 0) {
    // Split "<basename> (<postfix>)": basename drops the character before '(', postfix is the
    // text between '(' and the last character. Without a '(' the whole name is the basename.
    std::string basename = name;
    std::string postfix;
    const std::size_t n = name.find('(');
    if (n != std::string::npos) {
      basename = name.substr(0, n - 1);
      postfix = name.substr(n + 1, name.size() - n - 2);
    }
    if (hasTotal) {
      printf(" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n ", postfix.c_str(),
             basename.c_str(), gpu_time * scale, " ", total_time * scale, cpu_time * scale, cpu_time / total_time);
    } else {
      printf(" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n ", count, postfix.c_str(), basename.c_str(), gpu_time * scale,
             memSize / gpu_time * 1e-9, bytesPerEvent, bytesPerCall);
    }
  } else if (name == " Prepare") {
    printf(" Execution Time: General Step : %50s Time: %'10.0f us\n ", name.c_str(), gpu_time * scale);
  } else if (name == " Wall") {
    if (hasGpu) {
      printf(" Execution Time: Total : %50s Time: %'10.0f us%s\n ", " Total Kernel", gpu_time * scale, nEventReport.c_str());
    }
    printf(" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n ", " Total Wall", total_time * scale, cpu_time * scale, cpu_time / total_time, nEventReport.c_str());
  }
}
0 commit comments