Files
c4core/bm/bm_atox.cpp
Joao Paulo Magalhaes 280a73e39e [bm] add benchmark to compare read_dec() approaches:
set -xe ; cd ~/proj/rapidyaml/ext/c4core/ ; cmany b -t Release -V C4CORE_DEV=ON c4core-bm-atox-{uint8,int8,uint16,int16,uint32,int32,uint64,int64}
+ cd /home/jpmag/proj/rapidyaml/ext/c4core/
+ cmany b -t Release -V C4CORE_DEV=ON c4core-bm-atox-uint8 c4core-bm-atox-int8 c4core-bm-atox-uint16 c4core-bm-atox-int16 c4core-bm-atox-uint32 c4core-bm-atox-int32 c4core-bm-atox-uint64 c4core-bm-atox-int64
-----------------------------------------------
Build linux-x86_64-gxx11.2-Release
-----------------------------------------------
Entering directory /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release (was in /home/jpmag/proj/rapidyaml/ext/c4core)
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && cmake -C /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/cmany_preload.cmake -G Unix\ Makefiles -DCMAKE_EXPORT_COMPILE_COMMANDS=ON /home/jpmag/proj/rapidyaml/ext/c4core
--------
loading initial cache file /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/cmany_preload.cmake
-- cmany:preload----------------------
-- cmany: C4CORE_DEV=ON
-- cmany: CMAKE_BUILD_TYPE=Release
-- cmany: CMAKE_CXX_COMPILER=/usr/bin/c++
-- cmany: CMAKE_CXX_FLAGS=   -m64
-- cmany: CMAKE_C_COMPILER=/usr/bin/cc
-- cmany: CMAKE_C_FLAGS=   -m64
-- cmany: CMAKE_INSTALL_PREFIX=/home/jpmag/proj/rapidyaml/ext/c4core/install/linux-x86_64-gxx11.2-Release
-- cmany:preload----------------------
-- c4core: using C++ standard: C++20
-- c4core: setting C++ standard: 20
-- c4core: setting C++ standard required: ON
-- c4core: WERROR flags [Compile with warnings as errors]: -Werror;-pedantic-errors
-- c4core: STRICT_ALIASING flags [Enable strict aliasing]: -fstrict-aliasing
-- c4core: PEDANTIC flags [Compile in pedantic mode]: -Wall;-Wextra;-pedantic;-Wshadow;-Wnon-virtual-dtor;-Wcast-align;-Wunused;-Woverloaded-virtual;-Wpedantic;-Wconversion;-Wsign-conversion;-Wdouble-promotion;-Wfloat-equal;-Wformat=2;-Wlogical-op;-Wuseless-cast
-- c4core: testing requires doctest
-- c4core: doctest was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/test/ext/doctest/src"!
-- c4core: enabling benchmarks: to build, c4core-bm-build
-- c4core: enabling benchmarks: to run, c4core-bm-run
-- c4core: googlebenchmark was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/ext/googlebenchmark/src"!
-- LLVM FileCheck Found: /usr/bin/FileCheck
-- git version: v1.5.5 normalized to 1.5.5
-- Version: 1.5.5
-- Performing Test HAVE_STD_REGEX -- success
-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile
-- Performing Test HAVE_POSIX_REGEX -- success
-- Performing Test HAVE_STEADY_CLOCK -- success
-- c4core: importing subproject fp (REMOTE)... GIT_REPOSITORY;https://github.com/jk-jeon/fp;GIT_TAG;master
-- c4core: fp was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/subprojects/fp/src"!
-- c4core: ryu was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/extern/ryu/src"!
-- c4core: stb was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/extern/stb/src"!
-- c4core: importing subproject fmtlib (REMOTE)... GIT_REPOSITORY;https://github.com/fmtlib/fmt;GIT_TAG;8.1.1
-- c4core: fmtlib was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/subprojects/fmtlib/src"!
-- Module support is disabled.
-- Version: 8.1.1
-- Build type: Release
-- CXX_STANDARD: 20
-- Required features: cxx_variadic_templates
-- c4core: packing the project: TYPE;LIBRARY
-- c4core: cpack tag: v0.1.9-45-g1428977-bmplots
-- Configuring done
-- Generating done
-- Build files have been written to: /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-uint8
--------
Consolidate compiler generated dependencies of target c4core
Consolidate compiler generated dependencies of target benchmark
[ 33%] Built target c4core
[ 83%] Built target benchmark
Consolidate compiler generated dependencies of target c4core-bm-atox
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-uint8: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint8.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<uint8.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint8.json
2022-03-23T00:13:33+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4300.44 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 0.80, 1.05, 0.97
----------------------------------------------------------------------------------------------
Benchmark                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------
unroll_switch_nocheck<uint8_t>         8392973 ns      8386211 ns           81 bytes_per_second=238.487M/s items_per_second=250.071M/s
unroll_switch<uint8_t>                10456104 ns     10448897 ns           67 bytes_per_second=191.408M/s items_per_second=200.706M/s
indexloop_restrictvar0<uint8_t>       12103472 ns     12094155 ns           58 bytes_per_second=165.369M/s items_per_second=173.402M/s
indexloop_restrictvar1<uint8_t>       11771205 ns     11762871 ns           59 bytes_per_second=170.027M/s items_per_second=178.286M/s
range_based_restrictvar0<uint8_t>     11965450 ns     11956624 ns           59 bytes_per_second=167.271M/s items_per_second=175.397M/s
range_based_restrictvar1<uint8_t>     12094350 ns     12084794 ns           58 bytes_per_second=165.497M/s items_per_second=173.536M/s
prefer_likely<uint8_t>                11915937 ns     11907887 ns           59 bytes_per_second=167.956M/s items_per_second=176.115M/s
no_early_return<uint8_t>              11870883 ns     11862498 ns           59 bytes_per_second=168.599M/s items_per_second=176.788M/s
no_early_return_auto_type<uint8_t>    11870134 ns     11861937 ns           59 bytes_per_second=168.607M/s items_per_second=176.797M/s
no_early_return_auto_type2<uint8_t>   11853827 ns     11846306 ns           59 bytes_per_second=168.829M/s items_per_second=177.03M/s
[100%] Built target c4core-bm-atox-uint8
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-int8
--------
[ 28%] Built target c4core
[ 71%] Built target benchmark
[ 85%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-int8: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int8.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<int8.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int8.json
2022-03-23T00:13:42+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4193.38 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.07, 1.10, 0.99
---------------------------------------------------------------------------------------------
Benchmark                                   Time             CPU   Iterations UserCounters...
---------------------------------------------------------------------------------------------
unroll_switch_nocheck<int8_t>         7539946 ns      7534275 ns           92 bytes_per_second=265.454M/s items_per_second=278.348M/s
unroll_switch<int8_t>                 8917795 ns      8911041 ns           78 bytes_per_second=224.441M/s items_per_second=235.343M/s
indexloop_restrictvar0<int8_t>        9686761 ns      9679325 ns           73 bytes_per_second=206.626M/s items_per_second=216.663M/s
indexloop_restrictvar1<int8_t>        9614853 ns      9607514 ns           73 bytes_per_second=208.17M/s items_per_second=218.282M/s
range_based_restrictvar0<int8_t>      9539157 ns      9531874 ns           73 bytes_per_second=209.822M/s items_per_second=220.015M/s
range_based_restrictvar1<int8_t>      9698901 ns      9691215 ns           72 bytes_per_second=206.372M/s items_per_second=216.397M/s
prefer_likely<int8_t>                 9569631 ns      9561717 ns           74 bytes_per_second=209.167M/s items_per_second=219.328M/s
no_early_return<int8_t>               9689644 ns      9682294 ns           72 bytes_per_second=206.563M/s items_per_second=216.597M/s
no_early_return_auto_type<int8_t>     9517147 ns      9509993 ns           74 bytes_per_second=210.305M/s items_per_second=220.521M/s
no_early_return_auto_type2<int8_t>    9641973 ns      9634033 ns           73 bytes_per_second=207.597M/s items_per_second=217.682M/s
[100%] Built target c4core-bm-atox-int8
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-uint16
--------
-- c4core: using C++ standard: C++20
-- c4core: setting C++ standard: 20
-- c4core: setting C++ standard required: ON
-- c4core: WERROR flags [Compile with warnings as errors]: -Werror;-pedantic-errors
-- c4core: STRICT_ALIASING flags [Enable strict aliasing]: -fstrict-aliasing
-- c4core: PEDANTIC flags [Compile in pedantic mode]: -Wall;-Wextra;-pedantic;-Wshadow;-Wnon-virtual-dtor;-Wcast-align;-Wunused;-Woverloaded-virtual;-Wpedantic;-Wconversion;-Wsign-conversion;-Wdouble-promotion;-Wfloat-equal;-Wformat=2;-Wlogical-op;-Wuseless-cast
-- c4core: testing requires doctest
-- c4core: doctest was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/test/ext/doctest/src"!
-- c4core: enabling benchmarks: to build, c4core-bm-build
-- c4core: enabling benchmarks: to run, c4core-bm-run
-- c4core: googlebenchmark was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/ext/googlebenchmark/src"!
-- LLVM FileCheck Found: /usr/bin/FileCheck
-- git version: v1.5.5 normalized to 1.5.5
-- Version: 1.5.5
-- Performing Test HAVE_STD_REGEX -- success
-- Performing Test HAVE_GNU_POSIX_REGEX -- failed to compile
-- Performing Test HAVE_POSIX_REGEX -- success
-- Performing Test HAVE_STEADY_CLOCK -- success
-- c4core: importing subproject fp (REMOTE)... GIT_REPOSITORY;https://github.com/jk-jeon/fp;GIT_TAG;master
-- c4core: fp was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/subprojects/fp/src"!
-- c4core: ryu was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/extern/ryu/src"!
-- c4core: stb was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/extern/stb/src"!
-- c4core: importing subproject fmtlib (REMOTE)... GIT_REPOSITORY;https://github.com/fmtlib/fmt;GIT_TAG;8.1.1
-- c4core: fmtlib was previously imported into this project - found at "/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/subprojects/fmtlib/src"!
-- Module support is disabled.
-- Version: 8.1.1
-- Build type: Release
-- CXX_STANDARD: 20
-- Required features: cxx_variadic_templates
-- c4core: packing the project: TYPE;LIBRARY
-- c4core: cpack tag: v0.1.9-45-g1428977-bmplots
-- Configuring done
-- Generating done
-- Build files have been written to: /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release
Consolidate compiler generated dependencies of target c4core
Consolidate compiler generated dependencies of target benchmark
[ 33%] Built target c4core
[ 83%] Built target benchmark
Consolidate compiler generated dependencies of target c4core-bm-atox
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-uint16: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint16.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<uint16.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint16.json
2022-03-23T00:13:50+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4394.25 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.15, 1.11, 1.00
-----------------------------------------------------------------------------------------------
Benchmark                                     Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------------------
unroll_switch_nocheck<uint16_t>         9436192 ns      9428794 ns           74 bytes_per_second=424.232M/s items_per_second=222.42M/s
unroll_switch<uint16_t>                10037432 ns     10028993 ns           71 bytes_per_second=398.844M/s items_per_second=209.109M/s
indexloop_restrictvar0<uint16_t>        9943620 ns      9935983 ns           70 bytes_per_second=402.577M/s items_per_second=211.066M/s
indexloop_restrictvar1<uint16_t>        9991532 ns      9983416 ns           70 bytes_per_second=400.664M/s items_per_second=210.064M/s
range_based_restrictvar0<uint16_t>     11058040 ns     11049474 ns           63 bytes_per_second=362.008M/s items_per_second=189.797M/s
range_based_restrictvar1<uint16_t>     11150938 ns     11142170 ns           63 bytes_per_second=358.997M/s items_per_second=188.218M/s
prefer_likely<uint16_t>                11245604 ns     11236812 ns           62 bytes_per_second=355.973M/s items_per_second=186.632M/s
no_early_return<uint16_t>              11055278 ns     11046419 ns           63 bytes_per_second=362.108M/s items_per_second=189.849M/s
no_early_return_auto_type<uint16_t>    11060052 ns     11051620 ns           63 bytes_per_second=361.938M/s items_per_second=189.76M/s
no_early_return_auto_type2<uint16_t>   11079854 ns     11071958 ns           63 bytes_per_second=361.273M/s items_per_second=189.411M/s
[100%] Built target c4core-bm-atox-uint16
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-int16
--------
[ 33%] Built target c4core
[ 83%] Built target benchmark
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-int16: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int16.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<int16.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int16.json
2022-03-23T00:13:59+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4329.79 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.28, 1.14, 1.01
----------------------------------------------------------------------------------------------
Benchmark                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------
unroll_switch_nocheck<int16_t>         9383226 ns      9376817 ns           74 bytes_per_second=426.584M/s items_per_second=223.653M/s
unroll_switch<int16_t>                13055510 ns     13046631 ns           54 bytes_per_second=306.593M/s items_per_second=160.743M/s
indexloop_restrictvar0<int16_t>       13400903 ns     13391308 ns           52 bytes_per_second=298.701M/s items_per_second=156.605M/s
indexloop_restrictvar1<int16_t>       13341958 ns     13332213 ns           52 bytes_per_second=300.025M/s items_per_second=157.3M/s
range_based_restrictvar0<int16_t>     13257294 ns     13248344 ns           52 bytes_per_second=301.925M/s items_per_second=158.295M/s
range_based_restrictvar1<int16_t>     13117857 ns     13108410 ns           53 bytes_per_second=305.148M/s items_per_second=159.985M/s
prefer_likely<int16_t>                13308193 ns     13298247 ns           53 bytes_per_second=300.792M/s items_per_second=157.701M/s
no_early_return<int16_t>              13257837 ns     13247375 ns           53 bytes_per_second=301.947M/s items_per_second=158.307M/s
no_early_return_auto_type<int16_t>    13193600 ns     13183894 ns           46 bytes_per_second=303.4M/s items_per_second=159.069M/s
no_early_return_auto_type2<int16_t>   14056139 ns     14044784 ns           51 bytes_per_second=284.803M/s items_per_second=149.319M/s
[100%] Built target c4core-bm-atox-int16
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-uint32
--------
[ 33%] Built target c4core
[ 83%] Built target benchmark
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-uint32: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint32.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<uint32.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint32.json
2022-03-23T00:14:07+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4277.39 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.54, 1.21, 1.03
-----------------------------------------------------------------------------------------------
Benchmark                                     Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------------------
unroll_switch_nocheck<uint32_t>        11386586 ns     11376915 ns           62 bytes_per_second=703.178M/s items_per_second=184.334M/s
unroll_switch<uint32_t>                16897262 ns     16883326 ns           41 bytes_per_second=473.84M/s items_per_second=124.214M/s
indexloop_restrictvar0<uint32_t>       15914146 ns     15900501 ns           43 bytes_per_second=503.129M/s items_per_second=131.892M/s
indexloop_restrictvar1<uint32_t>       15952181 ns     15938034 ns           45 bytes_per_second=501.944M/s items_per_second=131.582M/s
range_based_restrictvar0<uint32_t>     15962152 ns     15949119 ns           44 bytes_per_second=501.595M/s items_per_second=131.49M/s
range_based_restrictvar1<uint32_t>     16065944 ns     16054218 ns           43 bytes_per_second=498.311M/s items_per_second=130.629M/s
prefer_likely<uint32_t>                16037178 ns     16024634 ns           44 bytes_per_second=499.231M/s items_per_second=130.871M/s
no_early_return<uint32_t>              16088801 ns     16075600 ns           44 bytes_per_second=497.649M/s items_per_second=130.456M/s
no_early_return_auto_type<uint32_t>    16136587 ns     16122695 ns           43 bytes_per_second=496.195M/s items_per_second=130.075M/s
no_early_return_auto_type2<uint32_t>   16222267 ns     16207546 ns           43 bytes_per_second=493.597M/s items_per_second=129.394M/s
[100%] Built target c4core-bm-atox-uint32
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-int32
--------
[ 33%] Built target c4core
[ 83%] Built target benchmark
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-int32: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int32.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<int32.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int32.json
2022-03-23T00:14:16+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4296.01 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.49, 1.20, 1.03
----------------------------------------------------------------------------------------------
Benchmark                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------
unroll_switch_nocheck<int32_t>        14466587 ns     14455635 ns           48 bytes_per_second=553.417M/s items_per_second=145.075M/s
unroll_switch<int32_t>                20435481 ns     20422278 ns           34 bytes_per_second=391.729M/s items_per_second=102.689M/s
indexloop_restrictvar0<int32_t>       19313371 ns     19301110 ns           36 bytes_per_second=414.484M/s items_per_second=108.654M/s
indexloop_restrictvar1<int32_t>       18441923 ns     18430178 ns           38 bytes_per_second=434.071M/s items_per_second=113.789M/s
range_based_restrictvar0<int32_t>     18427086 ns     18414663 ns           38 bytes_per_second=434.436M/s items_per_second=113.885M/s
range_based_restrictvar1<int32_t>     18443435 ns     18431535 ns           38 bytes_per_second=434.039M/s items_per_second=113.781M/s
prefer_likely<int32_t>                18406958 ns     18395266 ns           37 bytes_per_second=434.895M/s items_per_second=114.005M/s
no_early_return<int32_t>              18948078 ns     18930050 ns           37 bytes_per_second=422.609M/s items_per_second=110.784M/s
no_early_return_auto_type<int32_t>    18603581 ns     18590095 ns           38 bytes_per_second=430.337M/s items_per_second=112.81M/s
no_early_return_auto_type2<int32_t>   18607525 ns     18594998 ns           38 bytes_per_second=430.223M/s items_per_second=112.78M/s
[100%] Built target c4core-bm-atox-int32
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-uint64
--------
[ 33%] Built target c4core
[ 83%] Built target benchmark
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-uint64: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint64.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<uint64.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-uint64.json
2022-03-23T00:14:25+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4342 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 1.42, 1.20, 1.03
-----------------------------------------------------------------------------------------------
Benchmark                                     Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------------------
unroll_switch_nocheck<uint64_t>        13176986 ns     13167678 ns           53 bytes_per_second=1.18662G/s items_per_second=159.265M/s
unroll_switch<uint64_t>                18494164 ns     18482311 ns           38 bytes_per_second=865.693M/s items_per_second=113.468M/s
indexloop_restrictvar0<uint64_t>       17573641 ns     17560898 ns           40 bytes_per_second=911.115M/s items_per_second=119.422M/s
indexloop_restrictvar1<uint64_t>       17270240 ns     17259526 ns           41 bytes_per_second=927.024M/s items_per_second=121.507M/s
range_based_restrictvar0<uint64_t>     17199046 ns     17188009 ns           41 bytes_per_second=930.882M/s items_per_second=122.013M/s
range_based_restrictvar1<uint64_t>     17075886 ns     17065445 ns           40 bytes_per_second=937.567M/s items_per_second=122.889M/s
prefer_likely<uint64_t>                17268703 ns     17257423 ns           41 bytes_per_second=927.137M/s items_per_second=121.522M/s
no_early_return<uint64_t>              17406670 ns     17393290 ns           40 bytes_per_second=919.895M/s items_per_second=120.572M/s
no_early_return_auto_type<uint64_t>    16553888 ns     16543145 ns           42 bytes_per_second=967.168M/s items_per_second=126.769M/s
no_early_return_auto_type2<uint64_t>   17869719 ns     17857090 ns           39 bytes_per_second=896.003M/s items_per_second=117.441M/s
[100%] Built target c4core-bm-atox-uint64
--------
$ cd /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release && make -j 20 c4core-bm-atox-int64
--------
[ 33%] Built target c4core
[ 83%] Built target benchmark
[100%] Built target c4core-bm-atox
[100%] c4core: running benchmark c4core-bm-atox, case c4core-bm-atox-int64: saving results in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int64.json
/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/c4core-bm-atox-0.1.9 --benchmark_filter=^.*<int64.*;--benchmark_out_format=json;--benchmark_out=/home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release/bm/bm-results/c4core-bm-atox-int64.json
2022-03-23T00:14:34+00:00
Running ./c4core-bm-atox-0.1.9
Run on (20 X 4374.04 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x10)
  L1 Instruction 32 KiB (x10)
  L2 Unified 1024 KiB (x10)
  L3 Unified 19712 KiB (x1)
Load Average: 2.79, 1.49, 1.12
----------------------------------------------------------------------------------------------
Benchmark                                    Time             CPU   Iterations UserCounters...
----------------------------------------------------------------------------------------------
unroll_switch_nocheck<int64_t>        12815077 ns     12804375 ns           55 bytes_per_second=1.22029G/s items_per_second=163.784M/s
unroll_switch<int64_t>                18745584 ns     18730907 ns           37 bytes_per_second=854.203M/s items_per_second=111.962M/s
indexloop_restrictvar0<int64_t>       17557627 ns     17544930 ns           40 bytes_per_second=911.944M/s items_per_second=119.53M/s
indexloop_restrictvar1<int64_t>       17155850 ns     17142421 ns           41 bytes_per_second=933.357M/s items_per_second=122.337M/s
range_based_restrictvar0<int64_t>     17599406 ns     17586218 ns           40 bytes_per_second=909.803M/s items_per_second=119.25M/s
range_based_restrictvar1<int64_t>     16844522 ns     16831810 ns           41 bytes_per_second=950.581M/s items_per_second=124.595M/s
prefer_likely<int64_t>                17272667 ns     17259154 ns           40 bytes_per_second=927.044M/s items_per_second=121.51M/s
no_early_return<int64_t>              17465586 ns     17452133 ns           40 bytes_per_second=916.793M/s items_per_second=120.166M/s
no_early_return_auto_type<int64_t>    17617614 ns     17603858 ns           39 bytes_per_second=908.892M/s items_per_second=119.13M/s
no_early_return_auto_type2<int64_t>   17192851 ns     17179817 ns           41 bytes_per_second=931.325M/s items_per_second=122.071M/s
[100%] Built target c4core-bm-atox-int64
Returning to directory /home/jpmag/proj/rapidyaml/ext/c4core (currently in /home/jpmag/proj/rapidyaml/ext/c4core/build/linux-x86_64-gxx11.2-Release)
Build: finished building (1m 10s): linux-x86_64-gxx11.2-Release
-----------------------------------------------
2022-04-23 10:46:40 +01:00

479 lines
15 KiB
C++

#include "./bm_charconv.hpp"
// this is an exploratory benchmark to compare the possible
// combinations for all the components of the read_dec() algorithm
// Benchmark variant: range-based for loop, output pointer without C4_RESTRICT,
// non-digit test wrapped in C4_UNLIKELY with an early return.
//
// Reads the characters of s as decimal digits and accumulates them into *v
// (most significant digit first). *v is zeroed before the loop.
//
// Returns false as soon as a character outside '0'..'9' is met; *v then holds
// the value accumulated from the digits seen before it. Returns true when
// every character was a digit. There is no sign handling and no overflow
// detection.
template<class T>
bool range_based_restrictvar0(c4::csubstr s, T * v)
{
*v = 0;
for(char c : s)
{
// C4_UNLIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_UNLIKELY(c < '0' || c > '9'))
return false;
// both operands are cast to T before the subtraction
*v = (*v) * T(10) + (T(c) - T('0'));
}
return true;
}
// Benchmark variant: same body as range_based_restrictvar0, but the output
// pointer is qualified with C4_RESTRICT (presumably the compiler's restrict
// qualifier; the macro is defined outside this view).
//
// Reads the characters of s as decimal digits and accumulates them into *v
// (most significant digit first). *v is zeroed before the loop.
//
// Returns false as soon as a character outside '0'..'9' is met; *v then holds
// the value accumulated from the digits seen before it. Returns true when
// every character was a digit. There is no sign handling and no overflow
// detection.
template<class T>
bool range_based_restrictvar1(c4::csubstr s, T *C4_RESTRICT v)
{
*v = 0;
for(char c : s)
{
// C4_UNLIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_UNLIKELY(c < '0' || c > '9'))
return false;
// both operands are cast to T before the subtraction
*v = (*v) * T(10) + (T(c) - T('0'));
}
return true;
}
// Benchmark variant: explicit index loop over s.str[0 .. s.len), output
// pointer without C4_RESTRICT, non-digit test wrapped in C4_UNLIKELY with an
// early return.
//
// Accumulates the decimal digits of s into *v (most significant digit first).
// *v is zeroed before the loop, so a zero-length s yields *v == 0 and true.
//
// Returns false as soon as a character outside '0'..'9' is met; *v then holds
// the value accumulated from the digits seen before it. There is no sign
// handling and no overflow detection.
template<class T>
bool indexloop_restrictvar0(c4::csubstr s, T * v)
{
*v = 0;
for(size_t i = 0; i < s.len; ++i)
{
const char c = s.str[i];
// C4_UNLIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_UNLIKELY(c < '0' || c > '9'))
return false;
// both operands are cast to T before the subtraction
*v = (*v) * T(10) + (T(c) - T('0'));
}
return true;
}
// Benchmark variant: same body as indexloop_restrictvar0, but the output
// pointer is qualified with C4_RESTRICT (presumably the compiler's restrict
// qualifier; the macro is defined outside this view).
//
// Accumulates the decimal digits of s.str[0 .. s.len) into *v (most
// significant digit first). *v is zeroed before the loop, so a zero-length s
// yields *v == 0 and true.
//
// Returns false as soon as a character outside '0'..'9' is met; *v then holds
// the value accumulated from the digits seen before it. There is no sign
// handling and no overflow detection.
template<class T>
bool indexloop_restrictvar1(c4::csubstr s, T *C4_RESTRICT v)
{
*v = 0;
for(size_t i = 0; i < s.len; ++i)
{
const char c = s.str[i];
// C4_UNLIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_UNLIKELY(c < '0' || c > '9'))
return false;
// both operands are cast to T before the subtraction
*v = (*v) * T(10) + (T(c) - T('0'));
}
return true;
}
// Benchmark variant: range-based loop where the condition is phrased
// positively ("is a digit") and wrapped in C4_LIKELY, with the failure path in
// the else branch. The output pointer is not C4_RESTRICT-qualified.
//
// Accumulates the decimal digits of s into *v (most significant digit first).
// *v is zeroed before the loop.
//
// Returns false as soon as a character outside '0'..'9' is met; *v then holds
// the value accumulated from the digits seen before it. Returns true when
// every character was a digit. There is no sign handling and no overflow
// detection.
template<class T>
bool prefer_likely(c4::csubstr s, T * v)
{
*v = 0;
for(char c : s)
{
// C4_LIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_LIKELY(c >= '0' && c <= '9'))
*v = (*v) * T(10) + (T(c) - T('0'));
else
return false;
}
return true;
}
// Benchmark variant: range-based loop with a single exit point. Instead of
// returning from inside the loop, a non-digit clears a status flag and breaks;
// the flag is returned at the end. The output pointer is C4_RESTRICT-qualified
// (presumably the compiler's restrict qualifier; defined outside this view).
//
// Accumulates the decimal digits of s into *v (most significant digit first).
// *v is zeroed before the loop.
//
// Returns false if a character outside '0'..'9' was met; *v then holds the
// value accumulated from the digits seen before it. Returns true when every
// character was a digit. There is no sign handling and no overflow detection.
template<class T>
bool no_early_return(c4::csubstr s, T *C4_RESTRICT v)
{
*v = 0;
bool stat = true;
for(char c : s)
{
// C4_LIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_LIKELY(c >= '0' && c <= '9'))
// both operands are cast to T before the subtraction
*v = (*v) * T(10) + (T(c) - T('0'));
else
{
stat = false;
break;
}
}
return stat;
}
// Benchmark variant: identical control flow to no_early_return (status flag +
// break, C4_RESTRICT output), but the digit value is computed as
// (T)(c - '0'): the subtraction is done on the char operands and only the
// result is cast to T, rather than casting each operand to T first.
//
// Accumulates the decimal digits of s into *v (most significant digit first).
// *v is zeroed before the loop.
//
// Returns false if a character outside '0'..'9' was met; *v then holds the
// value accumulated from the digits seen before it. Returns true when every
// character was a digit. There is no sign handling and no overflow detection.
template<class T>
bool no_early_return_auto_type(c4::csubstr s, T *C4_RESTRICT v)
{
*v = 0;
bool stat = true;
for(char c : s)
{
// C4_LIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_LIKELY(c >= '0' && c <= '9'))
// subtract first, then cast the difference to T
*v = (*v) * T(10) + (T)(c - '0');
else
{
stat = false;
break;
}
}
return stat;
}
// Benchmark variant: like no_early_return_auto_type (status flag + break,
// C4_RESTRICT output, digit computed as (T)(c - '0')), but the accumulation is
// split into two compound assignments (*v *= 10, then *v += digit) instead of
// a single expression. Note the multiplier here is the plain literal 10, not
// T(10).
//
// Accumulates the decimal digits of s into *v (most significant digit first).
// *v is zeroed before the loop.
//
// Returns false if a character outside '0'..'9' was met; *v then holds the
// value accumulated from the digits seen before it. Returns true when every
// character was a digit. There is no sign handling and no overflow detection.
template<class T>
bool no_early_return_auto_type2(c4::csubstr s, T *C4_RESTRICT v)
{
*v = 0;
bool stat = true;
for(char c : s)
{
// C4_LIKELY is presumably a branch-prediction hint (defined outside this view)
if(C4_LIKELY(c >= '0' && c <= '9'))
{
*v *= 10;
*v += (T)(c - '0');
}
else
{
stat = false;
break;
}
}
return stat;
}
// Shorthand used by the unrolled parsers below: the numeric value of the
// character at index i of s, cast to T. It relies on a type T and a string s
// being in scope at the point of use, and performs no check that the
// character is actually a digit.
#define _(i) (T)(s.str[i] - '0')
// NOTE(review): presumably pushes the diagnostic state and silences
// -Wimplicit-fallthrough on GCC/Clang for the switch-based functions that
// follow; the matching pop is not visible in this part of the file - confirm.
C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wimplicit-fallthrough")
// Benchmark variant for 1-byte integer types: fully unrolled decimal parse
// selected by a switch on the string length, with NO validation of the
// characters ("nocheck").
//
// *v is zeroed first. For s.len of 1, 2 or 3 the digits are combined with
// explicit powers of ten and true is returned. For any other length
// (including 0) false is returned and *v stays 0.
//
// Characters are converted with the _() macro, so non-digit characters are
// not rejected and simply produce a meaningless value. No overflow detection
// is done.
template<class T>
C4_ALWAYS_INLINE auto unroll_switch_nocheck(c4::csubstr s, T *C4_RESTRICT v)
-> typename std::enable_if<sizeof(T) == 1, bool>::type
{
*v = 0;
switch(s.len)
{
case 1:
*v = _(0);
return true;
case 2:
*v = T(10) * _(0) + _(1);
return true;
case 3:
*v = T(100) * _(0) + T(10) * _(1) + _(2);
return true;
}
return false;
}
// Benchmark variant for 2-byte integer types: fully unrolled decimal parse
// selected by a switch on the string length, with NO validation of the
// characters ("nocheck").
//
// *v is zeroed first. For s.len of 1 to 5 the digits are combined with
// explicit powers of ten and true is returned. For any other length
// (including 0) false is returned and *v stays 0.
//
// Characters are converted with the _() macro, so non-digit characters are
// not rejected and simply produce a meaningless value. No overflow detection
// is done.
template<class T>
C4_ALWAYS_INLINE auto unroll_switch_nocheck(c4::csubstr s, T *C4_RESTRICT v)
-> typename std::enable_if<sizeof(T) == 2, bool>::type
{
*v = 0;
switch(s.len)
{
case 1:
*v = _(0);
return true;
case 2:
*v = T(10) * _(0) + _(1);
return true;
case 3:
*v = T(100) * _(0) + T(10) * _(1) + _(2);
return true;
case 4:
*v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
return true;
case 5:
*v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2) + T(10) * _(3) + _(4);
return true;
}
return false;
}
// Benchmark variant for 4-byte integer types: fully unrolled decimal parse
// selected by a switch on the string length, with NO validation of the
// characters ("nocheck").
//
// For s.len of 1 to 10 the digits are combined with explicit powers of ten,
// the result is stored in *v and true is returned. For any other length
// (including 0) false is returned and, unlike the 1- and 2-byte overloads,
// *v is left untouched because this overload does not zero it up front.
//
// Characters are converted with the _() macro, so non-digit characters are
// not rejected and simply produce a meaningless value. No overflow detection
// is done.
template<class T>
C4_ALWAYS_INLINE auto unroll_switch_nocheck(c4::csubstr s, T *C4_RESTRICT v)
-> typename std::enable_if<sizeof(T) == 4, bool>::type
{
switch(s.len)
{
case 1:
*v = _(0);
return true;
case 2:
*v = T(10) * _(0) + _(1);
return true;
case 3:
*v = T(100) * _(0) + T(10) * _(1) + _(2);
return true;
case 4:
*v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
return true;
case 5:
*v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2) + T(10) * _(3) + _(4);
return true;
case 6:
*v = T(100000) * _(0) + T(10000) * _(1) + T(1000) * _(2) + T(100) * _(3) + T(10) * _(4) + _(5);
return true;
case 7:
*v = T(1000000) * _(0) + T(100000) * _(1) + T(10000) * _(2) + T(1000) * _(3) + T(100) * _(4) + T(10) * _(5) + _(6);
return true;
case 8:
*v = T(10000000) * _(0) + T(1000000) * _(1) + T(100000) * _(2) + T(10000) * _(3) + T(1000) * _(4) + T(100) * _(5) + T(10) * _(6) + _(7);
return true;
case 9:
*v = T(100000000) * _(0) + T(10000000) * _(1) + T(1000000) * _(2) + T(100000) * _(3) + T(10000) * _(4) + T(1000) * _(5) + T(100) * _(6) + T(10) * _(7) + _(8);
return true;
case 10:
*v = T(1000000000) * _(0) + T(100000000) * _(1) + T(10000000) * _(2) + T(1000000) * _(3) + T(100000) * _(4) + T(10000) * _(5) + T(1000) * _(6) + T(100) * _(7) + T(10) * _(8) + _(9);
return true;
}
return false;
}
/** Unchecked conversion of a decimal string to an 8-byte integer.
 *
 * Lengths 1 to 10 use one fully unrolled expression each. Longer inputs
 * accumulate the leading characters in a loop and then add the unrolled
 * 10-character tail. The characters are not validated and no overflow
 * check is made.
 * `_` is a helper macro defined earlier in this file (not visible here);
 * presumably `_(i)` yields the numeric value of `s.str[i]`.
 *
 * @param s the characters to convert
 * @param v receives the result; left untouched when `s` is empty
 * @return false when `s` is empty, true otherwise */
template<class T>
C4_ALWAYS_INLINE auto unroll_switch_nocheck(c4::csubstr s, T *C4_RESTRICT v)
    -> typename std::enable_if<sizeof(T) == 8, bool>::type
{
    switch(s.len)
    {
    case 0:
        // Must not reach the default branch: it unconditionally reads ten
        // characters, which an empty string does not have.
        break;
    case 1:
        *v = _(0);
        return true;
    case 2:
        *v = T(10) * _(0) + _(1);
        return true;
    case 3:
        *v = T(100) * _(0) + T(10) * _(1) + _(2);
        return true;
    case 4:
        *v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
        return true;
    case 5:
        *v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2) + T(10) * _(3) + _(4);
        return true;
    case 6:
        *v = T(100000) * _(0) + T(10000) * _(1) + T(1000) * _(2) + T(100) * _(3) + T(10) * _(4) + _(5);
        return true;
    case 7:
        *v = T(1000000) * _(0) + T(100000) * _(1) + T(10000) * _(2) + T(1000) * _(3) + T(100) * _(4) + T(10) * _(5) + _(6);
        return true;
    case 8:
        *v = T(10000000) * _(0) + T(1000000) * _(1) + T(100000) * _(2) + T(10000) * _(3) + T(1000) * _(4) + T(100) * _(5) + T(10) * _(6) + _(7);
        return true;
    case 9:
        *v = T(100000000) * _(0) + T(10000000) * _(1) + T(1000000) * _(2) + T(100000) * _(3) + T(10000) * _(4) + T(1000) * _(5) + T(100) * _(6) + T(10) * _(7) + _(8);
        return true;
    case 10:
        *v = T(1000000000) * _(0) + T(100000000) * _(1) + T(10000000) * _(2) + T(1000000) * _(3) + T(100000) * _(4) + T(10000) * _(5) + T(1000) * _(6) + T(100) * _(7) + T(10) * _(8) + _(9);
        return true;
    default:
        {
            // More than 10 characters: fold the leading (len - 10) ones
            // into *v one at a time ...
            size_t i = 0;
            *v = 0;
            for( ; i + 10 < s.len; ++i)
                *v = *v * T(10) + _(i);
            // ... then shift that prefix up by ten decimal places and add
            // the unrolled 10-character tail. The prefix has to take part
            // in this sum, otherwise it is lost and only the last ten
            // digits survive.
            *v = *v * T(10000000000)
               + T(1000000000) * _(i) + T(100000000) * _(i+1) + T(10000000) * _(i+2)
               + T(1000000) * _(i+3) + T(100000) * _(i+4) + T(10000) * _(i+5)
               + T(1000) * _(i+6) + T(100) * _(i+7) + T(10) * _(i+8) + _(i+9);
            return true;
        }
    }
    return false;
}
// ok(i): true when character i of the string `s` in scope lies in '0'..'9'.
#define ok(i) ('0' <= s.str[(i)] && s.str[(i)] <= '9')
/** Checked conversion of a short decimal string to a 1-byte integer,
 * with one fully unrolled expression per supported length (1 to 3).
 *
 * The value is computed first and the characters are validated afterwards
 * with `ok()`; no overflow check is made.
 * `_` is a helper macro defined earlier in this file (not visible here);
 * presumably `_(i)` yields the numeric value of `s.str[i]`.
 *
 * @param s the characters to convert
 * @param v receives the result; it is zeroed when the length is unsupported
 * @return true when `s.len` is in [1,3] and every character is in '0'..'9' */
template<class T>
C4_ALWAYS_INLINE auto unroll_switch(c4::csubstr s, T *C4_RESTRICT v)
    -> typename std::enable_if<sizeof(T) == 1, bool>::type
{
    bool all_digits = false; // stays false for unsupported lengths
    *v = 0;
    switch(s.len)
    {
    case 1:
        *v = _(0);
        all_digits = ok(0);
        break;
    case 2:
        *v = T(10) * _(0) + _(1);
        all_digits = ok(0) && ok(1);
        break;
    case 3:
        *v = T(100) * _(0) + T(10) * _(1) + _(2);
        all_digits = ok(0) && ok(1) && ok(2);
        break;
    }
    return all_digits;
}
/** Checked conversion of a short decimal string to a 2-byte integer,
 * with one fully unrolled expression per supported length (1 to 5).
 *
 * The value is computed first and the characters are validated afterwards
 * with `ok()`; no overflow check is made.
 * `_` is a helper macro defined earlier in this file (not visible here);
 * presumably `_(i)` yields the numeric value of `s.str[i]`.
 *
 * @param s the characters to convert
 * @param v receives the result; it is zeroed when the length is unsupported
 * @return true when `s.len` is in [1,5] and every character is in '0'..'9' */
template<class T>
C4_ALWAYS_INLINE auto unroll_switch(c4::csubstr s, T *C4_RESTRICT v)
    -> typename std::enable_if<sizeof(T) == 2, bool>::type
{
    *v = 0;
    switch(s.len)
    {
    case 1:
        *v = _(0);
        // validate like every other length does; this used to report
        // success unconditionally
        return ok(0);
    case 2:
        *v = T(10) * _(0) + _(1);
        // same here: both characters must be digits
        return ok(0) && ok(1);
    case 3:
        *v = T(100) * _(0) + T(10) * _(1) + _(2);
        return ok(0) && ok(1) && ok(2);
    case 4:
        *v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
        return ok(0) && ok(1) && ok(2) && ok(3);
    case 5:
        *v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2) + T(10) * _(3) + _(4);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4);
    }
    return false;
}
/** Checked conversion of a short decimal string to a 4-byte integer,
 * with one fully unrolled expression per supported length (1 to 10).
 *
 * The value is computed first and the characters are validated afterwards
 * with `ok()`; no overflow check is made.
 * `_` is a helper macro defined earlier in this file (not visible here);
 * presumably `_(i)` yields the numeric value of `s.str[i]`.
 *
 * @param s the characters to convert
 * @param v receives the result; left untouched when the length is unsupported
 * @return true when `s.len` is in [1,10] and every character is in '0'..'9' */
template<class T>
C4_ALWAYS_INLINE auto unroll_switch(c4::csubstr s, T *C4_RESTRICT v)
    -> typename std::enable_if<sizeof(T) == 4, bool>::type
{
    bool all_digits = false; // stays false for unsupported lengths
    switch(s.len)
    {
    case 1:
        *v = _(0);
        all_digits = ok(0);
        break;
    case 2:
        *v = T(10) * _(0) + _(1);
        all_digits = ok(0) && ok(1);
        break;
    case 3:
        *v = T(100) * _(0) + T(10) * _(1) + _(2);
        all_digits = ok(0) && ok(1) && ok(2);
        break;
    case 4:
        *v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3);
        break;
    case 5:
        *v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2)
           + T(10) * _(3) + _(4);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4);
        break;
    case 6:
        *v = T(100000) * _(0) + T(10000) * _(1) + T(1000) * _(2)
           + T(100) * _(3) + T(10) * _(4) + _(5);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5);
        break;
    case 7:
        *v = T(1000000) * _(0) + T(100000) * _(1) + T(10000) * _(2)
           + T(1000) * _(3) + T(100) * _(4) + T(10) * _(5) + _(6);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5)
                  && ok(6);
        break;
    case 8:
        *v = T(10000000) * _(0) + T(1000000) * _(1) + T(100000) * _(2)
           + T(10000) * _(3) + T(1000) * _(4) + T(100) * _(5)
           + T(10) * _(6) + _(7);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5)
                  && ok(6) && ok(7);
        break;
    case 9:
        *v = T(100000000) * _(0) + T(10000000) * _(1) + T(1000000) * _(2)
           + T(100000) * _(3) + T(10000) * _(4) + T(1000) * _(5)
           + T(100) * _(6) + T(10) * _(7) + _(8);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5)
                  && ok(6) && ok(7) && ok(8);
        break;
    case 10:
        *v = T(1000000000) * _(0) + T(100000000) * _(1) + T(10000000) * _(2)
           + T(1000000) * _(3) + T(100000) * _(4) + T(10000) * _(5)
           + T(1000) * _(6) + T(100) * _(7) + T(10) * _(8) + _(9);
        all_digits = ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5)
                  && ok(6) && ok(7) && ok(8) && ok(9);
        break;
    }
    return all_digits;
}
/** Checked conversion of a decimal string to an 8-byte integer.
 *
 * Lengths 1 to 10 use one fully unrolled expression each. Longer inputs
 * accumulate the leading characters in a loop (bailing out on the first
 * non-digit) and then add the unrolled 10-character tail. Characters are
 * validated with `ok()`; no overflow check is made.
 * `_` is a helper macro defined earlier in this file (not visible here);
 * presumably `_(i)` yields the numeric value of `s.str[i]`.
 *
 * @param s the characters to convert
 * @param v receives the result; left untouched when `s` is empty
 * @return true when `s` is non-empty and every character is in '0'..'9' */
template<class T>
C4_ALWAYS_INLINE auto unroll_switch(c4::csubstr s, T *C4_RESTRICT v)
    -> typename std::enable_if<sizeof(T) == 8, bool>::type
{
    switch(s.len)
    {
    case 0:
        // Must not reach the default branch: it unconditionally reads ten
        // characters, which an empty string does not have.
        break;
    case 1:
        *v = _(0);
        return ok(0);
    case 2:
        *v = T(10) * _(0) + _(1);
        return ok(0) && ok(1);
    case 3:
        *v = T(100) * _(0) + T(10) * _(1) + _(2);
        return ok(0) && ok(1) && ok(2);
    case 4:
        *v = T(1000) * _(0) + T(100) * _(1) + T(10) * _(2) + _(3);
        return ok(0) && ok(1) && ok(2) && ok(3);
    case 5:
        *v = T(10000) * _(0) + T(1000) * _(1) + T(100) * _(2) + T(10) * _(3) + _(4);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4);
    case 6:
        *v = T(100000) * _(0) + T(10000) * _(1) + T(1000) * _(2) + T(100) * _(3) + T(10) * _(4) + _(5);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5);
    case 7:
        *v = T(1000000) * _(0) + T(100000) * _(1) + T(10000) * _(2) + T(1000) * _(3) + T(100) * _(4) + T(10) * _(5) + _(6);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5) && ok(6);
    case 8:
        *v = T(10000000) * _(0) + T(1000000) * _(1) + T(100000) * _(2) + T(10000) * _(3) + T(1000) * _(4) + T(100) * _(5) + T(10) * _(6) + _(7);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5) && ok(6) && ok(7);
    case 9:
        *v = T(100000000) * _(0) + T(10000000) * _(1) + T(1000000) * _(2) + T(100000) * _(3) + T(10000) * _(4) + T(1000) * _(5) + T(100) * _(6) + T(10) * _(7) + _(8);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5) && ok(6) && ok(7) && ok(8);
    case 10:
        *v = T(1000000000) * _(0) + T(100000000) * _(1) + T(10000000) * _(2) + T(1000000) * _(3) + T(100000) * _(4) + T(10000) * _(5) + T(1000) * _(6) + T(100) * _(7) + T(10) * _(8) + _(9);
        return ok(0) && ok(1) && ok(2) && ok(3) && ok(4) && ok(5) && ok(6) && ok(7) && ok(8) && ok(9);
    default:
        {
            // More than 10 characters: fold the leading (len - 10) ones
            // into *v one at a time, stopping at the first non-digit ...
            size_t i = 0;
            *v = 0;
            for( ; i + 10 < s.len; ++i)
            {
                *v = *v * T(10) + _(i);
                if(C4_UNLIKELY(!ok(i)))
                    return false;
            }
            // ... then shift that prefix up by ten decimal places and add
            // the unrolled 10-character tail. The prefix has to take part
            // in this sum, otherwise it is lost and only the last ten
            // digits survive.
            *v = *v * T(10000000000)
               + T(1000000000) * _(i) + T(100000000) * _(i+1) + T(10000000) * _(i+2)
               + T(1000000) * _(i+3) + T(100000) * _(i+4) + T(10000) * _(i+5)
               + T(1000) * _(i+6) + T(100) * _(i+7) + T(10) * _(i+8) + _(i+9);
            return ok(i) && ok(i+1) && ok(i+2) && ok(i+3) && ok(i+4) && ok(i+5) && ok(i+6) && ok(i+7) && ok(i+8) && ok(i+9);
        }
    }
    return false;
}
// NOTE(review): presumably closes a warning-suppression scope opened
// earlier in this file (the matching push is not visible here).
C4_SUPPRESS_WARNING_GCC_CLANG_POP

// The short helper macros are only meant for the parsing functions above.
// Remove both of them so that such generic names cannot clash with
// identifiers used further down.
#undef _
#undef ok
/* DECLARE_BM(func): defines `template<class T> void func(bm::State &st)`,
 * the benchmark driver for the parsing function(s) of the same name.
 *
 * For every iteration of `st` the driver calls `func(values.next(), &val)`
 * inside a C4DOALL(kNumValues) block (presumably a loop over kNumValues
 * repetitions; the macro is defined elsewhere). The return flag and the
 * parsed value are folded into `sum`, which is handed to
 * bm::DoNotOptimize() after the loop. Finally report<T>() is called with
 * the state and kNumValues.
 *
 * No comments inside the body: a `//` comment before a line continuation
 * would swallow the next line of the macro. */
#define DECLARE_BM(func)                                            \
    template<class T>                                               \
    void func(bm::State &st)                                        \
    {                                                               \
        random_strings_cref values = mkstrings_positive<T>();       \
        T val{};                                                    \
        T sum{};                                                    \
        for(auto _ : st)                                            \
        {                                                           \
            C4DOALL(kNumValues)                                     \
            {                                                       \
                const bool parsed = func(values.next(), &val);      \
                sum += static_cast<T>(T(parsed) + val);             \
            }                                                       \
        }                                                           \
        bm::DoNotOptimize(sum);                                     \
        report<T>(st, kNumValues);                                  \
    }
#define DEFINE_BM(ty) \
C4BM_TEMPLATE(unroll_switch_nocheck, ty); \
C4BM_TEMPLATE(unroll_switch, ty); \
C4BM_TEMPLATE(indexloop_restrictvar0, ty); \
C4BM_TEMPLATE(indexloop_restrictvar1, ty); \
C4BM_TEMPLATE(range_based_restrictvar0, ty); \
C4BM_TEMPLATE(range_based_restrictvar1, ty); \
C4BM_TEMPLATE(prefer_likely, ty); \
C4BM_TEMPLATE(no_early_return, ty); \
C4BM_TEMPLATE(no_early_return_auto_type, ty); \
C4BM_TEMPLATE(no_early_return_auto_type2, ty); \
DECLARE_BM(unroll_switch_nocheck)
// One DECLARE_BM per name: each expansion defines a benchmark driver
// `template<class T> void <name>(bm::State&)` which calls
// `<name>(values.next(), &val)`, so every name listed here must also be
// callable with those arguments.
DECLARE_BM(unroll_switch)
DECLARE_BM(indexloop_restrictvar0)
DECLARE_BM(indexloop_restrictvar1)
DECLARE_BM(range_based_restrictvar0)
DECLARE_BM(range_based_restrictvar1)
DECLARE_BM(prefer_likely)
DECLARE_BM(no_early_return)
DECLARE_BM(no_early_return_auto_type)
DECLARE_BM(no_early_return_auto_type2)
// Expand DEFINE_BM once per integer type: signed and unsigned, 8 to 64 bits.
// Each expansion issues the C4BM_TEMPLATE(...) statements for that type
// (presumably registering the benchmarks; C4BM_TEMPLATE is defined elsewhere).
DEFINE_BM(uint8_t)
DEFINE_BM(int8_t)
DEFINE_BM(uint16_t)
DEFINE_BM(int16_t)
DEFINE_BM(uint32_t)
DEFINE_BM(int32_t)
DEFINE_BM(uint64_t)
DEFINE_BM(int64_t)
/** Benchmark entry point.
 *
 * Hands the command line to the benchmark library, refuses to run when
 * arguments are left over that the library did not recognize (so that a
 * misspelled flag is reported instead of being silently ignored), and then
 * runs the selected benchmarks.
 *
 * @return 0 after running the benchmarks, 1 on unrecognized arguments */
int main(int argc, char *argv[])
{
    // NOTE(review): disabled call, kept from the original; do_test() is
    // presumably a sanity check defined earlier in this file.
    //do_test();
    bm::Initialize(&argc, argv);
    if(bm::ReportUnrecognizedArguments(argc, argv))
        return 1;
    bm::RunSpecifiedBenchmarks();
    return 0;
}