Merge pull request #304 from kevinbackhouse/quadratic-fuzz-2

Fuzz target for finding quadratic performance issues
This commit is contained in:
Kevin Backhouse
2023-01-24 19:47:27 +00:00
committed by GitHub
5 changed files with 160 additions and 0 deletions

View File

@@ -18,6 +18,15 @@ option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON)
# User-configurable build switches (override with -D<NAME>=ON|OFF).
option(CMARK_STATIC "Build static libcmark-gfm library" ON)
option(CMARK_SHARED "Build shared libcmark-gfm library" ON)
option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF)
# Opt-in: adds sanitizer/fuzzer instrumentation flags to the build and
# pulls in the fuzz/ subdirectory.
option(CMARK_FUZZ_QUADRATIC "Build quadratic fuzzing harness" OFF)
# When the quadratic fuzzing harness is enabled, append the fuzzer and
# AddressSanitizer instrumentation flags (plus debug info) to the C and C++
# compile flags and to the executable and module linker flags. This runs
# before the add_subdirectory() calls below, so those subdirectories see
# the extended flags.
if(CMARK_FUZZ_QUADRATIC)
  set(FUZZER_FLAGS "-fsanitize=fuzzer-no-link,address -g")
  foreach(flags_var
      CMAKE_CXX_FLAGS
      CMAKE_C_FLAGS
      CMAKE_EXE_LINKER_FLAGS
      CMAKE_MODULE_LINKER_FLAGS)
    set(${flags_var} "${${flags_var}} ${FUZZER_FLAGS}")
  endforeach()
endif()
add_subdirectory(src)
add_subdirectory(extensions)
@@ -29,6 +38,9 @@ if(CMARK_TESTS)
enable_testing()
add_subdirectory(test testdir)
endif()
# Only descend into fuzz/ when the quadratic fuzzing harness was requested.
if(CMARK_FUZZ_QUADRATIC)
add_subdirectory(fuzz)
endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING

21
fuzz/CMakeLists.txt Normal file
View File

@@ -0,0 +1,21 @@
# Header search path for the fuzz targets: the extensions/ and src/
# directories of both the build tree and the source tree.
# NOTE(review): the build-tree entries are presumably for generated
# headers -- confirm.
include_directories(
  "${PROJECT_BINARY_DIR}/extensions"
  "${PROJECT_BINARY_DIR}/src"
  "${CMAKE_CURRENT_SOURCE_DIR}/../extensions"
  "${CMAKE_CURRENT_SOURCE_DIR}/../src"
)
# fuzzer(<name>)
#
# Defines a libFuzzer executable target called <name>, built from the single
# source file <name>.c in this directory, and links it against the cmark-gfm
# extensions and core libraries.
#
# The shared libraries are used when CMARK_SHARED is on; otherwise the static
# ones are used when CMARK_STATIC is on. If neither library flavour is being
# built, configuration fails immediately instead of producing a target that
# would only fail later with undefined references at link time.
#
# Example:
#   fuzzer(fuzz_quadratic)   # builds fuzz_quadratic from fuzz_quadratic.c
function(fuzzer name)
  add_executable(${name} ${name}.c)

  # -fsanitize=fuzzer is needed on both the compile and the link step of the
  # harness itself.
  set_target_properties(${name}
    PROPERTIES
      COMPILE_FLAGS "-fsanitize=fuzzer"
      LINK_FLAGS "-fsanitize=fuzzer")

  if(CMARK_SHARED)
    target_link_libraries(${name}
      PRIVATE libcmark-gfm-extensions libcmark-gfm)
  elseif(CMARK_STATIC)
    target_link_libraries(${name}
      PRIVATE libcmark-gfm-extensions_static libcmark-gfm_static)
  else()
    message(FATAL_ERROR
      "Fuzz target '${name}' needs CMARK_SHARED or CMARK_STATIC to be ON")
  endif()
endfunction()

fuzzer(fuzz_quadratic)

12
fuzz/README.md Normal file
View File

@@ -0,0 +1,12 @@
The quadratic fuzzer generates long inputs in which a short substring is repeated many times, such as `<?x<?x<?x<?x...`,
to detect quadratic-complexity performance issues.
To build and run the quadratic fuzzer:
```bash
mkdir build-fuzz
cd build-fuzz
cmake -DCMARK_FUZZ_QUADRATIC=ON -DCMAKE_C_COMPILER=$(which clang) -DCMAKE_CXX_COMPILER=$(which clang++) -DCMAKE_BUILD_TYPE=Release ..
make
../fuzz/fuzzloop.sh
```

87
fuzz/fuzz_quadratic.c Normal file
View File

@@ -0,0 +1,87 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "cmark-gfm.h"
#include "cmark-gfm-core-extensions.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/* NULL-terminated list of syntax extension names. The fuzz target looks up
 * each name and attaches the extension to every parser it creates; a name
 * that cannot be found aborts the process. */
const char *extension_names[] = {
"autolink",
"strikethrough",
"table",
"tagfilter",
NULL,
};
/* libFuzzer initialization hook: sets up the standard node flags and makes
 * sure the GFM core extensions are registered. The command-line arguments
 * are not inspected. Always returns 0. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  (void)argc;
  (void)argv;

  cmark_init_standard_node_flags();
  cmark_gfm_core_extensions_ensure_registered();

  return 0;
}
/*
 * libFuzzer entry point.
 *
 * Input layout: a packed configuration header followed by markdown bytes.
 * Inputs shorter than the header, or whose markdown part is larger than the
 * working buffer, are ignored.
 *
 * If the header's `splitpoint` and `repeatlen` describe a non-empty slice
 * that lies inside the markdown bytes, the document that gets parsed is
 *
 *     prefix | unit unit unit ... | tail
 *
 * where `prefix` is the first `splitpoint` bytes, `unit` is the following
 * `repeatlen` bytes (repeated for as long as the result, including `tail`,
 * still fits in the buffer) and `tail` is everything after `unit`.
 * Otherwise the markdown bytes are used unchanged.
 *
 * The resulting document is parsed with every extension listed in
 * `extension_names` attached and then rendered to HTML. Always returns 0.
 */
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  struct __attribute__((packed)) {
    int options;
    int width;
    uint8_t splitpoint;
    uint8_t repeatlen;
  } fuzz_config;

  /* Not enough bytes for the configuration header: nothing to do. */
  if (size < sizeof(fuzz_config)) {
    return 0;
  }

  /* The beginning of `data` is the fuzzer configuration. */
  memcpy(&fuzz_config, data, sizeof(fuzz_config));

  /* The fuzzed option bits are discarded in favour of the options that are
   * used by GitHub. */
  fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;

  /* Everything after the header is markdown. */
  const char *payload = (const char *)(data + sizeof(fuzz_config));
  const size_t payload_len = size - sizeof(fuzz_config);

  char markdown[0x80000];
  if (payload_len > sizeof(markdown)) {
    return 0;
  }

  const size_t prefix_len = fuzz_config.splitpoint;
  const size_t unit_len = fuzz_config.repeatlen;
  size_t markdown_size = 0;

  if (prefix_len > payload_len || unit_len == 0 ||
      unit_len > payload_len - prefix_len) {
    /* No usable repeat slice: feed the payload through verbatim. */
    memcpy(markdown, payload, payload_len);
    markdown_size = payload_len;
  } else {
    const char *unit = payload + prefix_len;
    const char *tail = unit + unit_len;
    const size_t tail_len = payload_len - prefix_len - unit_len;

    memcpy(markdown, payload, prefix_len);
    markdown_size = prefix_len;

    /* Keep appending the unit while there is still room for one more copy
     * plus the tail. */
    while (markdown_size + unit_len + tail_len <= sizeof(markdown)) {
      memcpy(markdown + markdown_size, unit, unit_len);
      markdown_size += unit_len;
    }

    memcpy(markdown + markdown_size, tail, tail_len);
    markdown_size += tail_len;
  }

  cmark_parser *parser = cmark_parser_new(fuzz_config.options);

  for (size_t i = 0; extension_names[i] != NULL; ++i) {
    cmark_syntax_extension *ext = cmark_find_syntax_extension(extension_names[i]);
    if (ext == NULL) {
      fprintf(stderr, "%s is not a valid syntax extension\n", extension_names[i]);
      abort();
    }
    cmark_parser_attach_syntax_extension(parser, ext);
  }

  cmark_parser_feed(parser, markdown, markdown_size);
  cmark_node *doc = cmark_parser_finish(parser);

  /* Only the act of rendering matters; the output itself is discarded. */
  char *html = cmark_render_html(doc, fuzz_config.options, NULL);
  free(html);

  cmark_node_free(doc);
  cmark_parser_free(parser);

  return 0;
}

28
fuzz/fuzzloop.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
#
# Runs the quadratic fuzzer in an endless minimize-then-fuzz cycle.
# All paths are relative to the current working directory.

# Abort the whole script as soon as any command fails.
set -e

fuzzer=./fuzz/fuzz_quadratic
max_len=1024
# 4 hours, in seconds
run_seconds=14400

# Make sure a corpus directory exists before the first iteration.
mkdir -p corpus

# Memory and disk usage grow over time, so the fuzzer is restarted every
# 4 hours; each restart first shrinks the corpus with `-merge=1`.
while true
do
    date
    echo "restarting loop"

    # Minimize: set the old corpus aside and merge it (together with
    # ../bench) into a fresh, empty corpus directory.
    mv corpus/ corpus2
    mkdir corpus
    echo "minimizing corpus"
    "$fuzzer" -merge=1 corpus ../bench corpus2/ "-max_len=$max_len"
    rm -r corpus2

    # Fuzz for 4 hours with one job and one worker per available CPU.
    date
    echo "start fuzzer"
    "$fuzzer" corpus -dict=../test/fuzzing_dictionary -jobs=$(nproc) -workers=$(nproc) "-max_len=$max_len" "-max_total_time=$run_seconds"
done