Merge remote-tracking branch 'github/master' into vmitchell/sync-upstream-gfm.7

rdar://104622655
2026-01-18 17:31:20 +01:00 · 2023-01-24 15:58:55 -07:00
parent 25d503f196 00ba25c20f
commit 5a3db92f26
32 changed files with 14723 additions and 10710 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,7 @@ build
 cmark.dSYM/*
 cmark
 .vscode
+.DS_Store

 # Testing and benchmark
 alltests.md
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,16 @@ set(CMAKE_C_STANDARD_REQUIRED YES)
 # Use CMake's generated headers instead of the Swift package prebuilt ones
 add_compile_definitions(CMARK_USE_CMAKE_HEADERS)

+option(CMARK_FUZZ_QUADRATIC "Build quadratic fuzzing harness" OFF)
+
+if(CMARK_FUZZ_QUADRATIC)
+  set(FUZZER_FLAGS "-fsanitize=fuzzer-no-link,address -g")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FUZZER_FLAGS}")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FUZZER_FLAGS}")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FUZZER_FLAGS}")
+  set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FUZZER_FLAGS}")
+endif()
+
 add_subdirectory(src)
 add_subdirectory(extensions)
 if(CMARK_TESTS AND (CMARK_SHARED OR CMARK_STATIC))
@@ -41,6 +51,9 @@ if(CMARK_TESTS)
  enable_testing()
  add_subdirectory(test testdir)
 endif()
+if(CMARK_FUZZ_QUADRATIC)
+  add_subdirectory(fuzz)
+endif()

 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
--- a/6
+++ b/6
@@ -22,7 +22,7 @@ VERSION?=$(SPECVERSION)
 RELEASE?=CommonMark-$(VERSION)
 INSTALL_PREFIX?=/usr/local
 CLANG_CHECK?=clang-check
-CLANG_FORMAT=clang-format-3.5 -style llvm -sort-includes=0 -i
+CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i
 AFL_PATH?=/usr/local/bin

 .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl clang-check docker libFuzzer
@@ -140,7 +140,7 @@ $(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re
 	esac
 	re2c --case-insensitive -b -i --no-generation-date -8 \
 		--encoding-policy substitute -o $@ $<
-	clang-format-3.5 -style llvm -i $@
+	clang-format -style llvm -i $@

 # We include entities.inc in the repository, so normally this
 # doesn't need to be regenerated:
@@ -211,7 +211,7 @@ format:
 	$(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h

 format-extensions:
-	clang-format-3.5 -style llvm -i extensions/*.c extensions/*.h
+	clang-format -style llvm -i extensions/*.c extensions/*.h

 operf: $(CMARK)
 	operf $< < $(BENCHFILE) > /dev/null
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -1575,6 +1575,7 @@ int main() {
  int retval;
  test_batch_runner *runner = test_batch_runner_new();

+  cmark_init_standard_node_flags();
  version(runner);
  constructor(runner);
  accessors(runner);
--- a/bin/main.c
+++ b/bin/main.c
@@ -143,6 +143,7 @@ int main(int argc, char *argv[]) {
  }
 #endif

+  cmark_init_standard_node_flags();
  cmark_gfm_core_extensions_ensure_registered();

 #ifdef USE_PLEDGE
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,20 @@
+[0.29.0.gfm.7]
+
+  * Fixed a polynomial time complexity issue per
+    https://github.com/github/cmark-gfm/security/advisories/GHSA-r572-jvj2-3m8p
+  * Fixed an issue in which a crafted markdown document could trigger an
+    out-of-bounds read in the validate_protocol function per
+    https://github.com/github/cmark-gfm/security/advisories/GHSA-c944-cv5f-hpvr
+  * Fixed a polynomial time complexity issue per
+    https://github.com/github/cmark-gfm/security/advisories/GHSA-24f7-9frr-5h2r
+  * Fixed several polynomial time complexity issues per
+    https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
+  * We removed an unneeded .DS_Store file (#291)
+  * We added a test for domains with underscores and fixed roundtrip behavior (#292)
+  * We now use an up-to-date clang-format (#294)
+  * We made a variety of implicit integer truncations explicit by moving to
+    size_t as our standard size integer type (#302)
+
 [0.29.0.gfm.6]
  * Fixed polynomial time complexity DoS vulnerability in autolink extension
  
--- a/extensions/autolink.c
+++ b/extensions/autolink.c
@@ -2,6 +2,7 @@
 #include <parser.h>
 #include <string.h>
 #include <utf8.h>
+#include <stddef.h>

 #if defined(_WIN32)
 #define strncasecmp _strnicmp
@@ -35,30 +36,63 @@ static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
 }

 static size_t autolink_delim(uint8_t *data, size_t link_end) {
-  uint8_t cclose, copen;
  size_t i;
+  size_t closing = 0;
+  size_t opening = 0;

-  for (i = 0; i < link_end; ++i)
-    if (data[i] == '<') {
+  for (i = 0; i < link_end; ++i) {
+    const uint8_t c = data[i];
+    if (c == '<') {
      link_end = i;
      break;
+    } else if (c == '(') {
+      opening++;
+    } else if (c == ')') {
+      closing++;
    }
+  }

  while (link_end > 0) {
-    cclose = data[link_end - 1];
-
-    switch (cclose) {
+    switch (data[link_end - 1]) {
    case ')':
-      copen = '(';
-      break;
-    default:
-      copen = 0;
-    }
-
-    if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
+      /* Allow any number of matching parentheses at the end of the URL.
+       * `opening` and `closing` hold the '(' and ')' counts for the link.
+       * If there are more closing parentheses than opening ones, we remove
+       * one character from the end of the link.
+       *
+       * Examples (input text => output linked portion):
+       *
+       *        http://www.pokemon.com/Pikachu_(Electric)
+       *                => http://www.pokemon.com/Pikachu_(Electric)
+       *
+       *        http://www.pokemon.com/Pikachu_((Electric)
+       *                => http://www.pokemon.com/Pikachu_((Electric)
+       *
+       *        http://www.pokemon.com/Pikachu_(Electric))
+       *                => http://www.pokemon.com/Pikachu_(Electric)
+       *
+       *        http://www.pokemon.com/Pikachu_((Electric))
+       *                => http://www.pokemon.com/Pikachu_((Electric))
+       */
+      if (closing <= opening) {
+        return link_end;
+      }
+      closing--;
      link_end--;
-
-    else if (data[link_end - 1] == ';') {
+      break;
+    case '?':
+    case '!':
+    case '.':
+    case ',':
+    case ':':
+    case '*':
+    case '_':
+    case '~':
+    case '\'':
+    case '"':
+      link_end--;
+      break;
+    case ';': {
      size_t new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
@@ -68,46 +102,12 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
        link_end = new_end;
      else
        link_end--;
-    } else if (copen != 0) {
-      size_t closing = 0;
-      size_t opening = 0;
-      i = 0;
-
-      /* Allow any number of matching brackets (as recognised in copen/cclose)
-       * at the end of the URL.  If there is a greater number of closing
-       * brackets than opening ones, we remove one character from the end of
-       * the link.
-       *
-       * Examples (input text => output linked portion):
-       *
-       *	http://www.pokemon.com/Pikachu_(Electric)
-       *		=> http://www.pokemon.com/Pikachu_(Electric)
-       *
-       *	http://www.pokemon.com/Pikachu_((Electric)
-       *		=> http://www.pokemon.com/Pikachu_((Electric)
-       *
-       *	http://www.pokemon.com/Pikachu_(Electric))
-       *		=> http://www.pokemon.com/Pikachu_(Electric)
-       *
-       *	http://www.pokemon.com/Pikachu_((Electric))
-       *		=> http://www.pokemon.com/Pikachu_((Electric))
-       */
-
-      while (i < link_end) {
-        if (data[i] == copen)
-          opening++;
-        else if (data[i] == cclose)
-          closing++;
-
-        i++;
-      }
-
-      if (closing <= opening)
-        break;
-
-      link_end--;
-    } else
      break;
+    }
+
+    default:
+      return link_end;
+    }
  }

  return link_end;
@@ -116,7 +116,20 @@ static size_t autolink_delim(uint8_t *data, size_t link_end) {
 static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
  size_t i, np = 0, uscore1 = 0, uscore2 = 0;

+  /* The purpose of this code is to reject urls that contain an underscore
+   * in one of the last two segments. Examples:
+   *
+   *   www.xxx.yyy.zzz     autolinked
+   *   www.xxx.yyy._zzz    not autolinked
+   *   www.xxx._yyy.zzz    not autolinked
+   *   www._xxx.yyy.zzz    autolinked
+   *
+   * The reason is that domain names are allowed to include underscores,
+   * but host names are not. See: https://stackoverflow.com/a/2183140
+   */
  for (i = 1; i < size - 1; i++) {
+    if (data[i] == '\\' && i < size - 2)
+      i++;
    if (data[i] == '_')
      uscore2++;
    else if (data[i] == '.') {
@@ -127,8 +140,17 @@ static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
      break;
  }

-  if (uscore1 > 0 || uscore2 > 0)
-    return 0;
+  if (uscore1 > 0 || uscore2 > 0) {
+    /* If the url is very long then accept it despite the underscores,
+     * to avoid quadratic behavior causing a denial of service. See:
+     * https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
+     * Reasonable urls are unlikely to have more than 10 segments, so
+     * this extra condition shouldn't have any impact on normal usage.
+     */
+    if (np <= 10) {
+      return 0;
+    }
+  }

  if (allow_short) {
    /* We don't need a valid domain in the strict sense (with
@@ -165,7 +187,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
  if (link_end == 0)
    return NULL;

-  while (link_end < size && !cmark_isspace(data[link_end]))
+  while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
    link_end++;

  link_end = autolink_delim(data, link_end);
@@ -225,7 +247,7 @@ static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
    return 0;

  link_end += domain_len;
-  while (link_end < size && !cmark_isspace(data[link_end]))
+  while (link_end < size && !cmark_isspace(data[link_end]) && data[link_end] != '<')
    link_end++;

  link_end = autolink_delim(data, link_end);
@@ -269,142 +291,167 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
  // inline was finished in inlines.c.
 }

-static bool validate_protocol(char protocol[], uint8_t *data, int rewind) {
+static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
  size_t len = strlen(protocol);

-  // Check that the protocol matches
-  for (int i = 1; i <= len; i++) {
-    if (data[-rewind - i] != protocol[len - i]) {
-      return false;
-    }
+  if (len > (max_rewind - rewind)) {
+    return false;
  }

-  char prev_char = data[-rewind - len - 1];
+  // Check that the protocol matches
+  if (memcmp(data - rewind - len, protocol, len) != 0) {
+    return false;
+  }
+
+  if (len == (max_rewind - rewind)) {
+    return true;
+  }
+
+  char prev_char = data[-((ptrdiff_t)rewind) - len - 1];

  // Make sure the character before the protocol is non-alphanumeric
  return !cmark_isalnum(prev_char);
 }

-static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset, int depth) {
-  // postprocess_text can recurse very deeply if there is a very long line of
-  // '@' only.  Stop at a reasonable depth to ensure it cannot crash.
-  if (depth > 1000) return;
+static void postprocess_text(cmark_parser *parser, cmark_node *text) {
+  size_t start = 0;
+  size_t offset = 0;
+  // `text` is going to be split into a list of nodes containing shorter segments
+  // of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
+  // create references to it. Later, `cmark_chunk_to_cstr` is used to convert
+  // the references into allocated buffers. The detached buffer is freed before we
+  // return.
+  cmark_chunk detached_chunk = text->as.literal;
+  text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);

-  size_t link_end;
-  uint8_t *data = text->as.literal.data,
-    *at;
-  size_t size = text->as.literal.len;
-  bool auto_mailto = true;
-  bool is_xmpp = false;
-  int rewind, max_rewind,
-      nb = 0, np = 0, ns = 0;
+  uint8_t *data = text->as.literal.data;
+  size_t remaining = text->as.literal.len;

-  if (offset < 0 || (size_t)offset >= size)
-    return;
+  while (true) {
+    size_t link_end;
+    uint8_t *at;
+    bool auto_mailto = true;
+    bool is_xmpp = false;
+    size_t rewind;
+    size_t max_rewind;
+    size_t np = 0;

-  data += offset;
-  size -= offset;
+    if (offset >= remaining)
+      break;

-  at = (uint8_t *)memchr(data, '@', size);
-  if (!at)
-    return;
+    at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
+    if (!at)
+      break;

-  max_rewind = (int)(at - data);
-  data += max_rewind;
-  size -= max_rewind;
+    max_rewind = at - (data + start + offset);

-  for (rewind = 0; rewind < max_rewind; ++rewind) {
-    uint8_t c = data[-rewind - 1];
+found_at:
+    for (rewind = 0; rewind < max_rewind; ++rewind) {
+      uint8_t c = data[start + offset + max_rewind - rewind - 1];

-    if (cmark_isalnum(c))
-      continue;
-
-    if (strchr(".+-_", c) != NULL)
-      continue;
-
-    if (strchr(":", c) != NULL) {
-      if (validate_protocol("mailto:", data, rewind)) {
-        auto_mailto = false;
+      if (cmark_isalnum(c))
        continue;
+
+      if (strchr(".+-_", c) != NULL)
+        continue;
+
+      if (strchr(":", c) != NULL) {
+        if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
+          auto_mailto = false;
+          continue;
+        }
+
+        if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
+          auto_mailto = false;
+          is_xmpp = true;
+          continue;
+        }
      }

-      if (validate_protocol("xmpp:", data, rewind)) {
-        auto_mailto = false;
-        is_xmpp = true;
-        continue;
-      }
+      break;
    }

-    break;
-  }
-
-  if (rewind == 0 || ns > 0) {
-    postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
-    return;
-  }
-
-  for (link_end = 0; link_end < size; ++link_end) {
-    uint8_t c = data[link_end];
-
-    if (cmark_isalnum(c))
+    if (rewind == 0) {
+      offset += max_rewind + 1;
      continue;
+    }

-    if (c == '@')
-      nb++;
-    else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
-      np++;
-    else if (c == '/' && is_xmpp)
+    assert(data[start + offset + max_rewind] == '@');
+    for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
+      uint8_t c = data[start + offset + max_rewind + link_end];
+
+      if (cmark_isalnum(c))
+        continue;
+
+      if (c == '@') {
+        // Found another '@', so go back and try again with an updated offset and max_rewind.
+        offset += max_rewind + 1;
+        max_rewind = link_end - 1;
+        goto found_at;
+      } else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
+               cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
+        np++;
+      else if (c == '/' && is_xmpp)
+        continue;
+      else if (c != '-' && c != '_')
+        break;
+    }
+
+    if (link_end < 2 || np == 0 ||
+        (!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
+         data[start + offset + max_rewind + link_end - 1] != '.')) {
+      offset += max_rewind + link_end;
      continue;
-    else if (c != '-' && c != '_')
-      break;
-  }
-
-  if (link_end < 2 || nb != 1 || np == 0 ||
-      (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
-    postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
-    return;
-  }
-
-  link_end = autolink_delim(data, link_end);
-
-  if (link_end == 0) {
-    postprocess_text(parser, text, max_rewind + 1 + offset, depth + 1);
-    return;
+    }
+
+    link_end = autolink_delim(data + start + offset + max_rewind, link_end);
+
+    if (link_end == 0) {
+      offset += max_rewind + 1;
+      continue;
+    }
+
+    cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
+    cmark_strbuf buf;
+    cmark_strbuf_init(parser->mem, &buf, 10);
+    if (auto_mailto)
+      cmark_strbuf_puts(&buf, "mailto:");
+    cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
+    link_node->as.link.url = cmark_chunk_buf_detach(&buf);
+
+    cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
+    cmark_chunk email = cmark_chunk_dup(
+      &detached_chunk,
+      (bufsize_t)(start + offset + max_rewind - rewind),
+      (bufsize_t)(link_end + rewind));
+    cmark_chunk_to_cstr(parser->mem, &email);
+    link_text->as.literal = email;
+    cmark_node_append_child(link_node, link_text);
+
+    cmark_node_insert_after(text, link_node);
+
+    cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
+    post->as.literal = cmark_chunk_dup(&detached_chunk,
+                                       (bufsize_t)(start + offset + max_rewind + link_end),
+                                       (bufsize_t)(remaining - offset - max_rewind - link_end));
+
+    cmark_node_insert_after(link_node, post);
+
+    text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
+    cmark_chunk_to_cstr(parser->mem, &text->as.literal);
+
+    text = post;
+    start += offset + max_rewind + link_end;
+    remaining -= offset + max_rewind + link_end;
+    offset = 0;
  }

+  // Convert the reference to allocated memory.
+  assert(!text->as.literal.alloc);
  cmark_chunk_to_cstr(parser->mem, &text->as.literal);

-  cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
-  cmark_strbuf buf;
-  cmark_strbuf_init(parser->mem, &buf, 10);
-  if (auto_mailto)
-    cmark_strbuf_puts(&buf, "mailto:");
-  cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
-  link_node->as.link.url = cmark_chunk_buf_detach(&buf);
-
-  cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
-  cmark_chunk email = cmark_chunk_dup(
-      &text->as.literal,
-      offset + max_rewind - rewind,
-      (bufsize_t)(link_end + rewind));
-  cmark_chunk_to_cstr(parser->mem, &email);
-  link_text->as.literal = email;
-  cmark_node_append_child(link_node, link_text);
-
-  cmark_node_insert_after(text, link_node);
-
-  cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
-  post->as.literal = cmark_chunk_dup(&text->as.literal,
-    (bufsize_t)(offset + max_rewind + link_end),
-    (bufsize_t)(size - link_end));
-  cmark_chunk_to_cstr(parser->mem, &post->as.literal);
-
-  cmark_node_insert_after(link_node, post);
-
-  text->as.literal.len = offset + max_rewind - rewind;
-  text->as.literal.data[text->as.literal.len] = 0;
-
-  postprocess_text(parser, post, 0, depth + 1);
+  // Free the detached buffer.
+  cmark_chunk_free(parser->mem, &detached_chunk);
 }

 static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
@@ -431,7 +478,7 @@ static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser
    }

    if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) {
-      postprocess_text(parser, node, 0, /*depth*/0);
+      postprocess_text(parser, node);
    }
  }

--- a/extensions/strikethrough.c
+++ b/extensions/strikethrough.c
@@ -67,6 +67,7 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
  strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
  cmark_node_free(closer->inl_text);

+done:
  delim = closer;
  while (delim != NULL && delim != opener) {
    tmp_delim = delim->previous;
@@ -76,7 +77,6 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,

  cmark_inline_parser_remove_delimiter(inline_parser, opener);

-done:
  return res;
 }

--- a/extensions/table.c
+++ b/extensions/table.c
@@ -11,24 +11,12 @@
 #include "table.h"
 #include "cmark-gfm-core-extensions.h"

+// Custom node flag, initialized in `create_table_extension`.
+static cmark_node__internal_flags CMARK_NODE__TABLE_VISITED;
+
 cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
    CMARK_NODE_TABLE_CELL;

-typedef struct {
-  uint16_t n_columns;
-  int paragraph_offset;
-  cmark_llist *cells;
-} table_row;
-
-typedef struct {
-  uint16_t n_columns;
-  uint8_t *alignments;
-} node_table;
-
-typedef struct {
-  bool is_header;
-} node_table_row;
-
 typedef struct {
  unsigned colspan, rowspan;
 } node_cell_data;
@@ -39,21 +27,41 @@ typedef struct {
  node_cell_data *cell_data;
 } node_cell;

-static void free_table_cell(cmark_mem *mem, void *data) {
-  node_cell *cell = (node_cell *)data;
+typedef struct {
+  uint16_t n_columns;
+  int paragraph_offset;
+  node_cell *cells;
+} table_row;
+
+typedef struct {
+  uint16_t n_columns;
+  uint8_t *alignments;
+} node_table;
+
+typedef struct {
+  bool is_header;
+} node_table_row;
+
+static void free_table_cell(cmark_mem *mem, node_cell *cell) {
  cmark_strbuf_free((cmark_strbuf *)cell->buf);
  mem->free(cell->buf);
  if (cell->cell_data)
    mem->free(cell->cell_data);
-  mem->free(cell);
+}
+
+static void free_row_cells(cmark_mem *mem, table_row *row) {
+  while (row->n_columns > 0) {
+    free_table_cell(mem, &row->cells[--row->n_columns]);
+  }
+  mem->free(row->cells);
+  row->cells = NULL;
 }

 static void free_table_row(cmark_mem *mem, table_row *row) {
  if (!row)
    return;

-  cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell);
-
+  free_row_cells(mem, row);
  mem->free(row);
 }

@@ -175,6 +183,24 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi
  return res;
 }

+// Adds a new cell to the end of the row. A pointer to the new cell is returned
+// for the caller to initialize.
+static node_cell* append_row_cell(cmark_mem *mem, table_row *row) {
+  const uint32_t n_columns = row->n_columns + 1;
+  // realloc when n_columns is a power of 2
+  if ((n_columns & (n_columns-1)) == 0) {
+    // make sure we never wrap row->n_columns
+    // offset will != len and our exit will clean up as intended
+    if (n_columns > UINT16_MAX) {
+      return NULL;
+    }
+    // Use realloc to double the size of the buffer.
+    row->cells = (node_cell *)mem->realloc(row->cells, (2 * n_columns - 1) * sizeof(node_cell));
+  }
+  row->n_columns = (uint16_t)n_columns;
+  return &row->cells[n_columns-1];
+}
+
 static table_row *row_from_string(cmark_syntax_extension *self,
                                  cmark_parser *parser, unsigned char *string,
                                  int len) {
@@ -216,15 +242,22 @@ static table_row *row_from_string(cmark_syntax_extension *self,
          cell_matched);
      cmark_strbuf_trim(cell_buf);

-      node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
+      node_cell *cell = append_row_cell(parser->mem, row);
+      if (!cell) {
+        int_overflow_abort = 1;
+        cmark_strbuf_free(cell_buf);
+        parser->mem->free(cell_buf);
+        break;
+      }
      cell->buf = cell_buf;
      cell->start_offset = offset;
      if (cell_matched > 0)
        cell->end_offset = offset + cell_matched - 1;
      else
        cell->end_offset = offset;
+      cell->internal_offset = 0;

-      while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
+      while (cell->start_offset > row->paragraph_offset && string[cell->start_offset - 1] != '|') {
        --cell->start_offset;
        ++cell->internal_offset;
      }
@@ -237,13 +270,11 @@ static table_row *row_from_string(cmark_syntax_extension *self,
          cell->cell_data->colspan = 0;

          // find the last cell that isn't part of a colspan, and increment that colspan
-          cmark_llist *tmp = row->cells;
          node_cell *colspan_cell = NULL;
-          while (tmp) {
-            node_cell *this_cell = (node_cell *)tmp->data;
+          for (uint16_t i = 0; i < row->n_columns; i++) {
+            node_cell *this_cell = &row->cells[i];
            if (this_cell->cell_data->colspan > 0)
              colspan_cell = this_cell;
-            tmp = tmp->next;
          }
          if (colspan_cell)
            ++colspan_cell->cell_data->colspan;
@@ -272,8 +303,6 @@ static table_row *row_from_string(cmark_syntax_extension *self,
          int_overflow_abort = 1;
          break;
      }
-      row->n_columns += 1;
-      row->cells = cmark_llist_append(parser->mem, row->cells, cell);
    }

    offset += cell_matched + pipe_matched;
@@ -291,9 +320,7 @@ static table_row *row_from_string(cmark_syntax_extension *self,
      if (row_end_offset && offset != len) {
        row->paragraph_offset = offset;

-        cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell);
-        row->cells = NULL;
-        row->n_columns = 0;
+        free_row_cells(parser->mem, row);

        // Scan past the (optional) leading pipe.
        offset += scan_table_cell_end(string, len, offset);
@@ -344,6 +371,10 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
  const char *parent_string;
  uint16_t i;

+  if (parent_container->flags & CMARK_NODE__TABLE_VISITED) {
+    return parent_container;
+  }
+
  if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
    return parent_container;
  }
@@ -371,6 +402,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
    free_table_row(parser->mem, marker_row);
    free_table_row(parser->mem, header_row);
    cmark_arena_pop();
+    parent_container->flags |= CMARK_NODE__TABLE_VISITED;
    return parent_container;
  }

@@ -407,9 +439,8 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
  // since we populate the alignments array based on marker_row->cells
  uint8_t *alignments =
      (uint8_t *)parser->mem->calloc(marker_row->n_columns, sizeof(uint8_t));
-  cmark_llist *it = marker_row->cells;
-  for (i = 0; it; it = it->next, ++i) {
-    node_cell *node = (node_cell *)it->data;
+  for (i = 0; i < marker_row->n_columns; ++i) {
+    node_cell *node = &marker_row->cells[i];
    bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':';

    if (left && right)
@@ -432,10 +463,8 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
  ntr->is_header = true;

  {
-    cmark_llist *tmp;
-
-    for (tmp = header_row->cells; tmp; tmp = tmp->next) {
-      node_cell *cell = (node_cell *) tmp->data;
+    for (i = 0; i < header_row->n_columns; ++i) {
+      node_cell *cell = &header_row->cells[i];
      cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
          CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
      header_cell->start_line = header_cell->end_line = parent_container->start_line;
@@ -487,11 +516,10 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,

  if (parser->options & CMARK_OPT_TABLE_SPANS) {
    // Check the new row for rowspan markers and increment the rowspan of the cell it's merging with
-    cmark_llist *tmp;
    int i;

-    for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) {
-      node_cell *this_cell = (node_cell *)tmp->data;
+    for (i = 0; i < row->n_columns && i < table_columns; ++i) {
+      node_cell *this_cell = &row->cells[i];
      if (this_cell->cell_data->rowspan == 0) {
        // Rowspan marker. Scan up through previous rows and increment the spanning cell's rowspan
        cmark_node *check_row = table_row_block->prev;
@@ -515,11 +543,10 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
  }

  {
-    cmark_llist *tmp;
    int i;

-    for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) {
-      node_cell *cell = (node_cell *) tmp->data;
+    for (i = 0; i < row->n_columns && i < table_columns; ++i) {
+      node_cell *cell = &row->cells[i];
      cmark_node *node = cmark_parser_add_child(parser, table_row_block,
          CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
      node->internal_offset = cell->internal_offset;
@@ -980,6 +1007,7 @@ static int escape(cmark_syntax_extension *self, cmark_node *node, int c) {
 cmark_syntax_extension *create_table_extension(void) {
  cmark_syntax_extension *self = cmark_syntax_extension_new("table");

+  cmark_register_node_flag(&CMARK_NODE__TABLE_VISITED);
  cmark_syntax_extension_set_match_block_func(self, matches);
  cmark_syntax_extension_set_open_block_func(self, try_opening_table_block);
  cmark_syntax_extension_set_get_type_string_func(self, get_type_string);
--- a/fuzz/CMakeLists.txt
+++ b/fuzz/CMakeLists.txt
@@ -0,0 +1,21 @@
+include_directories(
+  ${PROJECT_BINARY_DIR}/extensions
+  ${PROJECT_BINARY_DIR}/src
+  ../extensions
+  ../src
+)
+
+macro(fuzzer name)
+    add_executable(${name} ${name}.c)
+    set_target_properties(${name}
+          PROPERTIES
+          COMPILE_FLAGS "-fsanitize=fuzzer"
+          LINK_FLAGS "-fsanitize=fuzzer")
+    if(CMARK_SHARED)
+      target_link_libraries(${name} libcmark-gfm-extensions libcmark-gfm)
+    elseif(CMARK_STATIC)
+      target_link_libraries(${name} libcmark-gfm-extensions_static libcmark-gfm_static)
+    endif()
+endmacro()
+
+fuzzer(fuzz_quadratic)
--- a/fuzz/README.md
+++ b/fuzz/README.md
@@ -0,0 +1,12 @@
+The quadratic fuzzer generates long sequences of repeated characters, such as `<?x<?x<?x<?x...`,
+to detect quadratic complexity performance issues.
+
+To build and run the quadratic fuzzer:
+
+```bash
+mkdir build-fuzz
+cd build-fuzz
+cmake -DCMARK_FUZZ_QUADRATIC=ON -DCMAKE_C_COMPILER=$(which clang) -DCMAKE_CXX_COMPILER=$(which clang++) -DCMAKE_BUILD_TYPE=Release ..
+make
+../fuzz/fuzzloop.sh
+```
--- a/fuzz/fuzz_quadratic.c
+++ b/fuzz/fuzz_quadratic.c
@@ -0,0 +1,87 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "cmark-gfm.h"
+#include "cmark-gfm-core-extensions.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+const char *extension_names[] = {
+  "autolink",
+  "strikethrough",
+  "table",
+  "tagfilter",
+  NULL,
+};
+
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  cmark_init_standard_node_flags();
+  cmark_gfm_core_extensions_ensure_registered();
+  return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  struct __attribute__((packed)) {
+    int options;
+    int width;
+    uint8_t splitpoint;
+    uint8_t repeatlen;
+  } fuzz_config;
+
+  if (size >= sizeof(fuzz_config)) {
+    /* The beginning of `data` is treated as fuzzer configuration */
+    memcpy(&fuzz_config, data, sizeof(fuzz_config));
+
+    /* Test options that are used by GitHub. */
+    fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
+
+    /* Remainder of input is the markdown */
+    const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
+    const size_t markdown_size0 = size - sizeof(fuzz_config);
+    char markdown[0x80000];
+    if (markdown_size0 <= sizeof(markdown)) {
+      size_t markdown_size = 0;
+      if (fuzz_config.splitpoint <= markdown_size0 && 0 < fuzz_config.repeatlen &&
+          fuzz_config.repeatlen <= markdown_size0 - fuzz_config.splitpoint) {
+        const size_t size_after_splitpoint = markdown_size0 - fuzz_config.splitpoint - fuzz_config.repeatlen;
+        memcpy(&markdown[markdown_size], &markdown0[0], fuzz_config.splitpoint);
+        markdown_size += fuzz_config.splitpoint;
+
+        while (markdown_size + fuzz_config.repeatlen + size_after_splitpoint <= sizeof(markdown)) {
+          memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint],
+                 fuzz_config.repeatlen);
+          markdown_size += fuzz_config.repeatlen;
+        }
+        memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint + fuzz_config.repeatlen],
+               size_after_splitpoint);
+        markdown_size += size_after_splitpoint;
+      } else {
+        markdown_size = markdown_size0;
+        memcpy(markdown, markdown0, markdown_size);
+      }
+
+      cmark_parser *parser = cmark_parser_new(fuzz_config.options);
+
+      for (const char **it = extension_names; *it; ++it) {
+        const char *extension_name = *it;
+        cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
+        if (!syntax_extension) {
+          fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
+          abort();
+        }
+        cmark_parser_attach_syntax_extension(parser, syntax_extension);
+      }
+
+      cmark_parser_feed(parser, markdown, markdown_size);
+      cmark_node *doc = cmark_parser_finish(parser);
+ 
+      free(cmark_render_html(doc, fuzz_config.options, NULL));
+
+      cmark_node_free(doc);
+      cmark_parser_free(parser);
+    }
+  }
+  return 0;
+}
--- a/fuzz/fuzzloop.sh
+++ b/fuzz/fuzzloop.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Stop when an error is found
+set -e
+
+# Create a corpus sub-directory if it doesn't already exist.
+mkdir -p corpus
+
+# The memory and disk usage grows over time, so this loop restarts the
+# fuzzer every 4 hours. The `-merge=1` option is used to minimize the
+# corpus on each iteration.
+while :
+do
+    date
+    echo restarting loop
+
+    # Minimize the corpus
+    mv corpus/ corpus2
+    mkdir corpus
+    echo minimizing corpus
+    ./fuzz/fuzz_quadratic -merge=1 corpus ../bench corpus2/ -max_len=1024
+    rm -r corpus2
+
+    # Run the fuzzer for 4 hours
+    date
+    echo start fuzzer
+    ./fuzz/fuzz_quadratic corpus -dict=../test/fuzzing_dictionary -jobs=$(nproc) -workers=$(nproc) -max_len=1024 -max_total_time=14400
+done
--- a/src/arena.c
+++ b/src/arena.c
@@ -84,19 +84,17 @@ static void *arena_calloc(size_t nmem, size_t size) {

  CMARK_INITIALIZE_AND_LOCK(arena);

-  void *ptr = NULL;
-
+  struct arena_chunk *chunk;
  if (sz > A->sz) {
-    A->prev = alloc_arena_chunk(sz, A->prev);
-    ptr = (uint8_t *) A->prev->ptr;
+    A->prev = chunk = alloc_arena_chunk(sz, A->prev);
+  } else if (sz > A->sz - A->used) {
+    A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
  } else {
-    if (sz > A->sz - A->used) {
-      A = alloc_arena_chunk(A->sz + A->sz / 2, A);
-    }
-    ptr = (uint8_t *) A->ptr + A->used;
-    A->used += sz;
-    *((size_t *) ptr) = sz - sizeof(size_t);
+    chunk = A;
  }
+  void *ptr = (uint8_t *) chunk->ptr + chunk->used;
+  chunk->used += sz;
+  *((size_t *) ptr) = sz - sizeof(size_t);
  
  CMARK_UNLOCK(arena);

--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <stdio.h>
+#include <limits.h>

 #include "cmark_ctype.h"
 #include "syntax_extension.h"
@@ -665,6 +666,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
  }

  finalize(parser, parser->root);
+
+  // Limit total size of extra content created from reference links to
+  // document size to avoid superlinear growth. Always allow 100KB.
+  if (parser->total_size > 100000)
+    parser->refmap->max_ref_size = parser->total_size;
+  else
+    parser->refmap->max_ref_size = 100000;
+
  process_inlines(parser, parser->refmap, parser->options);
  if (parser->options & CMARK_OPT_FOOTNOTES)
    process_footnotes(parser);
@@ -725,6 +734,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
  static const uint8_t repl[] = {239, 191, 189};
  bool preserveWhitespace = parser->options & CMARK_OPT_PRESERVE_WHITESPACE;

+  if (len > UINT_MAX - parser->total_size)
+    parser->total_size = UINT_MAX;
+  else
+    parser->total_size += len;
+
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
    // skip NL if last buffer ended with CR ; see #117
    buffer++;
--- a/src/include/cmark-gfm-extension_api.h
+++ b/src/include/cmark-gfm-extension_api.h
@@ -114,6 +114,7 @@ typedef struct delimiter {
  struct delimiter *previous;
  struct delimiter *next;
  cmark_node *inl_text;
+  bufsize_t position;
  bufsize_t length;
  unsigned char delim_char;
  int can_open;
--- a/src/include/map.h
+++ b/src/include/map.h
@@ -10,7 +10,8 @@ extern "C" {
 struct cmark_map_entry {
  struct cmark_map_entry *next;
  unsigned char *label;
-  unsigned int age;
+  size_t age;
+  size_t size;
 };

 typedef struct cmark_map_entry cmark_map_entry;
@@ -23,7 +24,9 @@ struct cmark_map {
  cmark_mem *mem;
  cmark_map_entry *refs;
  cmark_map_entry **sorted;
-  unsigned int size;
+  size_t size;
+  size_t ref_size;
+  size_t max_ref_size;
  cmark_map_free_f free;
 };

--- a/src/include/node.h
+++ b/src/include/node.h
@@ -52,11 +52,7 @@ typedef struct {
  cmark_chunk on_exit;
 } cmark_custom;

-enum cmark_node__internal_flags {
-  CMARK_NODE__OPEN = (1 << 0),
-  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
-  CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
-};
+typedef uint16_t cmark_node__internal_flags;

 struct cmark_node {
  cmark_strbuf content;
@@ -76,7 +72,7 @@ struct cmark_node {
  int end_column;
  int internal_offset;
  uint16_t type;
-  uint16_t flags;
+  cmark_node__internal_flags flags;
  int backtick_count;

  cmark_syntax_extension *extension;
@@ -101,6 +97,30 @@ struct cmark_node {
  } as;
 };

+/**
+ * Registers a custom node flag for a syntax extension. Flag values live in
+ * the `flags` field of the `cmark_node` struct. `flags` must be the address
+ * of a global variable that is still zero; the assigned single-bit value is
+ * written to it. Aborts if `*flags` is non-zero or no flag bits are left.
+ */
+CMARK_GFM_EXPORT
+void cmark_register_node_flag(cmark_node__internal_flags *flags);
+
+/**
+ * Standard node flags; zero until `cmark_init_standard_node_flags` has run.
+ */
+extern cmark_node__internal_flags CMARK_NODE__OPEN;
+extern cmark_node__internal_flags CMARK_NODE__LAST_LINE_BLANK;
+extern cmark_node__internal_flags CMARK_NODE__LAST_LINE_CHECKED;
+
+/**
+ * Uses `cmark_register_node_flag` to initialize the standard node flags.
+ * This function should be called at program startup time. Calling it
+ * multiple times has no additional effect. It takes no arguments.
+ */
+CMARK_GFM_EXPORT
+void cmark_init_standard_node_flags(void);
+
 static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
  return node->content.mem;
 }
--- a/src/include/parser.h
+++ b/src/include/parser.h
@@ -47,6 +47,7 @@ struct cmark_parser {
  /* Options set by the user, see the Options section in cmark.h */
  int options;
  bool last_buffer_ended_with_cr;
+  size_t total_size;
  cmark_llist *syntax_extensions;
  cmark_llist *inline_syntax_extensions;
  cmark_ispunct_func backslash_ispunct;
--- a/src/include/scanners.h
+++ b/src/include/scanners.h
@@ -15,6 +15,10 @@ bufsize_t _scan_autolink_uri(const unsigned char *p);
 bufsize_t _scan_autolink_email(const unsigned char *p);
 bufsize_t _scan_html_tag(const unsigned char *p);
 bufsize_t _scan_liberal_html_tag(const unsigned char *p);
+bufsize_t _scan_html_comment(const unsigned char *p);
+bufsize_t _scan_html_pi(const unsigned char *p);
+bufsize_t _scan_html_declaration(const unsigned char *p);
+bufsize_t _scan_html_cdata(const unsigned char *p);
 bufsize_t _scan_html_block_start(const unsigned char *p);
 bufsize_t _scan_html_block_start_7(const unsigned char *p);
 bufsize_t _scan_html_block_end_1(const unsigned char *p);
@@ -37,6 +41,10 @@ bufsize_t _scan_footnote_definition(const unsigned char *p);
 #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
 #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
 #define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n)
+#define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n)
+#define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n)
+#define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n)
+#define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n)
 #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
 #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
 #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -41,7 +41,6 @@ typedef enum {

 typedef struct bracket {
  struct bracket *previous;
-  struct delimiter *previous_delimiter;
  cmark_node *inl_text;
  bufsize_t position;
  bracket_type type;
@@ -50,9 +49,15 @@ typedef struct bracket {
  bool in_bracket[4];
 } bracket;

+#define FLAG_SKIP_HTML_CDATA        (1u << 0)
+#define FLAG_SKIP_HTML_DECLARATION  (1u << 1)
+#define FLAG_SKIP_HTML_PI           (1u << 2)
+#define FLAG_SKIP_HTML_COMMENT      (1u << 3)
+
 typedef struct subject{
  cmark_mem *mem;
  cmark_chunk input;
+  unsigned flags;
  int line;
  bufsize_t pos;
  int block_offset;
@@ -62,6 +67,7 @@ typedef struct subject{
  bracket *last_bracket;
  bufsize_t backticks[MAXBACKTICKS + 1];
  bool scanned_for_backticks;
+  bool no_link_openers;
 } subject;

 void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy) {
@@ -122,6 +128,24 @@ static cmark_node *make_str_with_entities(subject *subj,
  }
 }

+// Like cmark_node_append_child, minus its costly sanity checks: links
+// `child` in as the new last child of `node`. `child` must not be attached
+// anywhere else (newly created, or already unlinked by the caller).
+static void append_child(cmark_node *node, cmark_node *child) {
+  cmark_node *tail = node->last_child;
+
+  child->parent = node;
+  child->prev = tail;
+  child->next = NULL;
+
+  if (tail == NULL) {
+    node->first_child = child; // node had no children: child is also first
+  } else {
+    tail->next = child;
+  }
+  node->last_child = child;
+}
+
 // Duplicate a chunk by creating a copy of the buffer not by reusing the
 // buffer like cmark_chunk_dup does.
 static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
@@ -165,7 +189,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj,
  link->start_line = link->end_line = subj->line;
  link->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
  link->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
-  cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
+  append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
  return link;
 }

@@ -174,6 +198,7 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
  int i;
  e->mem = mem;
  e->input = *chunk;
+  e->flags = 0;
  e->line = line_number;
  e->pos = 0;
  e->block_offset = block_offset;
@@ -185,6 +210,7 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
    e->backticks[i] = 0;
  }
  e->scanned_for_backticks = false;
+  e->no_link_openers = true;
 }

 static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
@@ -520,6 +546,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
  delim->can_open = can_open;
  delim->can_close = can_close;
  delim->inl_text = inl_text;
+  delim->position = subj->pos;
  delim->length = inl_text->as.literal.len;
  delim->previous = subj->last_delim;
  delim->next = NULL;
@@ -539,11 +566,13 @@ static void push_bracket(subject *subj, bracket_type type, cmark_node *inl_text)
  b->active = true;
  b->inl_text = inl_text;
  b->previous = subj->last_bracket;
-  b->previous_delimiter = subj->last_delim;
  b->position = subj->pos;
  b->bracket_after = false;
  b->in_bracket[type] = true;
  subj->last_bracket = b;
+  if (type != IMAGE) {
+    subj->no_link_openers = false;
+  }
 }

 // Assumes the subject has a c at the current position.
@@ -650,12 +679,13 @@ static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *pars
  return NULL;
 }

-static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) {
-  delimiter *closer = subj->last_delim;
+static void process_emphasis(cmark_parser *parser, subject *subj, bufsize_t stack_bottom) {
+  delimiter *candidate;
+  delimiter *closer = NULL;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
-  delimiter *openers_bottom[3][128];
+  bufsize_t openers_bottom[3][128];
  int i;

  // initialize openers_bottom:
@@ -668,8 +698,10 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
  }

  // move back to first relevant delim.
-  while (closer != NULL && closer->previous != stack_bottom) {
-    closer = closer->previous;
+  candidate = subj->last_delim;
+  while (candidate != NULL && candidate->position >= stack_bottom) {
+    closer = candidate;
+    candidate = candidate->previous;
  }

  // now move forward, looking for closers, and handling each
@@ -679,8 +711,8 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
-      while (opener != NULL && opener != stack_bottom &&
-             opener != openers_bottom[closer->length % 3][closer->delim_char]) {
+      while (opener != NULL && opener->position >= stack_bottom &&
+             opener->position >= openers_bottom[closer->length % 3][closer->delim_char]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
@@ -706,27 +738,29 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
        } else {
          closer = closer->next;
        }
-      } else if (closer->delim_char == '\'') {
+      } else if (closer->delim_char == '\'' || closer->delim_char == '"') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
-        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
-        if (opener_found) {
-          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
-          opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
+        if (closer->delim_char == '\'') {
+          closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
+        } else {
+          closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        }
        closer = closer->next;
-      } else if (closer->delim_char == '"') {
-        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
-        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
-          opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
+          if (old_closer->delim_char == '\'') {
+            opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
+          } else {
+            opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
+          }
+          remove_delimiter(subj, opener);
+          remove_delimiter(subj, old_closer);
        }
-        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers
        openers_bottom[old_closer->length % 3][old_closer->delim_char] =
-		old_closer->previous;
+                old_closer->position;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
@@ -739,7 +773,8 @@ static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *sta
    }
  }
  // free all delimiters in list until stack_bottom:
-  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
+  while (subj->last_delim != NULL &&
+         subj->last_delim->position >= stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
 }
@@ -778,7 +813,8 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
  tmp = opener_inl->next;
  while (tmp && tmp != closer_inl) {
    tmpnext = tmp->next;
-    cmark_node_append_child(emph, tmp);
+    cmark_node_unlink(tmp);
+    append_child(emph, tmp);
    tmp = tmpnext;
  }
  cmark_node_insert_after(opener_inl, emph);
@@ -915,7 +951,63 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) {
  }

  // finally, try to match an html tag
-  matchlen = scan_html_tag(&subj->input, subj->pos);
+  if (subj->pos + 2 <= subj->input.len) {
+    int c = subj->input.data[subj->pos];
+    if (c == '!' && (subj->flags & FLAG_SKIP_HTML_COMMENT) == 0) {
+      c = subj->input.data[subj->pos+1];
+      if (c == '-' && subj->input.data[subj->pos+2] == '-') {
+        if (subj->input.data[subj->pos+3] == '>') {
+          matchlen = 4;
+        } else if (subj->input.data[subj->pos+3] == '-' &&
+                   subj->input.data[subj->pos+4] == '>') {
+          matchlen = 5;
+        } else {
+          matchlen = scan_html_comment(&subj->input, subj->pos + 1);
+          if (matchlen > 0) {
+            matchlen += 1; // prefix "<"
+          } else { // no match through end of input: set a flag so
+                   // we don't reparse looking for -->:
+            subj->flags |= FLAG_SKIP_HTML_COMMENT;
+          }
+        }
+      } else if (c == '[') {
+        if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
+          matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
+          if (matchlen > 0) {
+            // The regex doesn't require the final "]]>". But if we're not at
+            // the end of input, it must come after the match. Otherwise,
+            // disable subsequent scans to avoid quadratic behavior.
+            matchlen += 5; // prefix "![", suffix "]]>"
+            if (subj->pos + matchlen > subj->input.len) {
+              subj->flags |= FLAG_SKIP_HTML_CDATA;
+              matchlen = 0;
+            }
+          }
+        }
+      } else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
+        matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
+        if (matchlen > 0) {
+          matchlen += 2; // prefix "!", suffix ">"
+          if (subj->pos + matchlen > subj->input.len) {
+            subj->flags |= FLAG_SKIP_HTML_DECLARATION;
+            matchlen = 0;
+          }
+        }
+      }
+    } else if (c == '?') {
+      if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
+        // Note that we allow an empty match.
+        matchlen = scan_html_pi(&subj->input, subj->pos + 1);
+        matchlen += 3; // prefix "?", suffix "?>"
+        if (subj->pos + matchlen > subj->input.len) {
+          subj->flags |= FLAG_SKIP_HTML_PI;
+          matchlen = 0;
+        }
+      }
+    } else {
+      matchlen = scan_html_tag(&subj->input, subj->pos);
+    }
+  }
  if (matchlen > 0) {
    contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
    subj->pos += matchlen;
@@ -1170,7 +1262,7 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject
  // Free the bracket ^[:
  cmark_node_free(opener->inl_text);

-  process_emphasis(parser, subj, opener->previous_delimiter);
+  process_emphasis(parser, subj, opener->position);
  pop_bracket(subj);

  return NULL;
@@ -1201,12 +1293,6 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
  }

-  if (!opener->active) {
-    // take delimiter off stack
-    pop_bracket(subj);
-    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
-  }
-
  if (opener->type == ATTRIBUTE) {
    return handle_close_bracket_attribute(parser, subj, opener);
  }
@@ -1215,6 +1301,12 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
  // Now we check to see if it's a link/image.
  is_image = opener->type == IMAGE;

+  if (!is_image && subj->no_link_openers) {
+    // take delimiter off stack
+    pop_bracket(subj);
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
+  }
+
  after_link_text_pos = subj->pos;

  // First, look for an inline link.
@@ -1333,7 +1425,7 @@ noMatch:
      // being replacing the opening '[' text node with a `^footnote-ref]` node.
      cmark_node_insert_before(opener->inl_text, fnref);

-      process_emphasis(parser, subj, opener->previous_delimiter);
+      process_emphasis(parser, subj, opener->position);
      // sometimes, the footnote reference text gets parsed into multiple nodes
      // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
      // this happens for ex with the autolink extension. when the autolinker
@@ -1379,42 +1471,22 @@ match:
  tmp = opener->inl_text->next;
  while (tmp) {
    tmpnext = tmp->next;
-    cmark_node_append_child(inl, tmp);
+    cmark_node_unlink(tmp);
+    append_child(inl, tmp);
    tmp = tmpnext;
  }

  // Free the bracket [:
  cmark_node_free(opener->inl_text);

-  process_emphasis(parser, subj, opener->previous_delimiter);
+  process_emphasis(parser, subj, opener->position);
  pop_bracket(subj);

-  // Now, if we have a link, we also want to deactivate earlier link
-  // delimiters. (This code can be removed if we decide to allow links
+  // Now, if we have a link, we also want to deactivate links until
+  // we get a new opener. (This code can be removed if we decide to allow links
  // inside links.)
  if (!is_image) {
-    opener = subj->last_bracket;
-    while (opener != NULL) {
-      if (opener->type == LINK) {
-        if (!opener->active) {
-          break;
-        } else {
-          opener->active = false;
-        }
-      }
-      opener = opener->previous;
-    }
-    bool in_image = false;
-    if (opener) {
-      in_image = opener->in_bracket[IMAGE];
-    }
-    bracket *opener2 = subj->last_bracket;
-    while (opener2 != opener) {
-      if (opener2->type == IMAGE) {
-        opener2->in_bracket[IMAGE] = in_image;
-      }
-      opener2 = opener2->previous;
-    }
+    subj->no_link_openers = true;
  }

  return NULL;
@@ -1623,7 +1695,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
  }

  if (new_inl != NULL) {
-    cmark_node_append_child(parent, new_inl);
+    append_child(parent, new_inl);
  }

  return 1;
@@ -1643,7 +1715,7 @@ void cmark_parse_inlines(cmark_parser *parser,
  while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
    ;

-  process_emphasis(parser, &subj, NULL);
+  process_emphasis(parser, &subj, 0);
  // free bracket and delim stack
  while (subj.last_delim) {
    remove_delimiter(&subj, subj.last_delim);
--- a/src/map.c
+++ b/src/map.c
@@ -51,7 +51,7 @@ refsearch(const void *label, const void *p2) {
 }

 static void sort_map(cmark_map *map) {
-  unsigned int i = 0, last = 0, size = map->size;
+  size_t i = 0, last = 0, size = map->size;
  cmark_map_entry *r = map->refs, **sorted = NULL;

  sorted = (cmark_map_entry **)map->mem->calloc(size, sizeof(cmark_map_entry *));
@@ -73,6 +73,7 @@ static void sort_map(cmark_map *map) {

 cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
  cmark_map_entry **ref = NULL;
+  cmark_map_entry *r = NULL;
  unsigned char *norm;

  if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
@@ -91,10 +92,15 @@ cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
  ref = (cmark_map_entry **)bsearch(norm, map->sorted, map->size, sizeof(cmark_map_entry *), refsearch);
  map->mem->free(norm);

-  if (!ref)
-    return NULL;
+  if (ref != NULL) {
+    r = ref[0];
+    /* Check for expansion limit */
+    if (r->size > map->max_ref_size - map->ref_size)
+      return NULL;
+    map->ref_size += r->size;
+  }

-  return ref[0];
+  return r;
 }

 void cmark_map_free(cmark_map *map) {
@@ -118,5 +124,6 @@ cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) {
  cmark_map *map = (cmark_map *)mem->calloc(1, sizeof(cmark_map));
  map->mem = mem;
  map->free = free;
+  map->max_ref_size = UINT_MAX;
  return map;
 }
--- a/src/node.c
+++ b/src/node.c
@@ -9,6 +9,40 @@ static void S_node_unlink(cmark_node *node);

 #define NODE_MEM(node) cmark_node_mem(node)

+cmark_node__internal_flags CMARK_NODE__OPEN;
+cmark_node__internal_flags CMARK_NODE__LAST_LINE_BLANK;
+cmark_node__internal_flags CMARK_NODE__LAST_LINE_CHECKED;
+
+void cmark_register_node_flag(cmark_node__internal_flags *flags) {
+  static uint8_t next_bit = 0; // index of the next unassigned flag bit
+  const char *problem = NULL;
+
+  // *flags is expected to be a global that has not been registered yet
+  // (still zero); after that, make sure a free bit is left to hand out.
+  if (*flags != 0) {
+    problem = "flag initialization error in cmark_register_node_flag\n";
+  } else if (next_bit >= 8 * sizeof(cmark_node__internal_flags)) {
+    problem = "too many flags in cmark_register_node_flag\n";
+  }
+  if (problem != NULL) {
+    fputs(problem, stderr);
+    abort();
+  }
+
+  *flags = (cmark_node__internal_flags)1 << next_bit;
+  next_bit++;
+}
+
+// Registers the three standard node flags once; later calls do nothing.
+void cmark_init_standard_node_flags(void) {
+  static int initialized = 0;
+  if (initialized) return;
+  initialized = 1; // set before registering, as a one-shot guard
+  cmark_register_node_flag(&CMARK_NODE__OPEN);
+  cmark_register_node_flag(&CMARK_NODE__LAST_LINE_BLANK);
+  cmark_register_node_flag(&CMARK_NODE__LAST_LINE_CHECKED);
+}
+
 bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) {
  if (child_type == CMARK_NODE_DOCUMENT) {
      return false;
--- a/src/references.c
+++ b/src/references.c
@@ -35,6 +35,7 @@ void cmark_reference_create(cmark_map *map, cmark_chunk *label,
  ref->attributes = cmark_chunk_literal("");
  ref->entry.age = map->size;
  ref->entry.next = map->refs;
+  ref->entry.size = ref->url.len + ref->title.len;

  map->refs = (cmark_map_entry *)ref;
  map->size++;
--- a/src/scanners.c
+++ b/src/scanners.c
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -37,7 +37,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,

  tagname = [A-Za-z][A-Za-z0-9-]*;

-  blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
+  blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';

  attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;

@@ -54,16 +54,15 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
  opentag = tagname attribute* spacechar* [/]? [>];
  closetag = [/] tagname spacechar* [>];

-  htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->");
+  htmlcomment = "--" ([^\x00-]+ | "-" [^\x00-] | "--" [^\x00>])* "-->";

-  processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
+  processinginstruction = ([^?>\x00]+ | [?][^>\x00] | [>])+;

-  declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
+  declaration = [A-Z]+ spacechar+ [^>\x00]*;

-  cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
+  cdata = "CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])*;

-  htmltag = opentag | closetag | htmlcomment | processinginstruction |
-            declaration | cdata;
+  htmltag = opentag | closetag;

  in_parens_nosp   = [(] (reg_char|escaped_char|[\\])* [)];

@@ -133,6 +132,46 @@ bufsize_t _scan_liberal_html_tag(const unsigned char *p)
 */
 }

+bufsize_t _scan_html_comment(const unsigned char *p)
+{ // Match `htmlcomment` at p; returns the matched length in bytes, 0 if none.
+  const unsigned char *marker = NULL; // NOTE(review): unused here; presumably needed by the re2c-generated matcher - confirm
+  const unsigned char *start = p; // scan origin, used to compute the match length
+/*!re2c
+  htmlcomment { return (bufsize_t)(p - start); }
+  * { return 0; }
+*/
+}
+
+bufsize_t _scan_html_pi(const unsigned char *p)
+{ // Match `processinginstruction` at p; returns the matched length in bytes, 0 if none.
+  const unsigned char *marker = NULL; // NOTE(review): unused here; presumably needed by the re2c-generated matcher - confirm
+  const unsigned char *start = p; // scan origin, used to compute the match length
+/*!re2c
+  processinginstruction { return (bufsize_t)(p - start); }
+  * { return 0; }
+*/
+}
+
+bufsize_t _scan_html_declaration(const unsigned char *p)
+{ // Match `declaration` at p; returns the matched length in bytes, 0 if none.
+  const unsigned char *marker = NULL; // NOTE(review): unused here; presumably needed by the re2c-generated matcher - confirm
+  const unsigned char *start = p; // scan origin, used to compute the match length
+/*!re2c
+  declaration { return (bufsize_t)(p - start); }
+  * { return 0; }
+*/
+}
+
+bufsize_t _scan_html_cdata(const unsigned char *p)
+{ // Match `cdata` at p; returns the matched length in bytes, 0 if none.
+  const unsigned char *marker = NULL; // NOTE(review): unused here; presumably needed by the re2c-generated matcher - confirm
+  const unsigned char *start = p; // scan origin, used to compute the match length
+/*!re2c
+  cdata { return (bufsize_t)(p - start); }
+  * { return 0; }
+*/
+}
+
 // Try to match an HTML block tag start line, returning
 // an integer code for the type of block (1-6, matching the spec).
 // #7 is handled by a separate function, below.
@@ -140,7 +179,7 @@ bufsize_t _scan_html_block_start(const unsigned char *p)
 {
  const unsigned char *marker = NULL;
 /*!re2c
-  [<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
+  [<] ('script'|'pre'|'textarea'|'style') (spacechar | [>]) { return 1; }
  '<!--' { return 2; }
  '<?' { return 3; }
  '<!' [A-Z] { return 4; }
@@ -167,7 +206,7 @@ bufsize_t _scan_html_block_end_1(const unsigned char *p)
  const unsigned char *marker = NULL;
  const unsigned char *start = p;
 /*!re2c
-  [^\n\x00]* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
+  [^\n\x00]* [<] [/] ('script'|'pre'|'textarea'|'style') [>] { return (bufsize_t)(p - start); }
  * { return 0; }
 */
 }
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -13,6 +13,7 @@ def pipe_through_prog(prog, text):

 def parse(lib, extlib, text, extensions):
    cmark_gfm_core_extensions_ensure_registered = extlib.cmark_gfm_core_extensions_ensure_registered
+    cmark_init_standard_node_flags = lib.cmark_init_standard_node_flags

    find_syntax_extension = lib.cmark_find_syntax_extension
    find_syntax_extension.restype = c_void_p
@@ -32,6 +33,7 @@ def parse(lib, extlib, text, extensions):
    parser_finish.restype = c_void_p
    parser_finish.argtypes = [c_void_p]

+    cmark_init_standard_node_flags()
    cmark_gfm_core_extensions_ensure_registered()

    parser = parser_new(0)
--- a/test/extensions.txt
+++ b/test/extensions.txt
@@ -581,6 +581,12 @@ www.github.com www.github.com/á

 www.google.com/a_b

+Underscores not allowed in host name www.xxx.yyy._zzz
+
+Underscores not allowed in host name www.xxx._yyy.zzz
+
+Underscores allowed in domain name www._xxx.yyy.zzz
+
 **Autolink and http://inlines**

 ![http://inline.com/image](http://inline.com/image)
@@ -618,6 +624,9 @@ http://🍄.ga/ http://x🍄.ga/
 <p>Email me at:<a href="mailto:scyther@pokemon.com">scyther@pokemon.com</a></p>
 <p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p>
 <p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p>
+<p>Underscores not allowed in host name www.xxx.yyy._zzz</p>
+<p>Underscores not allowed in host name www.xxx._yyy.zzz</p>
+<p>Underscores allowed in domain name <a href="http://www._xxx.yyy.zzz">www._xxx.yyy.zzz</a></p>
 <p><strong>Autolink and <a href="http://inlines">http://inlines</a></strong></p>
 <p><img src="http://inline.com/image" alt="http://inline.com/image" /></p>
 <p><a href="mailto:a.w@b.c">a.w@b.c</a></p>
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -63,6 +63,9 @@ pathological = {
    "pattern [ (]( repeated":
                 (("[ (](" * 80000),
                  re.compile("(\[ \(\]\(){80000}")),
+    "pattern ![[]() repeated":
+                 ("![[]()" * 160000,
+                  re.compile("(!\[<a href=\"\"></a>){160000}")),
    "hard link/emph case":
                 ("**x [a*b**c*](d)",
                  re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
@@ -87,6 +90,9 @@ pathological = {
    "unclosed links B":
                 ("[a](b" * 30000,
                  re.compile("(\[a\]\(b){30000}")),
+    "unclosed <!--":
+                 ("</" + "<!--" * 300000,
+                  re.compile("\&lt;\/(\&lt;!--){300000}")),
    "tables":
                 ("aaa\rbbb\n-\v\n" * 30000,
                  re.compile("^<p>aaa</p>\n<table>\n<thead>\n<tr>\n<th>bbb</th>\n</tr>\n</thead>\n<tbody>\n(<tr>\n<td>aaa</td>\n</tr>\n<tr>\n<td>bbb</td>\n</tr>\n<tr>\n<td>-\x0b</td>\n</tr>\n){29999}</tbody>\n</table>\n$")),
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -366,3 +366,11 @@ Hello world
 .
 <p>Hello world</p>
 ````````````````````````````````
+
+Issue #424 - emphasis before links
+
+```````````````````````````````` example
+*text* [link](#section)
+.
+<p><em>text</em> <a href="#section">link</a></p>
+````````````````````````````````
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -130,7 +130,7 @@ questions it does not answer:
    not require that.  This is hardly a "corner case," and divergences
    between implementations on this issue often lead to surprises for
    users in real documents. (See [this comment by John
-    Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+    Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).)

 2.  Is a blank line needed before a block quote or heading?
    Most implementations do not require the blank line.  However,
@@ -138,7 +138,7 @@ questions it does not answer:
    also to ambiguities in parsing (note that some implementations
    put the heading inside the blockquote, while others do not).
    (John Gruber has also spoken [in favor of requiring the blank
-    lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+    lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).)

 3.  Is a blank line needed before an indented code block?
    (`Markdown.pl` requires it, but this is not mentioned in the
@@ -171,7 +171,7 @@ questions it does not answer:
    ```

    (There are some relevant comments by John Gruber
-    [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+    [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).)

 5.  Can list markers be indented?  Can ordered list markers be right-aligned?

@@ -1001,10 +1001,7 @@ interpretable as a [code fence], [ATX heading][ATX headings],

 A [setext heading underline](@) is a sequence of
 `=` characters or a sequence of `-` characters, with no more than 3
-spaces indentation and any number of trailing spaces.  If a line
-containing a single `-` can be interpreted as an
-empty [list items], it should be interpreted this way
-and not as a [setext heading underline].
+spaces of indentation and any number of trailing spaces or tabs.

 The heading is a level 1 heading if `=` characters are used in
 the [setext heading underline], and a level 2 heading if `-`
@@ -1638,7 +1635,7 @@ has been found, the code block contains all of the lines after the
 opening code fence until the end of the containing block (or
 document).  (An alternative spec would require backtracking in the
 event that a closing code fence is not found.  But this makes parsing
-much less efficient, and there seems to be no real down side to the
+much less efficient, and there seems to be no real downside to the
 behavior described here.)

 A fenced code block may interrupt a paragraph, and does not require
@@ -2068,7 +2065,7 @@ followed by an uppercase ASCII letter.\
 `<![CDATA[`.\
 **End condition:** line contains the string `]]>`.

-6.  **Start condition:** line begins the string `<` or `</`
+6.  **Start condition:** line begins with the string `<` or `</`
 followed by one of the strings (case-insensitive) `address`,
 `article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
 `caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
@@ -5279,7 +5276,7 @@ well.  ([reStructuredText](http://docutils.sourceforge.net/rst.html)
 takes a different approach, requiring blank lines before lists
 even inside other list items.)

-In order to solve of unwanted lists in paragraphs with
+In order to solve the problem of unwanted lists in paragraphs with
 hard-wrapped numerals, we allow only lists starting with `1` to
 interrupt paragraphs.  Thus,

@@ -9410,10 +9407,9 @@ character, and a `>` character.
 A [closing tag](@) consists of the string `</`, a
 [tag name], optional [whitespace], and the character `>`.

-An [HTML comment](@) consists of `<!--` + *text* + `-->`,
-where *text* does not start with `>` or `->`, does not end with `-`,
-and does not contain `--`.  (See the
-[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of
+characters not including the string `-->`, and `-->` (see the
+[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)).

 A [processing instruction](@)
 consists of the string `<?`, a string
@@ -9554,30 +9550,20 @@ Illegal attributes in closing tag:
 Comments:

 ```````````````````````````````` example
-foo <!-- this is a
-comment - with hyphen -->
+foo <!-- this is a --
+comment - with hyphens -->
 .
-<p>foo <!-- this is a
-comment - with hyphen --></p>
+<p>foo <!-- this is a --
+comment - with hyphens --></p>
 ````````````````````````````````

-
-```````````````````````````````` example
-foo <!-- not a comment -- two hyphens -->
-.
-<p>foo &lt;!-- not a comment -- two hyphens --&gt;</p>
-````````````````````````````````
-
-
-Not comments:
-
 ```````````````````````````````` example
 foo <!--> foo -->

-foo <!-- foo--->
+foo <!---> foo -->
 .
-<p>foo &lt;!--&gt; foo --&gt;</p>
-<p>foo &lt;!-- foo---&gt;</p>
+<p>foo <!--> foo --&gt;</p>
+<p>foo <!---> foo --&gt;</p>
 ````````````````````````````````


--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \
  wget \
  clang \
  man \
-  clang-format-3.5 \
+  clang-format \
  && apt-get clean

 RUN wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz && \