diff --git a/api_test/main.c b/api_test/main.c index 24619295..62006eaa 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -1058,6 +1058,53 @@ static void source_pos(test_batch_runner *runner) { cmark_node_free(doc); } +static void source_pos_inlines(test_batch_runner *runner) { + { + static const char markdown[] = + "*first*\n" + "second\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " first\n" + " \n" + " \n" + " second\n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "*first\n" + "second*\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " first\n" + " \n" + " second\n" + " \n" + " \n" + "\n", + "sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } +} + static void ref_source_pos(test_batch_runner *runner) { static const char markdown[] = "Let's try [reference] links.\n" @@ -1110,6 +1157,7 @@ int main() { test_feed_across_line_ending(runner); test_pathological_regressions(runner); source_pos(runner); + source_pos_inlines(runner); ref_source_pos(runner); test_print_summary(runner); diff --git a/src/commonmark.c b/src/commonmark.c index 2aaaad78..335c005e 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -172,6 +172,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, int i; bool entering = (ev_type == CMARK_EVENT_ENTER); const char *info, *code, *title; + char fencechar[2] = {'\0', '\0'}; size_t info_len, code_len; char listmarker[LISTMARKER_SIZE]; char *emph_delim; @@ -284,6 +285,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } info = cmark_node_get_fence_info(node); info_len = strlen(info); + fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; code = cmark_node_get_literal(node); code_len = strlen(code); // use indented form if no info, and code doesn't @@ -303,7 +305,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, numticks = 3; } for (i = 0; i < numticks; i++) { - LIT("`"); + LIT(fencechar); } LIT(" "); OUT(info, false, LITERAL); @@ -311,7 +313,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, OUT(cmark_node_get_literal(node), false, LITERAL); CR(); for (i = 0; i < numticks; i++) { - LIT("`"); + LIT(fencechar); } } BLANKLINE(); diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c index 8c38d2fb..16938919 100644 --- a/src/houdini_href_e.c +++ b/src/houdini_href_e.c @@ -15,7 +15,7 @@ * - The characters which are *not* safe to be in * an URL because they are RESERVED characters. * - * We asume (lazily) that any RESERVED char that + * We assume (lazily) that any RESERVED char that * appears inside an URL is actually meant to * have its native function (i.e. as an URL * component/separator) and hence needs no escaping. diff --git a/src/inlines.c b/src/inlines.c index c8dd1e9f..63caeebc 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -760,9 +760,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } cmark_node_insert_after(opener_inl, emph); - emph->start_line = emph->end_line = subj->line; - emph->start_column = opener_inl->start_column + subj->column_offset; - emph->end_column = closer_inl->end_column + subj->column_offset; + emph->start_line = opener_inl->start_line; + emph->end_line = closer_inl->end_line; + emph->start_column = opener_inl->start_column; + emph->end_column = closer_inl->end_column; // if opener has 0 characters, remove it and its associated inline if (opener_num_chars == 0) { diff --git a/src/main.c b/src/main.c index 6ef6f9ea..63780d81 100644 --- a/src/main.c +++ b/src/main.c @@ -21,6 +21,14 @@ # endif #endif +#if defined(__OpenBSD__) +# include +# if OpenBSD >= 201605 +# define USE_PLEDGE +# include +# endif +#endif + #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -134,6 +142,13 @@ int main(int argc, char *argv[]) { cmark_gfm_core_extensions_ensure_registered(); +#ifdef USE_PLEDGE + if (pledge("stdio rpath", NULL) != 0) { + perror("pledge"); + return 1; + } +#endif + #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); _setmode(_fileno(stdout), _O_BINARY); diff --git a/src/render.c b/src/render.c index 958c046f..df46d451 100644 --- a/src/render.c +++ b/src/render.c @@ -57,6 +57,7 @@ static void S_out(cmark_renderer *renderer, cmark_node *node, } } renderer->column = 0; + renderer->last_breakable = 0; renderer->begin_line = true; renderer->begin_content = true; renderer->need_cr -= 1; diff --git a/src/scanners.c b/src/scanners.c index 5a426482..765b863e 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,4 +1,4 @@ -/* Generated by re2c 1.0.3 */ +/* Generated by re2c 1.1.1 */ #include #include "chunk.h" #include "scanners.h" @@ -9226,7 +9226,7 @@ bufsize_t _scan_open_code_fence(const unsigned char *p) 144, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 96, 192, + 192, 192, 192, 192, 192, 192, 224, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/src/scanners.re b/src/scanners.re index 0b217868..550374e7 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -286,7 +286,7 @@ bufsize_t _scan_open_code_fence(const unsigned char *p) const unsigned char *start = p; /*!re2c [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } - [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } * { return 0; } */ } diff --git a/test/spec.txt b/test/spec.txt index 62502c8a..66ada68c 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -1600,8 +1600,8 @@ begins with a code fence, indented no more than three spaces. The line with the opening code fence may optionally contain some text following the code fence; this is trimmed of leading and trailing -whitespace and called the [info string](@). -The [info string] may not contain any backtick +whitespace and called the [info string](@). If the [info string] comes +after a backtick fence, it may not contain any backtick characters. (The reason for this restriction is that otherwise some inline code would be incorrectly interpreted as the beginning of a fenced code block.) @@ -1989,6 +1989,18 @@ foo

```````````````````````````````` +[Info strings] for tilde code blocks can contain backticks and tildes: + +```````````````````````````````` example +~~~ aa ``` ~~~ +foo +~~~ +. +
foo
+
+```````````````````````````````` + + Closing code fences cannot have [info strings]: ```````````````````````````````` example diff --git a/wrappers/wrapper.rkt b/wrappers/wrapper.rkt index d9b34e84..1ecce17f 100644 --- a/wrappers/wrapper.rkt +++ b/wrappers/wrapper.rkt @@ -13,19 +13,34 @@ (define-ffi-definer defcmark (ffi-lib "libcmark")) (define _cmark_node_type - (_enum '(none + (_enum '(;; Error status + none ;; Block document block-quote list item code-block - html paragraph header hrule + html-block custom-block + paragraph heading thematic-break + ;; ?? first-block = document + ;; ?? last-block = thematic-break ;; Inline - text softbreak linebreak code inline-html - emph strong link image))) + text softbreak linebreak code html-inline custom-inline + emph strong link image + ;; ?? first-inline = text + ;; ?? last-inline = image + ))) (define _cmark_list_type (_enum '(no_list bullet_list ordered_list))) (define _cmark_delim_type (_enum '(no_delim period_delim paren_delim))) (define _cmark_opts - (_bitmask '(sourcepos = 1 hardbreaks = 2 normalize = 4 smart = 8))) + (_bitmask + '(sourcepos = 2 ; include sourcepos attribute on block elements + hardbreaks = 4 ; render `softbreak` elements as hard line breaks + safe = 8 ; suppress raw HTML and unsafe links + nobreaks = 16 ; render `softbreak` elements as spaces + normalize = 256 ; legacy (no effect) + validate-utf8 = 512 ; validate UTF-8 in the input + smart = 1024 ; straight quotes to curly, ---/-- to em/en dashes + ))) (define-cpointer-type _node) @@ -56,8 +71,8 @@ (defcmark cmark_node_get_type_string (_fun _node -> _bytes)) (defcmark cmark_node_get_literal (_fun _node -> _string)) (defcmark cmark_node_set_literal (_fun _node _string -> _bool)) - (defcmark cmark_node_get_header_level (_fun _node -> _int)) - (defcmark cmark_node_set_header_level (_fun _node _int -> _bool)) + (defcmark cmark_node_get_heading_level (_fun _node -> _int)) + (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool)) (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type)) (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool)) (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type)) @@ -84,6 +99,9 @@ (defcmark cmark_node_append_child (_fun _node _node -> _bool)) (defcmark cmark_consolidate_text_nodes (_fun _node -> _void)) + (defcmark cmark_version (_fun -> _int)) + (defcmark cmark_version_string (_fun -> _string)) + ) ;; Rackety interface @@ -108,7 +126,7 @@ (define-syntax-rule (define-getters+setters name [type field ...] ...) (define name (list (list 'type (make-getter+setter field) ...) ...))) (define-getters+setters getters+setters - [header header_level] [code-block fence_info] + [heading heading_level] [code-block fence_info] [link url title] [image url title] [list list_type list_delim list_start list_tight]) @@ -126,12 +144,12 @@ [else '()])) (define (assert-no what-not b) (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type))) - (cond [(memq type '(document paragraph header block-quote list item + (cond [(memq type '(document paragraph heading block-quote list item emph strong link image)) (assert-no 'text text) (list type info children)] - [(memq type '(text code code-block html inline-html - softbreak linebreak hrule)) + [(memq type '(text code code-block html-block html-inline + softbreak linebreak thematic-break)) (assert-no 'children (pair? children)) (list type info text)] [else (error 'cmark->sexpr "unknown type: ~s" type)]))