Merge pull request #211 from biojppm/events

[fix] accept tabs as tokens after : and -
This commit is contained in:
jpmag
2022-02-10 03:29:08 +01:00
committed by GitHub
12 changed files with 513 additions and 213 deletions

View File

@@ -42,7 +42,55 @@ jobs:
- {std: 20, cxx: clang++-10 , bt: Release, os: ubuntu-18.04, bitlinks: shared64 static32}
- {std: 11, cxx: clang++-6.0, bt: Debug , os: ubuntu-18.04, bitlinks: shared64 static32}
- {std: 11, cxx: clang++-6.0, bt: Release, os: ubuntu-18.04, bitlinks: shared64 static32}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}",
CMAKE_FLAGS: "${{matrix.cmkflags}}",
VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}
- {name: install requirements, run: source .github/reqs.sh && c4_install_test_requirements $OS}
- {name: show info, run: source .github/setenv.sh && c4_show_info}
- name: shared64-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test shared64
- {name: shared64-build, run: source .github/setenv.sh && c4_build_test shared64}
- {name: shared64-run, run: source .github/setenv.sh && c4_run_test shared64}
- {name: shared64-pack, run: source .github/setenv.sh && c4_package shared64}
- name: static64-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test static64
- {name: static64-build, run: source .github/setenv.sh && c4_build_test static64}
- {name: static64-run, run: source .github/setenv.sh && c4_run_test static64}
- {name: static64-pack, run: source .github/setenv.sh && c4_package static64}
- name: static32-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test static32
- {name: static32-build, run: source .github/setenv.sh && c4_build_test static32}
- {name: static32-run, run: source .github/setenv.sh && c4_run_test static32}
- {name: static32-pack, run: source .github/setenv.sh && c4_package static32}
- name: shared32-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test shared32
- {name: shared32-build, run: source .github/setenv.sh && c4_build_test shared32}
- {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32}
- {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32}
clang_canary_tabtokens:
name: tabtokens/${{matrix.cxx}}/canary/c++${{matrix.std}}/${{matrix.bt}}
if: |
(!contains(github.event.head_commit.message, 'skip all')) ||
(!contains(github.event.head_commit.message, 'skip clang')) ||
contains(github.event.head_commit.message, 'only clang')
continue-on-error: true
runs-on: ${{matrix.os}}
strategy:
fail-fast: false
matrix:
include:
- {std: 17, cxx: clang++-10 , bt: Debug , os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 17, cxx: clang++-10 , bt: Release, os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 20, cxx: clang++-10 , bt: Debug , os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 20, cxx: clang++-10 , bt: Release, os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 11, cxx: clang++-6.0, bt: Debug , os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 11, cxx: clang++-6.0, bt: Release, os: ubuntu-18.04, bitlinks: static64, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}",
CMAKE_FLAGS: "${{matrix.cmkflags}}",
VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}
- {name: install requirements, run: source .github/reqs.sh && c4_install_test_requirements $OS}
@@ -82,21 +130,11 @@ jobs:
fail-fast: false
matrix:
include:
- {std: 11, cxx: clang++-9 , bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-9 , bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-8 , bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-8 , bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-7 , bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-7 , bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-6.0, bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-6.0, bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-5.0, bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-5.0, bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-4.0, bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-4.0, bt: Release, vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-3.9, bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-3.9, bt: Release, vg: on, os: ubuntu-18.04}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
- {std: 11, cxx: clang++-10 , bt: Debug , vg: on, os: ubuntu-18.04}
- {std: 11, cxx: clang++-10 , bt: Release, vg: on, os: ubuntu-18.04}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}",
CMAKE_FLAGS: "${{matrix.cmkflags}}",
VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}
- {name: install requirements, run: source .github/reqs.sh && c4_install_test_requirements $OS}
@@ -137,10 +175,10 @@ jobs:
matrix:
include:
# clang tidy takes a long time, so don't do multiple bits/linktypes
- {std: 11, cxx: clang++-9, bt: Debug , lint: clang-tidy, bitlinks: shared64 static64, os: ubuntu-18.04}
- {std: 11, cxx: clang++-9, bt: Debug , lint: clang-tidy, bitlinks: shared32 static32, os: ubuntu-18.04}
- {std: 11, cxx: clang++-9, bt: ReleaseWithDebInfo, lint: clang-tidy, bitlinks: shared64 static64, os: ubuntu-18.04}
- {std: 11, cxx: clang++-9, bt: ReleaseWithDebInfo, lint: clang-tidy, bitlinks: shared32 static32, os: ubuntu-18.04}
- {std: 11, cxx: clang++-10, bt: Debug , lint: clang-tidy, bitlinks: shared64 static64, os: ubuntu-18.04}
- {std: 11, cxx: clang++-10, bt: Debug , lint: clang-tidy, bitlinks: shared32 static32, os: ubuntu-18.04}
- {std: 11, cxx: clang++-10, bt: ReleaseWithDebInfo, lint: clang-tidy, bitlinks: shared64 static64, os: ubuntu-18.04}
- {std: 11, cxx: clang++-10, bt: ReleaseWithDebInfo, lint: clang-tidy, bitlinks: shared32 static32, os: ubuntu-18.04}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}

View File

@@ -68,6 +68,52 @@ jobs:
- {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32}
- {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32}
# Canary job for the tab-tokens feature: every matrix entry passes
# -DRYML_WITH_TAB_TOKENS=ON through cmkflags (exported below as CMAKE_FLAGS).
gcc_tabtokens:
name: tabtokens/${{matrix.cxx}}/canary/${{matrix.bt}}
# NOTE(review): the three clauses are joined with ||, so this evaluates to
# false only when the commit message contains both 'skip all' and 'skip gcc'
# and does not contain 'only gcc' -- confirm this is the intended filter.
if: |
(!contains(github.event.head_commit.message, 'skip all')) ||
(!contains(github.event.head_commit.message, 'skip gcc')) ||
contains(github.event.head_commit.message, 'only gcc')
# a failure of this job does not fail the workflow run
continue-on-error: true
runs-on: ${{matrix.os}}
strategy:
# keep the remaining matrix entries running when one of them fails
fail-fast: false
matrix:
include:
- {std: 11, cxx: g++-7 , bt: Debug , os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 11, cxx: g++-7 , bt: Release, os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 20, cxx: g++-10 , bt: Debug , os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 20, cxx: g++-10 , bt: Release, os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 11, cxx: g++-5 , bt: Debug , os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
- {std: 11, cxx: g++-5 , bt: Release, os: ubuntu-18.04, bitlinks: shared64 static32, cmkflags: "-DRYML_WITH_TAB_TOKENS=ON"}
# Export the matrix values as environment variables for the steps below.
# No matrix entry above sets vg, san or lint, so VG, SAN and LINT
# presumably expand to empty strings here.
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}",
CMAKE_FLAGS: "${{matrix.cmkflags}}",
VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
# Every step re-sources .github/setenv.sh before calling a c4_* helper
# (presumably that script defines them).
# NOTE(review): configure/build/run/pack steps are listed for all four
# variants, while bitlinks names only "shared64 static32"; presumably the
# helpers consult $BITLINKS and skip the others -- verify in setenv.sh.
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}
- {name: install requirements, run: source .github/reqs.sh && c4_install_test_requirements $OS}
- {name: show info, run: source .github/setenv.sh && c4_show_info}
- name: shared64-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test shared64
- {name: shared64-build, run: source .github/setenv.sh && c4_build_test shared64}
- {name: shared64-run, run: source .github/setenv.sh && c4_run_test shared64}
- {name: shared64-pack, run: source .github/setenv.sh && c4_package shared64}
- name: static64-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test static64
- {name: static64-build, run: source .github/setenv.sh && c4_build_test static64}
- {name: static64-run, run: source .github/setenv.sh && c4_run_test static64}
- {name: static64-pack, run: source .github/setenv.sh && c4_package static64}
- name: static32-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test static32
- {name: static32-build, run: source .github/setenv.sh && c4_build_test static32}
- {name: static32-run, run: source .github/setenv.sh && c4_run_test static32}
- {name: static32-pack, run: source .github/setenv.sh && c4_package static32}
- name: shared32-configure---------------------------------------------------
run: source .github/setenv.sh && c4_cfg_test shared32
- {name: shared32-build, run: source .github/setenv.sh && c4_build_test shared32}
- {name: shared32-run, run: source .github/setenv.sh && c4_run_test shared32}
- {name: shared32-pack, run: source .github/setenv.sh && c4_package shared32}
#----------------------------------------------------------------------------
gcc_extended:
name: ${{matrix.cxx}}/extended/${{matrix.bt}}
@@ -91,17 +137,6 @@ jobs:
- {std: 17, cxx: g++-10, bt: Release, vg: ON, os: ubuntu-18.04}
- {std: 20, cxx: g++-10, bt: Debug , vg: ON, os: ubuntu-18.04}
- {std: 20, cxx: g++-10, bt: Release, vg: ON, os: ubuntu-18.04}
#
- {std: 11, cxx: g++-9, bt: Debug , os: ubuntu-18.04}
- {std: 11, cxx: g++-9, bt: Release, os: ubuntu-18.04}
- {std: 11, cxx: g++-8, bt: Debug , os: ubuntu-18.04}
- {std: 11, cxx: g++-8, bt: Release, os: ubuntu-18.04}
- {std: 11, cxx: g++-7, bt: Debug , os: ubuntu-18.04}
- {std: 11, cxx: g++-7, bt: Release, os: ubuntu-18.04}
- {std: 11, cxx: g++-6, bt: Debug , os: ubuntu-18.04}
- {std: 11, cxx: g++-6, bt: Release, os: ubuntu-18.04}
- {std: 11, cxx: g++-5, bt: Debug , os: ubuntu-18.04}
- {std: 11, cxx: g++-5, bt: Release, os: ubuntu-18.04}
env: {STD: "${{matrix.std}}", CXX_: "${{matrix.cxx}}", BT: "${{matrix.bt}}", BITLINKS: "${{matrix.bitlinks}}", VG: "${{matrix.vg}}", SAN: "${{matrix.san}}", LINT: "${{matrix.lint}}", OS: "${{matrix.os}}"}
steps:
- {name: checkout, uses: actions/checkout@v2, with: {submodules: recursive}}

View File

@@ -10,6 +10,7 @@ c4_project(VERSION 0.3.0 STANDALONE
#-------------------------------------------------------
option(RYML_WITH_TAB_TOKENS "Enable parsing of tabs after ':' and '-'. This is costly and disabled by default." OFF)
option(RYML_DEFAULT_CALLBACKS "Enable ryml's default implementation of callbacks: allocate(), free(), error()" ON)
option(RYML_BUILD_TOOLS "build tools" OFF)
option(RYML_BUILD_API "Enable API generation (python, etc)" OFF)
@@ -57,6 +58,10 @@ c4_add_library(ryml
INCORPORATE c4core
)
# Forward the RYML_WITH_TAB_TOKENS option to the code as a compile
# definition. It is PUBLIC, so targets linking against ryml are compiled
# with the same definition.
if(RYML_WITH_TAB_TOKENS)
target_compile_definitions(ryml PUBLIC RYML_WITH_TAB_TOKENS)
endif()
if(NOT RYML_DEFAULT_CALLBACKS)
target_compile_definitions(ryml PRIVATE RYML_NO_DEFAULT_CALLBACKS)
endif()

View File

@@ -657,7 +657,8 @@ sample_location_tracking(); ///< track node locations in the parsed source tr
### Package managers
If you opt for package managers, here's where ryml is available so far (thanks to all the contributors!):
If you opt for package managers, here's where ryml is available so far
(thanks to all the contributors!):
* [vcpkg](https://vcpkg.io/en/packages.html): `vcpkg install ryml`
* Arch Linux/Manjaro:
* [rapidyaml-git (AUR)](https://aur.archlinux.org/packages/rapidyaml-git/)
@@ -766,6 +767,9 @@ more about each sample:
The following cmake variables can be used to control the build behavior of
ryml:
* `RYML_WITH_TAB_TOKENS=ON/OFF`. Enable/disable support for tabs as
valid container tokens after `:` and `-`. Defaults to `OFF`,
because this may cost up to 10% in processing time.
* `RYML_DEFAULT_CALLBACKS=ON/OFF`. Enable/disable ryml's default
implementation of error and allocation callbacks. Defaults to `ON`.
* `RYML_STANDALONE=ON/OFF`. ryml uses
@@ -787,7 +791,8 @@ ryml is strongly coupled to c4core, and this is reinforced by the fact
that c4core is a submodule of the current repo. However, it is still
possible to use a c4core version different from the one in the repo
(of course, only if there are no incompatibilities between the
versions). You can find out how to achieve this by looking at the [`custom_c4core` sample](./samples/custom_c4core/CMakeLists.txt).
versions). You can find out how to achieve this by looking at the
[`custom_c4core` sample](./samples/custom_c4core/CMakeLists.txt).
------
@@ -814,8 +819,8 @@ be changed.) With that said, here's an example of the Python API:
```python
import ryml
# because ryml does not take ownership of the source buffer
# ryml cannot accept strings; only bytes or bytearrays
# ryml cannot accept strings because it does not take ownership of the
# source buffer; only bytes or bytearrays are accepted.
src = b"{HELLO: a, foo: b, bar: c, baz: d, seq: [0, 1, 2, 3]}"
def check(tree):
@@ -914,17 +919,20 @@ See also [the roadmap](./ROADMAP.md) for a list of future work.
ryml deliberately makes no effort to follow the standard in the following situations:
* Tab characters after `:` and `-` are not accepted tokens, unless
ryml is compiled with the macro `RYML_WITH_TAB_TOKENS`. This
requirement exists because checking for tabs introduces branching
into the parser's hot code and in some cases costs as much as 10%
in parsing time.
* Containers are not accepted as mapping keys: keys must be scalar strings.
* Tags are parsed as-is; tag lookup is not supported.
* Anchor names must not end with a terminating colon: eg `&anchor: key: val`.
* Tabs after `:` or `-` are not supported.
* `%TAG` directives have no effect and are ignored. All schemas are assumed
to be the default YAML 2002 schema.
* `%YAML` directives have no effect and are ignored.
Some of the limitations above will be worked on (tag lookups, tab
tokens). Others (notably container keys) absolutely will not, not in
the data tree at least.
Some of the limitations above will be worked on (eg tag
lookups). Others (notably container keys) most likely will not.
Also, ryml tends to be on the permissive side where the YAML standard
dictates there should be an error; in many of these cases, ryml will
@@ -937,12 +945,13 @@ problems, which is a good practice anyway.
If you do run into trouble and would like to investigate conformance
of your YAML code, beware of existing online YAML linters, many of
which are not fully conformant; instead, try using
[https://play.yaml.io](https://play.yaml.io), an amazing tool from the
YAML people which lets you dynamically input your YAML and continuously
see the results from all the existing parsers (kudos to
@ingydotnet). And of course, if you detect anything bad with ryml,
please [open an issue](https://github.com/biojppm/rapidyaml/issues) so
that we can improve.
[https://play.yaml.io](https://play.yaml.io), an amazing tool which
lets you dynamically input your YAML and continuously see the results
from all the existing parsers (kudos to @ingydotnet and the people
from the YAML test suite). And of course, if you detect anything wrong
with ryml, please [open an
issue](https://github.com/biojppm/rapidyaml/issues) so that we can
improve.
### Test suite status

View File

@@ -109,7 +109,8 @@ As part of the [new feature to track source locations](https://github.com/biojpp
? explicit key # this comment was not parsed correctly
? # trailing empty key was not added to the map
```
- ryml now parses successfully compact JSON code `{"like":"this"}` without any need for preprocessing. So the `preprocess_json()` functions and utilities are no longer necessary and have been removed. If you were using these functions, just remove the calls and pass the original source directly to ryml ([PR#210](https://github.com/biojppm/rapidyaml/pulls/210)).
- Fixed parsing of tabs used as whitespace tokens after `:` or `-`. This feature [is costly (see some benchmark results here)](https://github.com/biojppm/rapidyaml/pull/211#issuecomment-1030688035) and thus it is disabled by default, and requires defining a macro or cmake option `RYML_WITH_TAB_TOKENS` to enable ([PR#211](https://github.com/biojppm/rapidyaml/pull/211)).
- ryml now parses successfully compact JSON code `{"like":"this"}` without any need for preprocessing. This code was not valid YAML 1.1, but was made valid in YAML 1.2. So the `preprocess_json()` functions, used to insert spaces after `:`, are no longer necessary and have been removed. If you were using these functions, remove the calls and just pass the original source directly to ryml's parser ([PR#210](https://github.com/biojppm/rapidyaml/pull/210)).
- Fix handling of indentation when parsing block scalars ([PR#210](https://github.com/biojppm/rapidyaml/pull/210)):
```yaml
---

View File

@@ -51,7 +51,7 @@ void _dbg_printf(c4::csubstr fmt, Args&& ...args)
# define _c4dbgq(msg) _dbg_printf(msg "\n")
# define _c4err(fmt, ...) \
do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \
this->_err("ERROR:\n" "%s:%d: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0)
this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0)
#else
# define _c4dbgt(fmt, ...)
# define _c4dbgpf(fmt, ...)
@@ -63,21 +63,20 @@ void _dbg_printf(c4::csubstr fmt, Args&& ...args)
#endif
#define _c4prsp(sp) sp
#define _c4prc(c) (__c4prc(c) ? 2 : 1), (__c4prc(c) ? __c4prc(c) : &c)
#define _c4presc(s) __c4presc(s.str, s.len)
inline const char *__c4prc(const char c)
inline c4::csubstr _c4prc(const char &C4_RESTRICT c)
{
switch(c)
{
case '\n': return "\\n";
case '\t': return "\\t";
case '\0': return "\\0";
case '\r': return "\\r";
case '\f': return "\\f";
case '\b': return "\\b";
case '\v': return "\\v";
case '\a': return "\\a";
default: return nullptr;
case '\n': return c4::csubstr("\\n");
case '\t': return c4::csubstr("\\t");
case '\0': return c4::csubstr("\\0");
case '\r': return c4::csubstr("\\r");
case '\f': return c4::csubstr("\\f");
case '\b': return c4::csubstr("\\b");
case '\v': return c4::csubstr("\\v");
case '\a': return c4::csubstr("\\a");
default: return c4::csubstr(&c, 1);
}
}
inline void __c4presc(const char *s, size_t len)

View File

@@ -11,7 +11,20 @@
#ifdef RYML_DBG
#include "c4/yml/detail/print.hpp"
#endif
#define RYML_FILTER_ARENA
#ifndef RYML_ERRMSG_SIZE
#define RYML_ERRMSG_SIZE 1024
#endif
// Compile-time switch for accepting tabs as tokens after ':' and '-'.
// The helper macros below let call sites add the tab-handling code
// inline, without repeating #ifdef blocks.
//#define RYML_WITH_TAB_TOKENS
#ifdef RYML_WITH_TAB_TOKENS
// expands to its arguments: the wrapped code is compiled in
#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
// selects the first argument (the tab-aware variant)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
#else
// expands to nothing: the wrapped code is removed
#define _RYML_WITH_TAB_TOKENS(...)
// selects the second argument (the variant without tab handling)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
#endif
#if defined(_MSC_VER)
# pragma warning(push)
@@ -35,7 +48,7 @@ namespace yml {
namespace {
template<class DumpFn, class ...Args>
size_t _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
{
char writebuf[256];
auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward<Args>(args)...);
@@ -48,7 +61,6 @@ size_t _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...);
}
}
return results.bufsize;
}
bool _is_scalar_next__runk(csubstr s)
@@ -63,7 +75,7 @@ bool _is_scalar_next__rseq_rval(csubstr s)
bool _is_scalar_next__rmap(csubstr s)
{
return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? "));
return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t")));
}
bool _is_scalar_next__rmap_val(csubstr s)
@@ -357,24 +369,23 @@ void Parser::_fmt_msg(DumpFn &&dumpfn) const
if(contents.len)
{
// print the yaml src line
size_t offs;
if( ! m_file.empty())
offs = _parse_dump(dumpfn, "{}:{}:{}", m_file, m_state->pos.line, m_state->pos.col);
else
offs = _parse_dump(dumpfn, "{}:{}", m_state->pos.line, m_state->pos.col);
size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col);
if(m_file.len)
{
_parse_dump(dumpfn, "{}:", m_file);
offs += m_file.len + 1;
}
_parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col);
csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
_parse_dump(dumpfn, "{}:{}",m_state->pos.line, m_state->pos.col);
_parse_dump(dumpfn, "{}{} (size={})\n",
(contents.len < 80u ? contents : contents.first(80u)),
maybe_ellipsis,
contents.len);
_parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// highlight the remaining portion of the previous line
size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
size_t lastcol = firstcol + lc.rem.len;
for(size_t i = 0; i < offs + firstcol; ++i)
dumpfn(" ");
dumpfn("^");
for(size_t i = 0, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
dumpfn("~");
_parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
}
@@ -397,9 +408,6 @@ void Parser::_fmt_msg(DumpFn &&dumpfn) const
template<class ...Args>
void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
{
#ifndef RYML_ERRMSG_SIZE
#define RYML_ERRMSG_SIZE 1024
#endif
char errmsg[RYML_ERRMSG_SIZE];
detail::_SubstrWriter writer(errmsg);
auto dumpfn = [&writer](csubstr s){ writer.append(s); };
@@ -473,27 +481,27 @@ void Parser::_handle_line()
_RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty());
if(has_any(RSEQ))
{
if(has_any(EXPL))
if(has_any(FLOW))
{
if(_handle_seq_expl())
if(_handle_seq_flow())
return;
}
else
{
if(_handle_seq_impl())
if(_handle_seq_blck())
return;
}
}
else if(has_any(RMAP))
{
if(has_any(EXPL))
if(has_any(FLOW))
{
if(_handle_map_expl())
if(_handle_map_flow())
return;
}
else
{
if(_handle_map_impl())
if(_handle_map_blck())
return;
}
}
@@ -559,7 +567,7 @@ bool Parser::_handle_unk()
}
}
if(rem.begins_with("- "))
if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
{
_c4dbgpf("it's a seq (as_child={})", start_as_child);
_move_key_anchor_to_val_anchor();
@@ -583,23 +591,23 @@ bool Parser::_handle_unk()
}
else if(rem.begins_with('['))
{
_c4dbgpf("it's a seq, explicit (as_child={})", start_as_child);
_c4dbgpf("it's a seq, flow (as_child={})", start_as_child);
_move_key_anchor_to_val_anchor();
_move_key_tag_to_val_tag();
_push_level(/*explicit flow*/true);
_start_seq(start_as_child);
add_flags(EXPL);
add_flags(FLOW);
_line_progressed(1);
return true;
}
else if(rem.begins_with('{'))
{
_c4dbgpf("it's a map, explicit (as_child={})", start_as_child);
_c4dbgpf("it's a map, flow (as_child={})", start_as_child);
_move_key_anchor_to_val_anchor();
_move_key_tag_to_val_tag();
_push_level(/*explicit flow*/true);
_start_map(start_as_child);
addrem_flags(EXPL|RKEY, RVAL);
addrem_flags(FLOW|RKEY, RVAL);
_line_progressed(1);
return true;
}
@@ -674,7 +682,7 @@ bool Parser::_handle_unk()
{
_c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
_start_seq(start_as_child);
add_flags(EXPL);
add_flags(FLOW);
_append_val(_consume_scalar());
_line_progressed(2);
}
@@ -682,11 +690,11 @@ bool Parser::_handle_unk()
{
_c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
_start_seq(start_as_child);
add_flags(EXPL);
add_flags(FLOW);
_append_val(_consume_scalar());
_line_progressed(1);
}
else if(rem.begins_with(": "))
else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child);
_start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair
@@ -701,7 +709,7 @@ bool Parser::_handle_unk()
}
else if(rem.begins_with('}'))
{
if(!has_all(RMAP|EXPL))
if(!has_all(RMAP|FLOW))
{
_c4err("invalid token: not reading a map");
}
@@ -786,7 +794,7 @@ bool Parser::_handle_unk()
}
}
_store_scalar(scalar, is_quoted);
if(rem.begins_with(": "))
if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child);
_push_level();
@@ -827,24 +835,52 @@ bool Parser::_handle_unk()
return false;
}
//-----------------------------------------------------------------------------
bool Parser::_handle_seq_expl()
C4_ALWAYS_INLINE void Parser::_skipchars(char c)
{
_c4dbgpf("handle_seq_expl: node_id={} level={}", m_state->node_id, m_state->level);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c));
size_t pos = m_state->line_contents.rem.first_not_of(c);
if(pos == npos)
pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
_c4dbgpf("skip {} '{}'", pos, c);
_line_progressed(pos);
}
/** Skip the leading run of characters of the current line remainder
 * that belong to the set given in @p chars.
 *
 * The remainder must begin with one of the characters in the set
 * (asserted). When every remaining character belongs to the set, the
 * whole remainder is skipped. The number of skipped characters is
 * handed to _line_progressed(). */
template<size_t N>
C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N])
{
    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars));
    // index of the first character outside the set, or npos when there is none
    size_t first_other = m_state->line_contents.rem.first_not_of(chars);
    // npos means the line is just characters from the set: consume it all
    size_t num_skipped = (first_other != npos) ? first_other : m_state->line_contents.rem.len;
    _c4dbgpf("skip {} characters", num_skipped);
    _line_progressed(num_skipped);
}
//-----------------------------------------------------------------------------
bool Parser::_handle_seq_flow()
{
_c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level);
csubstr rem = m_state->line_contents.rem;
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));
if(rem.begins_with(' '))
{
// with explicit flow, indentation does not matter
_c4dbgp("starts with spaces");
rem = rem.left_of(rem.first_not_of(' '));
_c4dbgpf("skip {} spaces", rem.len);
_line_progressed(rem.len);
_skipchars(' ');
return true;
}
_RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
{
_c4dbgp("starts with tabs");
_skipchars('\t');
return true;
})
else if(rem.begins_with('#'))
{
_c4dbgp("it's a comment");
@@ -881,7 +917,7 @@ bool Parser::_handle_seq_expl()
addrem_flags(RNXT, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_start_seq();
add_flags(EXPL);
add_flags(FLOW);
_line_progressed(1);
return true;
}
@@ -891,7 +927,7 @@ bool Parser::_handle_seq_expl()
addrem_flags(RNXT, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_start_map();
addrem_flags(EXPL|RKEY, RVAL);
addrem_flags(FLOW|RKEY, RVAL);
_line_progressed(1);
return true;
}
@@ -902,7 +938,7 @@ bool Parser::_handle_seq_expl()
_line_progressed(1);
return true;
}
else if(rem.begins_with(": "))
else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
_start_seqimap();
@@ -950,7 +986,7 @@ bool Parser::_handle_seq_expl()
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
if(rem.begins_with(", "))
{
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
_c4dbgp("seq: expect next val");
addrem_flags(RVAL, RNXT);
_line_progressed(2);
@@ -958,7 +994,7 @@ bool Parser::_handle_seq_expl()
}
else if(rem.begins_with(','))
{
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
_c4dbgp("seq: expect next val");
addrem_flags(RVAL, RNXT);
_line_progressed(1);
@@ -971,7 +1007,7 @@ bool Parser::_handle_seq_expl()
_line_progressed(1);
return true;
}
else if(rem.begins_with(": "))
else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
_start_seqimap();
@@ -992,14 +1028,14 @@ bool Parser::_handle_seq_expl()
}
//-----------------------------------------------------------------------------
bool Parser::_handle_seq_impl()
bool Parser::_handle_seq_blck()
{
_c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level);
csubstr rem = m_state->line_contents.rem;
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
if(rem.begins_with('#'))
{
@@ -1013,11 +1049,9 @@ bool Parser::_handle_seq_impl()
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
if(_handle_indentation())
{
return true;
}
if(rem.begins_with("- "))
if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
{
_c4dbgp("expect another val");
addrem_flags(RVAL, RNXT);
@@ -1034,9 +1068,7 @@ bool Parser::_handle_seq_impl()
else if(rem.begins_with_any(" \t"))
{
_RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin());
rem = rem.left_of(rem.first_not_of(" \t"));
_c4dbgpf("skipping {} spaces/tabs", rem.len);
_line_progressed(rem.len);
_skipchars(" \t");
return true;
}
else if(rem.begins_with("..."))
@@ -1070,17 +1102,18 @@ bool Parser::_handle_seq_impl()
_c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
rem = m_state->line_contents.rem;
if(rem.begins_with(' '))
if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' ')))
{
_c4dbgp("skipping whitespace...");
size_t skip = rem.first_not_of(' ');
size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
if(skip == csubstr::npos)
skip = rem.len; // maybe the line is just whitespace
_line_progressed(skip);
rem = rem.sub(skip);
}
if(!rem.begins_with('#') && (rem.begins_with(": ") || rem.ends_with(':')))
_c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem);
if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))
{
_c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope");
if(m_key_anchor.empty())
@@ -1108,7 +1141,7 @@ bool Parser::_handle_seq_impl()
}
return true;
}
else if(rem.begins_with("- "))
else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
{
if(_rval_dash_start_or_continue_seq())
_line_progressed(2);
@@ -1122,21 +1155,21 @@ bool Parser::_handle_seq_impl()
}
else if(rem.begins_with('['))
{
_c4dbgp("val is a child seq, explicit");
_c4dbgp("val is a child seq, flow");
addrem_flags(RNXT, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_start_seq();
add_flags(EXPL);
add_flags(FLOW);
_line_progressed(1);
return true;
}
else if(rem.begins_with('{'))
{
_c4dbgp("val is a child map, explicit");
_c4dbgp("val is a child map, flow");
addrem_flags(RNXT, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_start_map();
addrem_flags(EXPL|RKEY, RVAL);
addrem_flags(FLOW|RKEY, RVAL);
_line_progressed(1);
return true;
}
@@ -1244,23 +1277,28 @@ bool Parser::_rval_dash_start_or_continue_seq()
}
//-----------------------------------------------------------------------------
bool Parser::_handle_map_expl()
bool Parser::_handle_map_flow()
{
// explicit flow, ie, inside {}, separated by commas
_c4dbgpf("handle_map_expl: node_id={} level={}", m_state->node_id, m_state->level);
_c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level);
csubstr rem = m_state->line_contents.rem;
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW));
if(rem.begins_with(' '))
{
// with explicit flow, indentation does not matter
_c4dbgp("starts with spaces");
rem = rem.left_of(rem.first_not_of(' '));
_c4dbgpf("skip {} spaces", rem.len);
_line_progressed(rem.len);
_skipchars(' ');
return true;
}
_RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
{
// with explicit flow, indentation does not matter
_c4dbgp("starts with tabs");
_skipchars('\t');
return true;
})
else if(rem.begins_with('#'))
{
_c4dbgp("it's a comment");
@@ -1324,7 +1362,7 @@ bool Parser::_handle_map_expl()
_store_scalar(rem, is_quoted);
rem = m_state->line_contents.rem;
csubstr trimmed = rem.triml(" \t");
if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}")))
if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))
{
_RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str);
size_t num = static_cast<size_t>(trimmed.str - rem.str);
@@ -1334,7 +1372,7 @@ bool Parser::_handle_map_expl()
}
}
if(rem.begins_with(": "))
if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgp("wait for val");
addrem_flags(RVAL, RKEY|QMRK);
@@ -1458,7 +1496,7 @@ bool Parser::_handle_map_expl()
_push_level(/*explicit flow*/true);
_move_scalar_from_top();
_start_seq();
add_flags(EXPL);
add_flags(FLOW);
_line_progressed(1);
return true;
}
@@ -1469,7 +1507,7 @@ bool Parser::_handle_map_expl()
_push_level(/*explicit flow*/true);
_move_scalar_from_top();
_start_map();
addrem_flags(EXPL|RKEY, RNXT|RVAL);
addrem_flags(FLOW|RKEY, RNXT|RVAL);
_line_progressed(1);
return true;
}
@@ -1520,13 +1558,13 @@ bool Parser::_handle_map_expl()
}
//-----------------------------------------------------------------------------
bool Parser::_handle_map_impl()
bool Parser::_handle_map_blck()
{
_c4dbgpf("handle_map_impl: node_id={} level={}", m_state->node_id, m_state->level);
csubstr rem = m_state->line_contents.rem;
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
if(rem.begins_with('#'))
{
@@ -1587,10 +1625,11 @@ bool Parser::_handle_map_impl()
}
else if(rem.begins_with_any(" \t"))
{
//_RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin());
rem = rem.left_of(rem.first_not_of(" \t"));
_c4dbgpf("skip {} spaces/tabs", rem.len);
_line_progressed(rem.len);
size_t pos = rem.first_not_of(" \t");
if(pos == npos)
pos = rem.len;
_c4dbgpf("skip {} spaces/tabs", pos);
_line_progressed(pos);
return true;
}
else if(rem == '?' || rem.begins_with("? "))
@@ -1613,13 +1652,11 @@ bool Parser::_handle_map_impl()
if(rem.begins_with(' '))
{
_RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin());
rem = rem.left_of(rem.first_not_of(' '));
_c4dbgpf("skip {} spaces", rem.len);
_line_progressed(rem.len);
_skipchars(' ');
}
return true;
}
else if(rem == ':' || rem.begins_with(": ") )
else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
{
_c4dbgp("key finished");
if(!has_all(SSCL))
@@ -1724,23 +1761,23 @@ bool Parser::_handle_map_impl()
}
else if(rem.begins_with('['))
{
_c4dbgp("val is a child seq, explicit");
_c4dbgp("val is a child seq, flow");
addrem_flags(RKEY, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_move_scalar_from_top();
_start_seq();
add_flags(EXPL);
add_flags(FLOW);
_line_progressed(1);
return true;
}
else if(rem.begins_with('{'))
{
_c4dbgp("val is a child map, explicit");
_c4dbgp("val is a child map, flow");
addrem_flags(RKEY, RVAL); // before _push_level!
_push_level(/*explicit flow*/true);
_move_scalar_from_top();
_start_map();
addrem_flags(EXPL|RKEY, RVAL);
addrem_flags(FLOW|RKEY, RVAL);
_line_progressed(1);
return true;
}
@@ -2288,16 +2325,22 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
_c4dbgp("RSEQ|RVAL");
if( ! _is_scalar_next__rseq_rval(s))
return false;
s = s.left_of(s.find(" #")); // is there a comment?
s = s.left_of(s.find(": ")); // is there a key-value?
if(s.ends_with(':'))
s = s.left_of(s.len-1);
if(has_all(EXPL))
{
--s.len;
}
else
{
auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");
if(first)
s.len = first.pos;
}
if(has_all(FLOW))
{
_c4dbgp("RSEQ|RVAL|EXPL");
s = s.left_of(s.first_of(",]"));
}
s = s.trimr(' ');
s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
}
else
{
@@ -2311,10 +2354,23 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
size_t colon_space = s.find(": ");
if(colon_space == npos)
{
colon_space = s.find(":");
_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
if(colon_space != s.len-1)
colon_space = npos;
_RYML_WITH_OR_WITHOUT_TAB_TOKENS(
// with tab tokens
colon_space = s.find(":\t");
if(colon_space == npos)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
colon_space = s.find(':');
if(colon_space != s.len-1)
colon_space = npos;
}
,
// without tab tokens
colon_space = s.find(':');
_RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
if(colon_space != s.len-1)
colon_space = npos;
)
}
if(has_all(RKEY))
@@ -2328,7 +2384,7 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
return false;
s = s.left_of(colon_space);
s = s.left_of(s.first_of("#"));
if(has_any(EXPL))
if(has_any(FLOW))
s = s.left_of(s.first_of(':'));
s = s.trimr(" \t");
if(s.begins_with("---"))
@@ -2343,8 +2399,8 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
if(s.begins_with("? ") || s == '?')
return false;
s = s.left_of(colon_space);
s = s.trimr(' ');
if(has_any(EXPL))
s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
if(has_any(FLOW))
{
_c4dbgpf("RMAP|RKEY|EXPL: '{}'", s);
s = s.left_of(s.first_of(",}"));
@@ -2366,12 +2422,10 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
_c4dbgp("RMAP|RVAL");
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));
if( ! _is_scalar_next__rmap_val(s))
{
return false;
}
s = s.left_of(s.find(" #")); // is there a comment?
s = s.left_of(s.find("\t#")); // is there a comment?
if(has_any(EXPL))
if(has_any(FLOW))
{
_c4dbgp("RMAP|RVAL|EXPL");
if(has_none(RSEQIMAP))
@@ -2379,7 +2433,7 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
else
s = s.left_of(s.first_of(",]"));
}
s = s.trim(' ');
s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
if(s.begins_with("---"))
return false;
else if(s.begins_with("..."))
@@ -2392,7 +2446,7 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
}
else if(has_all(RUNK))
{
_c4dbgp("RUNK");
_c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s);
if( ! _is_scalar_next__runk(s))
{
_c4dbgp("RUNK: no scalar next");
@@ -2404,6 +2458,10 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
s = s.left_of(pos);
else if(s.ends_with(':'))
s = s.left_of(s.len-1);
_RYML_WITH_TAB_TOKENS(
else if((pos = s.find(":\t")) != npos) // TABS
s = s.left_of(pos);
)
else
s = s.left_of(s.first_of(','));
s = s.trim(" \t");
@@ -2440,8 +2498,8 @@ csubstr Parser::_extend_scanned_scalar(csubstr s)
{
if(has_all(RMAP|RKEY|QMRK))
{
size_t scalar_indentation = has_any(EXPL) ? 0 : m_state->scalar_col;
_c4dbgpf("extend_scalar: complex key! indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col);
size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col;
_c4dbgpf("extend_scalar: explicit key! indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col);
csubstr n = _scan_to_next_nonempty_line(scalar_indentation);
if(!n.empty())
{
@@ -2454,7 +2512,7 @@ csubstr Parser::_extend_scanned_scalar(csubstr s)
else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference)
{
_c4dbgpf("extend_scalar: line ended, scalar='{}'", s);
if(has_none(EXPL))
if(has_none(FLOW))
{
size_t scalar_indentation = m_state->indref + 1;
if(has_all(RUNK) && scalar_indentation == 1)
@@ -2464,19 +2522,19 @@ csubstr Parser::_extend_scanned_scalar(csubstr s)
{
_c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation);
_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n));
substr full = _scan_plain_scalar_impl(s, n, scalar_indentation);
substr full = _scan_plain_scalar_blck(s, n, scalar_indentation);
if(full.len >= s.len)
s = _filter_plain_scalar(full, scalar_indentation);
}
}
else
{
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
csubstr n = _scan_to_next_nonempty_line(/*indentation*/0);
if(!n.empty())
{
_c4dbgp("rscalar[EXPL]");
substr full = _scan_plain_scalar_expl(s, n);
_c4dbgp("rscalar[FLOW]");
substr full = _scan_plain_scalar_flow(s, n);
s = _filter_plain_scalar(full, /*indentation*/0);
}
}
@@ -2488,7 +2546,7 @@ csubstr Parser::_extend_scanned_scalar(csubstr s)
//-----------------------------------------------------------------------------
substr Parser::_scan_plain_scalar_expl(csubstr currscalar, csubstr peeked_line)
substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
{
static constexpr const csubstr chars = "[]{}?#,";
size_t pos = peeked_line.first_of(chars);
@@ -2546,7 +2604,7 @@ substr Parser::_scan_plain_scalar_expl(csubstr currscalar, csubstr peeked_line)
//-----------------------------------------------------------------------------
substr Parser::_scan_plain_scalar_impl(csubstr currscalar, csubstr peeked_line, size_t indentation)
substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar));
// NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice
@@ -2961,9 +3019,9 @@ void Parser::_push_level(bool explicit_flow_chars)
return;
}
flag_t st = RUNK;
if(explicit_flow_chars || has_all(EXPL))
if(explicit_flow_chars || has_all(FLOW))
{
st |= EXPL;
st |= FLOW;
}
m_stack.push_top();
m_state = &m_stack.top();
@@ -3099,7 +3157,7 @@ void Parser::_end_stream()
_c4err("internal error");
}
}
else if(has_all(RSEQ|RVAL) && has_none(EXPL))
else if(has_all(RSEQ|RVAL) && has_none(FLOW))
{
_c4dbgp("add last...");
added = _append_val_null(m_state->line_contents.rem.str);
@@ -3157,7 +3215,7 @@ void Parser::_end_stream()
{
_c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size());
_RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top()));
if(has_all(RSEQ|EXPL))
if(has_all(RSEQ|FLOW))
_err("closing ] not found");
_pop_level();
}
@@ -3375,7 +3433,7 @@ void Parser::_stop_seq()
void Parser::_start_seqimap()
{
_c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));
// create a map, and turn the last scalar of this sequence
// into the key of the map's first child. This scalar was
// understood to be a value in the sequence, but it is
@@ -3403,7 +3461,7 @@ void Parser::_start_seqimap()
_start_map();
_store_scalar_null(m_state->line_contents.rem.str);
}
add_flags(RSEQIMAP|EXPL);
add_flags(RSEQIMAP|FLOW);
}
void Parser::_stop_seqimap()
@@ -3508,7 +3566,7 @@ void Parser::_move_scalar_from_top()
/** @todo this function is a monster and needs love. */
bool Parser::_handle_indentation()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(EXPL));
_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
if( ! _at_line_begin())
return false;
@@ -4997,7 +5055,7 @@ csubstr Parser::_prfl(substr buf, flag_t flags)
_prflag(RUNK);
_prflag(RMAP);
_prflag(RSEQ);
_prflag(EXPL);
_prflag(FLOW);
_prflag(QMRK);
_prflag(RKEY);
_prflag(RVAL);

View File

@@ -288,8 +288,8 @@ private:
csubstr _scan_squot_scalar();
csubstr _scan_dquot_scalar();
csubstr _scan_block();
substr _scan_plain_scalar_impl(csubstr currscalar, csubstr peeked_line, size_t indentation);
substr _scan_plain_scalar_expl(csubstr currscalar, csubstr peeked_line);
substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation);
substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line);
substr _scan_complex_key(csubstr currscalar, csubstr peeked_line);
csubstr _scan_to_next_nonempty_line(size_t indentation);
csubstr _extend_scanned_scalar(csubstr currscalar);
@@ -310,10 +310,10 @@ private:
bool _handle_indentation();
bool _handle_unk();
bool _handle_map_expl();
bool _handle_map_impl();
bool _handle_seq_expl();
bool _handle_seq_impl();
bool _handle_map_flow();
bool _handle_map_blck();
bool _handle_seq_flow();
bool _handle_seq_blck();
bool _handle_top();
bool _handle_types();
bool _handle_key_anchors_and_refs();
@@ -363,6 +363,10 @@ private:
void _write_key_anchor(size_t node_id);
void _write_val_anchor(size_t node_id);
void _skipchars(char c);
template<size_t N>
void _skipchars(const char (&chars)[N]);
private:
static size_t _count_nlines(csubstr src);
@@ -374,7 +378,7 @@ private:
RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq
RMAP = 0x01 << 2, ///< reading a map
RSEQ = 0x01 << 3, ///< reading a seq
EXPL = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
QMRK = 0x01 << 5, ///< reading an explicit key (`? key`)
RKEY = 0x01 << 6, ///< reading a scalar as key
RVAL = 0x01 << 7, ///< reading a scalar as val

View File

@@ -132,26 +132,25 @@ TEST(seq_of_map, missing_scalars_v3)
EXPECT_EQ(t["a"][1].first_child().val(), nullptr);
}
TEST(explicit_key, test_suite_NJ66)
#ifdef RYML_WITH_TAB_TOKENS
TEST(seq_of_map, test_suite_6BCT)
{
csubstr yaml = R"(
- { single line: value}
- { multi
line: value}
- { multi
line: value}
)";
test_check_emit_check(yaml, [](Tree const &t){
ASSERT_TRUE(t.rootref().is_seq());
ASSERT_EQ(t.rootref().num_children(), 3u);
ASSERT_TRUE(t[0].has_child("single line"));
ASSERT_TRUE(t[1].has_child("multi line"));
ASSERT_TRUE(t[2].has_child("multi line"));
EXPECT_EQ(t[0]["single line"].val(), csubstr("value"));
EXPECT_EQ(t[1]["multi line"].val(), csubstr("value"));
EXPECT_EQ(t[2]["multi line"].val(), csubstr("value"));
});
Tree t = parse_in_arena(R"(
- foo0: bar0
- foo1 : bar1
- foo2 : bar2
)");
#ifdef RYML_DBG
print_tree(t);
#endif
ASSERT_TRUE(t[0].is_map());
ASSERT_TRUE(t[1].is_map());
ASSERT_TRUE(t[2].is_map());
EXPECT_EQ(t[0]["foo0"].val(), csubstr("bar0"));
EXPECT_EQ(t[1]["foo1"].val(), csubstr("bar1"));
EXPECT_EQ(t[2]["foo2"].val(), csubstr("bar2"));
}
#endif
//-----------------------------------------------------------------------------
@@ -275,7 +274,7 @@ L{N(KEYSEQ|KEYQUO, "implicit block key", L{
N(L{N(KEYSEQ|KEYQUO, "implicit flow key s", L{N("val1"), N("val2")})}),
})});
/* TODO JAVAI 209
ADD_CASE_TO_GROUP("seq of maps, implicit map in seq, missing scalar",
R"({a : [
: foo
@@ -294,7 +293,7 @@ L{
N("b", L{N(MAP, L{N("", "foo")}),}),
N("c", L{N(MAP, L{N(KEYVAL, "", {})}), N(MAP, L{N(KEYVAL, "", {})}),}),
});
*/
ADD_CASE_TO_GROUP("seq of maps, implicit with anchors, unresolved",
R"(
@@ -310,6 +309,7 @@ L{
N(L{N("*a1", AR(KEYREF, "*a1"), "w1"), N("*a2", AR(KEYREF, "*a2"), "w2"), N("*a3", AR(KEYREF, "*a3"), "w3")}),
});
ADD_CASE_TO_GROUP("seq of maps, implicit with anchors, resolved", RESOLVE_REFS,
R"(
- &a1 a1: v1

View File

@@ -207,6 +207,94 @@ TEST(simple_map, no_seq_key_block)
}
#endif
#ifdef RYML_WITH_TAB_TOKENS
// Parses five block-style map documents (whitespace variants are named in
// the YAML comments below) and checks that each one yields a=0, b=1, c=2.
TEST(simple_map, block_tab_tokens)
{
    Tree tree = parse_in_arena(R"(
--- # block, spaces only
a: 0
b: 1
c: 2
--- # block, tabs after token
a: 0
b: 1
c: 2
--- # block, tabs before and after token
a : 0
b : 1
c : 2
--- # block, tabs before token
a : 0
b : 1
c : 2
--- # block, tabs before newline
a : 0
b : 1
c : 2
)");
    // every document is expected to hold the very same map, so the
    // checks are applied per document, in document order
    const size_t num_docs = 5;
    for(size_t doc = 0; doc < num_docs; ++doc)
    {
        EXPECT_EQ(tree.docref(doc)["a"].val(), csubstr("0")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)["b"].val(), csubstr("1")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)["c"].val(), csubstr("2")) << "doc=" << doc;
    }
}
// Parses seven flow-style map documents (whitespace variants are named in
// the YAML comments below) and checks that each one yields a=0, b=1, c=2.
TEST(simple_map, flow_tab_tokens)
{
    Tree tree = parse_in_arena(R"(
--- # flow, no tabs
{a: 0, b: 1, c: 2}
--- # flow, tabs after token
{a: 0, b: 1, c: 2}
--- # flow, tabs before and after token
{a : 0, b : 1, c : 2}
--- # flow, tabs before token
{a : 0, b : 1, c : 2}
--- # flow, tabs after val
{a : 0 , b : 1 , c : 2 }
--- # flow, tabs after val and comma
{a : 0 , b : 1 , c : 2 }
--- # flow, tabs everywhere
{
a : 0 ,
b : 1 ,
c : 2
}
)");
    // every document is expected to hold the very same map, so the
    // checks are applied per document, in document order
    const size_t num_docs = 7;
    for(size_t doc = 0; doc < num_docs; ++doc)
    {
        EXPECT_EQ(tree.docref(doc)["a"].val(), csubstr("0")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)["b"].val(), csubstr("1")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)["c"].val(), csubstr("2")) << "doc=" << doc;
    }
}
#endif // RYML_WITH_TAB_TOKENS
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

View File

@@ -109,6 +109,63 @@ TEST(simple_seq, deeply_nested_to_cover_parse_stack_resizes)
}
#ifdef RYML_WITH_TAB_TOKENS
// Parses three block-style sequence documents (whitespace variants are
// named in the YAML comments below) and checks that each one yields the
// values 0, 1, 2.
TEST(simple_seq, block_tab_tokens)
{
    Tree tree = parse_in_arena(R"(
--- # block, spaces only
- 0
- 1
- 2
--- # block, tabs after
- 0
- 1
- 2
--- # block, tabs after token, and after val
- 0
- 1
- 2
)");
    EXPECT_EQ(tree.docref(0)[0].val(), csubstr("0"));
    EXPECT_EQ(tree.docref(0)[1].val(), csubstr("1"));
    EXPECT_EQ(tree.docref(0)[2].val(), csubstr("2"));
    EXPECT_EQ(tree.docref(1)[0].val(), csubstr("0"));
    EXPECT_EQ(tree.docref(1)[1].val(), csubstr("1"));
    EXPECT_EQ(tree.docref(1)[2].val(), csubstr("2"));
    // the third document ("tabs after token, and after val") was parsed
    // but never verified; check it like the other two
    EXPECT_EQ(tree.docref(2)[0].val(), csubstr("0"));
    EXPECT_EQ(tree.docref(2)[1].val(), csubstr("1"));
    EXPECT_EQ(tree.docref(2)[2].val(), csubstr("2"));
}
// Parses four flow-style sequence documents (whitespace variants are named
// in the YAML comments below) and checks that each one yields 0, 1, 2.
TEST(simple_seq, flow_tab_tokens)
{
    Tree tree = parse_in_arena(R"(
--- # flow, no tabs
[0, 1, 2]
--- # flow, tabs after
[0, 1, 2]
--- # flow, tabs before and after
[0 , 1 , 2]
--- # flow, tabs everywhere
[
0 ,
1 ,
2 ,
]
)");
    // every document is expected to hold the very same sequence, so the
    // checks are applied per document, in document order
    const size_t num_docs = 4;
    for(size_t doc = 0; doc < num_docs; ++doc)
    {
        EXPECT_EQ(tree.docref(doc)[0].val(), csubstr("0")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)[1].val(), csubstr("1")) << "doc=" << doc;
        EXPECT_EQ(tree.docref(doc)[2].val(), csubstr("2")) << "doc=" << doc;
    }
}
#endif // RYML_WITH_TAB_TOKENS
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
@@ -413,6 +470,11 @@ L{
}
);
// Picks one of two alternatives at preprocessing time: expands to `with`
// when RYML_WITH_TAB_TOKENS is defined, and to `without` otherwise.
#ifndef RYML_WITH_TAB_TOKENS
#define _ryml_with_or_without_tabs(with, without) without
#else
#define _ryml_with_or_without_tabs(with, without) with
#endif
ADD_CASE_TO_GROUP("simple seq expl, scalars with special chars, colon",
R"(
- [[], :@]
@@ -429,7 +491,7 @@ L{
N(L{N(SEQ), N(":^")}),
N(L{N(SEQ), N(":$")}),
N(L{N(SEQ), N("::")}),
N(L{N(SEQ), N(": ")}),
N(L{N(SEQ), _ryml_with_or_without_tabs(N(MAP, L{N("", "")}), N(": "))}),
N(L{N(SEQ), N(":`")}),
}
);

View File

@@ -110,6 +110,10 @@ constexpr const AllowedFailure allowed_failures[] = {
// These tests are skipped because they cover parts of YAML that
// are deliberately not implemented by ryml.
#ifndef RYML_WITH_TAB_TOKENS // tabs after - or : are supported only when this macro is defined
_("6BCT-in_yaml" , "tabs after - or :"),
_("J3BT-in_yaml-events" , "tabs after - or :"),
#endif
// container keys are not supported
_("4FJ6-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("4FJ6-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
@@ -126,10 +130,10 @@ constexpr const AllowedFailure allowed_failures[] = {
_("KZN9-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("LX3P-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("LX3P-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M2N8_00-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),// FIXME but only case 1
_("M2N8_00-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),// FIXME but only case 1
_("M2N8_01-in_yaml-events" , "only scalar keys allowed (keys cannot be maps or seqs)"),// FIXME but only case 1
_("M2N8_01-out_yaml-events", "only scalar keys allowed (keys cannot be maps or seqs)"),// FIXME but only case 1
_("M2N8_00-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M2N8_00-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M2N8_01-in_yaml-events" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M2N8_01-out_yaml-events", "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M5DY-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("M5DY-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("Q9WF-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
@@ -144,9 +148,6 @@ constexpr const AllowedFailure allowed_failures[] = {
_("X38W-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("XW4D-in_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
_("XW4D-out_yaml" , "only scalar keys allowed (keys cannot be maps or seqs)"),
// tabs after - or : are not supported
_("6BCT-in_yaml" , "tabs after - or :"),
_("J3BT-in_yaml-events" , "tabs after - or :"),
// anchors with : are not supported
_("2SXE-in_yaml-events" , "weird characters in anchors, anchors must not end with :"),
// tags are parsed as-is; tag lookup is not supported