mirror of
https://github.com/biojppm/rapidyaml.git
synced 2026-01-18 21:41:18 +01:00
Byte order mark: account for indentation
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
### Changes
|
||||
|
||||
- [PR#561](https://github.com/biojppm/rapidyaml/pull/561) (fixes [#559](https://github.com/biojppm/rapidyaml/issues/559)) - Byte Order Mark: account for BOM when determining block indentation
|
||||
- [PR#563](https://github.com/biojppm/rapidyaml/pull/563) (fixes [#562](https://github.com/biojppm/rapidyaml/issues/562)) - Fix bug in `NodeRef::cend()`
|
||||
- [PR#547](https://github.com/biojppm/rapidyaml/pull/547) - Fix parsing of implicit first documents with empty sequences, caused by a problem in `Tree::set_root_as_stream()`:
|
||||
```yaml
|
||||
@@ -7,14 +8,14 @@
|
||||
---
|
||||
more data here
|
||||
```
|
||||
- [PR#557](https://github.com/biojppm/rapidyaml/pull/557) - `Tree` is now non-empty by default, and `Tree::root_id()` will no longer modify the tree when it is empty. To create an empty tree now it is necessary to use the capacity constructor with a capacity of zero:
|
||||
- [PR#557](https://github.com/biojppm/rapidyaml/pull/557) - `Tree` is now non-empty by default, and `Tree::root_id()` will no longer modify the tree when it is empty. To create an empty tree now, it is necessary to use the capacity constructor with a capacity of zero:
|
||||
```c++
|
||||
// default-constructed tree is now non-empty
|
||||
// breaking change: default-constructed tree is now non-empty
|
||||
Tree tree;
|
||||
assert(!tree.empty()); // MODIFIED! was empty on previous version
|
||||
id_type root = tree.root_id(); // OK. default-constructed tree is now non-empty
|
||||
|
||||
// to create an empty tree:
|
||||
// to create an empty tree (as happened before):
|
||||
Tree tree(0); // pass capacity of zero
|
||||
assert(tree.empty()); // as expected
|
||||
// but watch out, this is no longer possible:
|
||||
|
||||
@@ -416,7 +416,9 @@ void ParseEngine<EventHandler>::_reset()
|
||||
m_doc_empty = true;
|
||||
m_was_inside_qmrk = false;
|
||||
m_prev_colon = npos;
|
||||
m_bom_len = 0;
|
||||
m_encoding = NOBOM;
|
||||
m_bom_line = 0;
|
||||
if(m_options.locations())
|
||||
{
|
||||
_prepare_locations();
|
||||
@@ -523,7 +525,7 @@ void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...arg
|
||||
{
|
||||
if(_dbg_enabled())
|
||||
{
|
||||
auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); };
|
||||
auto dumpfn = [](csubstr s){ if(s.len) fwrite(s.str, 1, s.len, stdout); };
|
||||
detail::_dump(dumpfn, fmt, args...);
|
||||
dumpfn("\n");
|
||||
_fmt_msg(dumpfn);
|
||||
@@ -1603,6 +1605,7 @@ void ParseEngine<EventHandler>::_end2_seq()
|
||||
template<class EventHandler>
|
||||
void ParseEngine<EventHandler>::_begin2_doc()
|
||||
{
|
||||
_c4dbgp("begin_doc");
|
||||
m_doc_empty = true;
|
||||
add_flags(RDOC);
|
||||
m_evt_handler->begin_doc();
|
||||
@@ -1612,6 +1615,7 @@ void ParseEngine<EventHandler>::_begin2_doc()
|
||||
template<class EventHandler>
|
||||
void ParseEngine<EventHandler>::_begin2_doc_expl()
|
||||
{
|
||||
_c4dbgp("begin_doc_expl");
|
||||
m_doc_empty = true;
|
||||
add_flags(RDOC);
|
||||
m_evt_handler->begin_doc_expl();
|
||||
@@ -1630,6 +1634,7 @@ void ParseEngine<EventHandler>::_end2_doc()
|
||||
m_evt_handler->set_val_scalar_plain_empty();
|
||||
}
|
||||
m_evt_handler->end_doc();
|
||||
m_bom_len = 0;
|
||||
}
|
||||
|
||||
template<class EventHandler>
|
||||
@@ -1643,6 +1648,7 @@ void ParseEngine<EventHandler>::_end2_doc_expl()
|
||||
m_evt_handler->set_val_scalar_plain_empty();
|
||||
}
|
||||
m_evt_handler->end_doc_expl();
|
||||
m_bom_len = 0;
|
||||
}
|
||||
|
||||
template<class EventHandler>
|
||||
@@ -4354,18 +4360,20 @@ bool ParseEngine<EventHandler>::_handle_bom()
|
||||
const csubstr rest = rem.sub(1);
|
||||
// https://yaml.org/spec/1.2.2/#52-character-encodings
|
||||
#define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
|
||||
if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
|
||||
if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF32BE");
|
||||
_handle_bom(UTF32BE);
|
||||
_line_progressed(4);
|
||||
m_bom_len = 4;
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
|
||||
else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF32LE");
|
||||
_handle_bom(UTF32LE);
|
||||
_line_progressed(4);
|
||||
m_bom_len = 4;
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
|
||||
@@ -4373,6 +4381,7 @@ bool ParseEngine<EventHandler>::_handle_bom()
|
||||
_c4dbgp("byte order mark: UTF16BE");
|
||||
_handle_bom(UTF16BE);
|
||||
_line_progressed(2);
|
||||
m_bom_len = 2;
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
|
||||
@@ -4380,6 +4389,7 @@ bool ParseEngine<EventHandler>::_handle_bom()
|
||||
_c4dbgp("byte order mark: UTF16LE");
|
||||
_handle_bom(UTF16LE);
|
||||
_line_progressed(2);
|
||||
m_bom_len = 2;
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xef\xbb\xbf"))
|
||||
@@ -4387,6 +4397,7 @@ bool ParseEngine<EventHandler>::_handle_bom()
|
||||
_c4dbgp("byte order mark: UTF8");
|
||||
_handle_bom(UTF8);
|
||||
_line_progressed(3);
|
||||
m_bom_len = 3;
|
||||
return true;
|
||||
}
|
||||
#undef _rymlisascii
|
||||
@@ -4399,8 +4410,7 @@ void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
|
||||
{
|
||||
if(m_encoding == NOBOM)
|
||||
{
|
||||
const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
|
||||
if(enc == UTF8 || is_beginning_of_file)
|
||||
if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == m_buf.str))
|
||||
m_encoding = enc;
|
||||
else
|
||||
_c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
|
||||
@@ -5651,7 +5661,7 @@ seqblck_start:
|
||||
const size_t startline = m_evt_handler->m_curr->pos.line;
|
||||
// warning: the gcc optimizer on x86 builds is brittle with
|
||||
// this function:
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
|
||||
ScannedScalar sc;
|
||||
if(first == '\'')
|
||||
{
|
||||
@@ -5815,7 +5825,7 @@ seqblck_start:
|
||||
_handle_annotations_before_blck_val_scalar();
|
||||
m_evt_handler->begin_seq_val_block();
|
||||
addrem_flags(RVAL, RNXT);
|
||||
_save_indentation();
|
||||
_set_indentation(startindent);
|
||||
// keep going on inside this function
|
||||
}
|
||||
_line_progressed(1);
|
||||
@@ -5883,7 +5893,7 @@ seqblck_start:
|
||||
m_was_inside_qmrk = true;
|
||||
m_evt_handler->begin_map_val_block();
|
||||
addrem_flags(RMAP|QMRK, RSEQ|RNXT);
|
||||
_save_indentation();
|
||||
_set_indentation(startindent);
|
||||
_line_progressed(1);
|
||||
_maybe_skip_whitespace_tokens();
|
||||
goto seqblck_finish;
|
||||
@@ -6050,6 +6060,7 @@ seqblck_start:
|
||||
_c4dbgt("seqblck: go again", 0);
|
||||
if(_finished_line())
|
||||
{
|
||||
m_bom_len = 0;
|
||||
_line_ended();
|
||||
_scan_line();
|
||||
if(_finished_file())
|
||||
@@ -7368,15 +7379,15 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
_c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
|
||||
}
|
||||
|
||||
if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
|
||||
if(m_evt_handler->m_curr->line_contents.indentation == 0u && (_at_line_begin() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
|
||||
{
|
||||
_c4dbgp("rtop: zero indent + at line begin");
|
||||
_c4dbgpf("rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
|
||||
_c4dbgp("check BOM");
|
||||
if(_handle_bom())
|
||||
{
|
||||
_c4dbgp("byte order mark!");
|
||||
rem = m_evt_handler->m_curr->line_contents.rem;
|
||||
if(!rem.len)
|
||||
return;
|
||||
m_bom_line = m_evt_handler->m_curr->pos.line;
|
||||
_c4dbgpf("byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
|
||||
return;
|
||||
}
|
||||
const char first = rem.str[0];
|
||||
if(first == '-')
|
||||
@@ -7427,19 +7438,35 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
/* no else-if! */
|
||||
char first = rem.str[0];
|
||||
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
|
||||
size_t remindent = m_evt_handler->m_curr->line_contents.current_col(rem);
|
||||
if(m_bom_len)
|
||||
{
|
||||
_c4dbgpf("prev BOMlen={}", m_bom_len);
|
||||
if(m_evt_handler->m_curr->pos.line == m_bom_line)
|
||||
{
|
||||
_c4dbgpf("BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
|
||||
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len);
|
||||
remindent -= m_bom_len;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_bom_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(first == '[')
|
||||
{
|
||||
m_evt_handler->check_trailing_doc_token();
|
||||
_maybe_begin_doc();
|
||||
m_doc_empty = false;
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
|
||||
if(C4_LIKELY( ! _annotations_require_key_container()))
|
||||
{
|
||||
_c4dbgp("it's a seq, flow");
|
||||
_handle_annotations_before_blck_val_scalar();
|
||||
m_evt_handler->begin_seq_val_flow();
|
||||
addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
|
||||
_set_indentation(startindent);
|
||||
_set_indentation(remindent);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -7447,10 +7474,10 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
_handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
|
||||
m_evt_handler->begin_map_val_block();
|
||||
addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
|
||||
_handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
|
||||
_handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
|
||||
m_evt_handler->begin_seq_key_flow();
|
||||
addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
|
||||
_set_indentation(startindent);
|
||||
_set_indentation(remindent);
|
||||
}
|
||||
_line_progressed(1);
|
||||
}
|
||||
@@ -7459,14 +7486,13 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
m_evt_handler->check_trailing_doc_token();
|
||||
_maybe_begin_doc();
|
||||
m_doc_empty = false;
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
|
||||
if(C4_LIKELY( ! _annotations_require_key_container()))
|
||||
{
|
||||
_c4dbgp("it's a map, flow");
|
||||
_handle_annotations_before_blck_val_scalar();
|
||||
m_evt_handler->begin_map_val_flow();
|
||||
addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
|
||||
_set_indentation(startindent);
|
||||
_set_indentation(remindent);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -7474,10 +7500,10 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
_handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
|
||||
m_evt_handler->begin_map_val_block();
|
||||
addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
|
||||
_handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
|
||||
_handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
|
||||
m_evt_handler->begin_map_key_flow();
|
||||
addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
|
||||
_set_indentation(startindent);
|
||||
_set_indentation(remindent);
|
||||
}
|
||||
_line_progressed(1);
|
||||
}
|
||||
@@ -7490,7 +7516,7 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
m_evt_handler->begin_seq_val_block();
|
||||
addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
|
||||
m_doc_empty = false;
|
||||
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
||||
_set_indentation(remindent);
|
||||
_line_progressed(1);
|
||||
_maybe_skip_whitespace_tokens();
|
||||
}
|
||||
@@ -7504,7 +7530,7 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
|
||||
m_doc_empty = false;
|
||||
m_was_inside_qmrk = true;
|
||||
_save_indentation();
|
||||
_set_indentation(remindent); //_save_indentation();
|
||||
_line_progressed(1);
|
||||
_maybe_skip_whitespace_tokens();
|
||||
}
|
||||
@@ -7513,7 +7539,6 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
if(m_doc_empty)
|
||||
{
|
||||
_c4dbgp("it's a map with an empty key");
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
||||
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
||||
m_evt_handler->check_trailing_doc_token();
|
||||
_maybe_begin_doc();
|
||||
@@ -7542,9 +7567,8 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
_c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
|
||||
m_evt_handler->check_trailing_doc_token();
|
||||
_maybe_begin_doc();
|
||||
const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
|
||||
const size_t line = m_evt_handler->m_curr->pos.line;
|
||||
_add_annotation(&m_pending_anchors, anchor, indentation, line);
|
||||
_add_annotation(&m_pending_anchors, anchor, remindent, line);
|
||||
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
||||
m_doc_empty = false;
|
||||
}
|
||||
@@ -7564,7 +7588,6 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
else
|
||||
{
|
||||
_c4dbgp("runk: start new block map, set ref as key");
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
||||
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
||||
_handle_annotations_before_start_mapblck(startline);
|
||||
m_evt_handler->begin_map_val_block();
|
||||
@@ -7592,7 +7615,6 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
csubstr s = m_evt_handler->m_curr->line_contents.rem;
|
||||
if(!s.len)
|
||||
return;
|
||||
const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
|
||||
const size_t startline = m_evt_handler->m_curr->pos.line; // save
|
||||
first = s.str[0];
|
||||
ScannedScalar sc;
|
||||
|
||||
@@ -758,6 +758,10 @@ private:
|
||||
bool m_doc_empty = true;
|
||||
size_t m_prev_colon = npos;
|
||||
|
||||
private:
|
||||
|
||||
size_t m_bom_len = 0;
|
||||
size_t m_bom_line = 0;
|
||||
Encoding_e m_encoding = UTF8;
|
||||
|
||||
private:
|
||||
|
||||
@@ -107,6 +107,7 @@ ryml_add_test(json)
|
||||
ryml_add_test(preprocess)
|
||||
ryml_add_test(merge)
|
||||
ryml_add_test(location)
|
||||
ryml_add_test(bom)
|
||||
ryml_add_test_case_group(empty_file)
|
||||
ryml_add_test_case_group(doc)
|
||||
ryml_add_test_case_group(seq)
|
||||
|
||||
701
test/test_bom.cpp
Normal file
701
test/test_bom.cpp
Normal file
@@ -0,0 +1,701 @@
|
||||
#include "./test_lib/test_case.hpp"
|
||||
#include <c4/utf.hpp>
|
||||
|
||||
namespace c4 {
|
||||
namespace yml {
|
||||
|
||||
namespace {
|
||||
|
||||
struct bomspec
|
||||
{
|
||||
csubstr name;
|
||||
Encoding_e encoding;
|
||||
csubstr bom;
|
||||
};
|
||||
|
||||
/** Byte sequences exercised by the parametrized tests. Entries whose
 * name starts with '!' are not byte order marks: they hold an ASCII
 * character padded with zero bytes in the layout of the tagged
 * encoding. */
const bomspec bomspecs[] = {
    // bare string causes problems in gcc5 and earlier
    //
    // no leading bytes at all, and the UTF8 mark
    {"NOBOM"     , UTF8   , csubstr("", size_t(0))},
    {"UTF8"      , UTF8   , csubstr("\xef\xbb\xbf", 3)},
    // 16 bit, big endian
    {"UTF16BE"   , UTF16BE, csubstr("\xfe\xff", 2)},
    {"!UTF16BE-a", UTF16BE, csubstr("\x00""a", 2)},
    {"!UTF16BE-b", UTF16BE, csubstr("\x00""b", 2)},
    {"!UTF16BE-0", UTF16BE, csubstr("\x00""0", 2)},
    // 16 bit, little endian
    {"UTF16LE"   , UTF16LE, csubstr("\xff\xfe", 2)},
    {"!UTF16LE-a", UTF16LE, csubstr("a""\x00" , 2)},
    {"!UTF16LE-b", UTF16LE, csubstr("b""\x00" , 2)},
    {"!UTF16LE-0", UTF16LE, csubstr("0""\x00" , 2)},
    // 32 bit, big endian
    {"UTF32BE"   , UTF32BE, csubstr("\x00\x00\xfe\xff", 4)},
    {"!UTF32BE-a", UTF32BE, csubstr("\x00\x00\x00""a" , 4)},
    {"!UTF32BE-b", UTF32BE, csubstr("\x00\x00\x00""b" , 4)},
    {"!UTF32BE-0", UTF32BE, csubstr("\x00\x00\x00""0" , 4)},
    // 32 bit, little endian
    {"UTF32LE"   , UTF32LE, csubstr("\xff\xfe\x00\x00", 4)},
    {"!UTF32LE-a", UTF32LE, csubstr("a""\x00\x00\x00" , 4)},
    {"!UTF32LE-b", UTF32LE, csubstr("b""\x00\x00\x00" , 4)},
    {"!UTF32LE-0", UTF32LE, csubstr("0""\x00\x00\x00" , 4)},
};
|
||||
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_bom(bomspec const& bom, CreateFn &&createfn, TestFn &&testfn, bool with_docs=true)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
SCOPED_TRACE(bom.name);
|
||||
{
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(buf);
|
||||
SCOPED_TRACE("single instance");
|
||||
#ifdef RYML_DBG
|
||||
std::cout << "------------------\n" << bom.name << "\n" << buf << "\n";
|
||||
#endif
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree.crootref(), bom);
|
||||
}
|
||||
if(with_docs)
|
||||
{
|
||||
SCOPED_TRACE(bom.name);
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
buf += "\n---\n";
|
||||
buf += std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(buf);
|
||||
SCOPED_TRACE("two docs");
|
||||
#ifdef RYML_DBG
|
||||
std::cout << "------------------\n" << bom.name << " x2\n" << buf << "\n";
|
||||
#endif
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
{
|
||||
SCOPED_TRACE("doc 0");
|
||||
std::forward<TestFn>(testfn)(parser, tree.docref(0), bom);
|
||||
}
|
||||
if(tree.num_children(tree.root_id()) > 1)
|
||||
{
|
||||
SCOPED_TRACE("doc 1");
|
||||
std::forward<TestFn>(testfn)(parser, tree.docref(1), bom);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_bom_json(bomspec const& bom, CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
SCOPED_TRACE(bom.name);
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(buf);
|
||||
#ifdef RYML_DBG
|
||||
std::cout << "------------------\n" << bom.name << "\n" << buf << "\n";
|
||||
#endif
|
||||
Tree tree = parse_json_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree.crootref(), bom);
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
bom2_err_none,
|
||||
bom2_err_vs,
|
||||
bom2_err_any
|
||||
} bom2_err_e;
|
||||
using bom2spec = std::tuple<bomspec,bomspec>;
|
||||
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_bom2(bom2spec const& spec, CreateFn &&createfn, TestFn &&testfn, bom2_err_e err=bom2_err_vs)
|
||||
{
|
||||
bomspec const& bom1 = std::get<0>(spec);
|
||||
bomspec const& bom2 = std::get<1>(spec);
|
||||
SCOPED_TRACE(bom1.name);
|
||||
SCOPED_TRACE(bom2.name);
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom1, bom2);
|
||||
SCOPED_TRACE(buf);
|
||||
#ifdef RYML_DBG
|
||||
std::cout << "------------------\n" << bom1.name << " vs " << bom2.name << "\n" << buf << "\n";
|
||||
#endif
|
||||
if(err == bom2_err_none || (err == bom2_err_vs && (bom1.encoding == bom2.encoding || bom2.bom.empty())))
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree.crootref(), bom1, bom2);
|
||||
if(::testing::Test::HasFailure())
|
||||
{
|
||||
std::cout << "------------------\n" << bom1.name << " vs " << bom2.name << "\n" << buf << "\n";
|
||||
print_tree(tree);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pfn_error orig = get_callbacks().m_error;
|
||||
ExpectError::check_error([&]{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
Tree tree;
|
||||
ASSERT_EQ((pfn_error)tree.callbacks().m_error, (pfn_error)parser.callbacks().m_error);
|
||||
ASSERT_NE((pfn_error)tree.callbacks().m_error, orig);
|
||||
parse_in_arena(&parser, to_csubstr(buf), &tree);
|
||||
print_tree(tree);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
std::string namefor(bomspec const& param)
|
||||
{
|
||||
std::string s(param.name.str, param.name.len);
|
||||
substr ss = to_substr(s);
|
||||
ss.replace('!', '_');
|
||||
ss.replace('-', '_');
|
||||
return s;
|
||||
}
|
||||
std::string namefor(bom2spec const& param)
|
||||
{
|
||||
return c4::catrs<std::string>(namefor(std::get<0>(param)), "__vs__", namefor(std::get<1>(param)));
|
||||
}
|
||||
template<class T>
|
||||
std::string namefor(testing::TestParamInfo<T> const& pinfo)
|
||||
{
|
||||
return namefor(pinfo.param);
|
||||
}
|
||||
|
||||
template<class... Args>
|
||||
std::string mkstr(Args&& ...args)
|
||||
{
|
||||
return c4::formatrs<std::string>(std::forward<Args>(args)...);
|
||||
}
|
||||
} // namespace anon
|
||||
|
||||
|
||||
/** Fixture parametrized on a single case. */
struct TestBOM : testing::TestWithParam<bomspec> {};
/** Fixture parametrized on a pair of cases. */
struct TestBOM2 : testing::TestWithParam<bom2spec> {};
|
||||
|
||||
// TestBOM runs once for each entry of bomspecs
INSTANTIATE_TEST_SUITE_P(
    byte_order_mark,
    TestBOM,
    ::testing::ValuesIn(bomspecs),
    namefor<bomspec>);
// TestBOM2 runs once for each ordered pair of entries of bomspecs
INSTANTIATE_TEST_SUITE_P(
    byte_order_mark,
    TestBOM2,
    ::testing::Combine(::testing::ValuesIn(bomspecs), ::testing::ValuesIn(bomspecs)),
    namefor<bom2spec>);
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Sanity-check the table itself: the c4 utf helpers must agree on what
// is (and is not) a BOM, and every entry must have the byte layout of
// the encoding it is tagged with.
TEST_P(TestBOM, specs)
{
    bomspec const& spec = GetParam();
    SCOPED_TRACE(spec.name);

    // there are no bytes to inspect in this case
    if(spec.name == "NOBOM")
        return;

    // names starting with '!' tag sequences that are not actual BOMs
    const bool is_actual_bom = ! spec.name.begins_with('!');
    const char *bytes = spec.bom.str;

    if(is_actual_bom)
    {
        EXPECT_EQ(c4::first_non_bom(spec.bom), spec.bom.len);
        EXPECT_EQ(c4::get_bom(spec.bom), spec.bom);
        EXPECT_EQ(c4::skip_bom(spec.bom), "");
    }
    else
    {
        EXPECT_EQ(c4::first_non_bom(spec.bom), 0u);
        EXPECT_EQ(c4::get_bom(spec.bom), "");
        EXPECT_EQ(c4::skip_bom(spec.bom), spec.bom);
    }

    switch(spec.encoding)
    {
    case UTF8:
        ASSERT_EQ(spec.bom.len, 3u);
        EXPECT_EQ(bytes[0], '\xef');
        EXPECT_EQ(bytes[1], '\xbb');
        EXPECT_EQ(bytes[2], '\xbf');
        break;
    case UTF16BE:
        ASSERT_EQ(spec.bom.len, 2u);
        if(is_actual_bom)
        {
            EXPECT_EQ(bytes[0], '\xfe');
            EXPECT_EQ(bytes[1], '\xff');
        }
        else
        {
            // zero byte first, then the ASCII character
            EXPECT_EQ(bytes[0], '\x00');
            EXPECT_GE(bytes[1], 0);
            EXPECT_LE(bytes[1], 0x7f);
        }
        break;
    case UTF16LE:
        ASSERT_EQ(spec.bom.len, 2u);
        if(is_actual_bom)
        {
            EXPECT_EQ(bytes[0], '\xff');
            EXPECT_EQ(bytes[1], '\xfe');
        }
        else
        {
            // the ASCII character first, then a zero byte
            EXPECT_GE(bytes[0], 0);
            EXPECT_LE(bytes[0], 0x7f);
            EXPECT_EQ(bytes[1], '\x00');
        }
        break;
    case UTF32BE:
        ASSERT_EQ(spec.bom.len, 4u);
        if(is_actual_bom)
        {
            EXPECT_EQ(bytes[0], '\x00');
            EXPECT_EQ(bytes[1], '\x00');
            EXPECT_EQ(bytes[2], '\xfe');
            EXPECT_EQ(bytes[3], '\xff');
        }
        else
        {
            // three zero bytes, then the ASCII character
            EXPECT_EQ(bytes[0], '\x00');
            EXPECT_EQ(bytes[1], '\x00');
            EXPECT_EQ(bytes[2], '\x00');
            EXPECT_GE(bytes[3], 0);
            EXPECT_LE(bytes[3], 0x7f);
        }
        break;
    case UTF32LE:
        ASSERT_EQ(spec.bom.len, 4u);
        if(is_actual_bom)
        {
            EXPECT_EQ(bytes[0], '\xff');
            EXPECT_EQ(bytes[1], '\xfe');
            EXPECT_EQ(bytes[2], '\x00');
            EXPECT_EQ(bytes[3], '\x00');
        }
        else
        {
            // the ASCII character, then three zero bytes
            EXPECT_GE(bytes[0], 0);
            EXPECT_LE(bytes[0], 0x7f);
            EXPECT_EQ(bytes[1], '\x00');
            EXPECT_EQ(bytes[2], '\x00');
            EXPECT_EQ(bytes[3], '\x00');
        }
        break;
    default:
        break;
    }
}
|
||||
|
||||
// The source consists of nothing but the bytes of the case; only the
// reported encoding is checked.
TEST_P(TestBOM, only_bom)
{
    test_bom(GetParam(),
             // takes the case by const reference, like the creator
             // lambdas of the other tests (no copy is needed)
             [](bomspec const& bom){
                 return mkstr("{}", bom.bom);
             },
             [](Parser const& parser, ConstNodeRef const&, bomspec const& bom){
                 EXPECT_EQ(parser.encoding(), bom.encoding);
             });
}
|
||||
|
||||
// The bytes of the case lead a plain scalar: the scalar must come out
// without them, and the encoding must match the case.
TEST_P(TestBOM, bom_and_scalar)
{
    auto make_yaml = [](bomspec const& bom){
        return mkstr("{}this is a scalar", bom.bom);
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(node.val(), "this is a scalar");
    };
    test_bom(GetParam(), make_yaml, check);
}
|
||||
|
||||
// The bytes of the case trail a plain scalar: they must be kept as part
// of the scalar, and the reported encoding must be UTF8.
TEST_P(TestBOM, scalar_and_bom)
{
    auto make_scalar = [](bomspec const& bom){
        return mkstr("this is a scalar{}", bom.bom);
    };
    auto check = [&make_scalar](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), UTF8);
        EXPECT_EQ(node.val(), make_scalar(bom));
    };
    test_bom(GetParam(), make_scalar, check);
}
|
||||
|
||||
// The bytes of the case sit in the middle of a plain scalar: they must
// be kept as part of the scalar, and the reported encoding must be UTF8.
TEST_P(TestBOM, scalar_bom_scalar)
{
    auto make_scalar = [](bomspec const& bom){
        return mkstr("this is a scalar{}and it continues", bom.bom);
    };
    auto check = [&make_scalar](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), UTF8);
        EXPECT_EQ(node.val(), make_scalar(bom));
    };
    test_bom(GetParam(), make_scalar, check);
}
|
||||
|
||||
// The bytes of the case lead an empty flow sequence; checked with both
// the YAML and the JSON entry points.
TEST_P(TestBOM, bom_and_seq)
{
    auto make_source = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "[]";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_TRUE(node.is_seq());
        EXPECT_EQ(node.num_children(), 0);
    };
    test_bom(GetParam(), make_source, check);
    test_bom_json(GetParam(), make_source, check);
}
|
||||
|
||||
// The bytes of the case lead an empty flow map; checked with both the
// YAML and the JSON entry points.
TEST_P(TestBOM, bom_and_map)
{
    auto make_source = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "{}";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_TRUE(node.is_map());
        EXPECT_EQ(node.num_children(), 0);
    };
    test_bom(GetParam(), make_source, check);
    test_bom_json(GetParam(), make_source, check);
}
|
||||
|
||||
// The bytes of the case lead an explicit document start ("---") which is
// followed by a scalar.
TEST_P(TestBOM, bom_and_doc)
{
    auto make_yaml = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "---\nabc";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(node.doc(0).val(), "abc");
    };
    test_bom(GetParam(), make_yaml, check, /*with_docs*/true);
}
|
||||
|
||||
// The bytes of the case lead a block map with two entries.
TEST_P(TestBOM, github559_1)
{
    auto make_yaml = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "a: 1\nb: 2\n";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_TRUE(node.is_map());
        ASSERT_EQ(node.num_children(), 2);
        EXPECT_EQ(node["a"].val(), "1");
        EXPECT_EQ(node["b"].val(), "2");
    };
    test_bom(GetParam(), make_yaml, check);
}
|
||||
|
||||
// The bytes of the case lead a block sequence with two entries.
TEST_P(TestBOM, github559_2)
{
    auto make_yaml = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "- a\n- b\n";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_TRUE(node.is_seq());
        ASSERT_EQ(node.num_children(), 2);
        EXPECT_EQ(node[0].val(), "a");
        EXPECT_EQ(node[1].val(), "b");
    };
    test_bom(GetParam(), make_yaml, check);
}
|
||||
|
||||
// The bytes of the case lead an explicit-key ('?') block map entry whose
// key is a plain scalar spanning two lines. Expected: a map with a single
// child, key "multiline scalar" and value "c".
// NOTE(review): whitespace inside the raw string literal is significant
// to the YAML being parsed - confirm its indentation against the
// repository before relying on this text.
TEST_P(TestBOM, github559_3)
|
||||
{
|
||||
auto mkyaml = [](bomspec const& bom1){
|
||||
std::string yaml(bom1.bom.str, bom1.bom.len);
|
||||
yaml.append(R"(? multiline
|
||||
scalar
|
||||
: c
|
||||
)");
|
||||
return yaml;
|
||||
};
|
||||
auto test = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
|
||||
EXPECT_EQ(parser.encoding(), bom.encoding);
|
||||
ASSERT_TRUE(node.is_map());
|
||||
ASSERT_EQ(node.num_children(), 1);
|
||||
EXPECT_EQ(node[0].key(), "multiline scalar");
|
||||
EXPECT_EQ(node[0].val(), "c");
|
||||
};
|
||||
test_bom(GetParam(), mkyaml, test);
|
||||
}
|
||||
|
||||
// The bytes of the case lead a literal block scalar ("|-") with two
// content lines. Expected: a value node holding "multiline\nscalar".
// NOTE(review): whitespace inside the raw string literal is significant
// to the YAML being parsed - confirm its indentation against the
// repository before relying on this text.
TEST_P(TestBOM, github559_4)
|
||||
{
|
||||
auto mkyaml = [](bomspec const& bom1){
|
||||
std::string yaml(bom1.bom.str, bom1.bom.len);
|
||||
yaml.append(R"(|-
|
||||
multiline
|
||||
scalar
|
||||
)");
|
||||
return yaml;
|
||||
};
|
||||
auto test = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
|
||||
EXPECT_EQ(parser.encoding(), bom.encoding);
|
||||
ASSERT_TRUE(node.is_val());
|
||||
EXPECT_EQ(node.val(), "multiline\nscalar");
|
||||
};
|
||||
test_bom(GetParam(), mkyaml, test);
|
||||
}
|
||||
|
||||
// The bytes of the case lead a folded block scalar (">-") with two
// content lines. Expected: a value node holding "multiline scalar".
// NOTE(review): whitespace inside the raw string literal is significant
// to the YAML being parsed - confirm its indentation against the
// repository before relying on this text.
TEST_P(TestBOM, github559_5)
|
||||
{
|
||||
auto mkyaml = [](bomspec const& bom1){
|
||||
std::string yaml(bom1.bom.str, bom1.bom.len);
|
||||
yaml.append(R"(>-
|
||||
multiline
|
||||
scalar
|
||||
)");
|
||||
return yaml;
|
||||
};
|
||||
auto test = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
|
||||
EXPECT_EQ(parser.encoding(), bom.encoding);
|
||||
ASSERT_TRUE(node.is_val());
|
||||
EXPECT_EQ(node.val(), "multiline scalar");
|
||||
};
|
||||
test_bom(GetParam(), mkyaml, test);
|
||||
}
|
||||
|
||||
|
||||
// The bytes of the case lead a block map whose only value is a block
// sequence with two entries.
TEST_P(TestBOM, github559_6)
{
    auto make_yaml = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + "a:\n - 1\n - 2\n";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_TRUE(node.is_map());
        ASSERT_EQ(node.num_children(), 1);
        ASSERT_TRUE(node["a"].is_seq());
        ASSERT_EQ(node["a"].num_children(), 2);
        EXPECT_EQ(node["a"][0].val(), "1");
        EXPECT_EQ(node["a"][1].val(), "2");
    };
    test_bom(GetParam(), make_yaml, check);
}
|
||||
|
||||
// Like github559_6, but the map key is empty: the bytes of the case are
// directly followed by " :".
TEST_P(TestBOM, github559_6_1)
{
    auto make_yaml = [](bomspec const& bom){
        return std::string(bom.bom.str, bom.bom.len) + " :\n - 1\n - 2\n";
    };
    auto check = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_TRUE(node.is_map());
        ASSERT_EQ(node.num_children(), 1);
        ASSERT_TRUE(node[0].is_seq());
        EXPECT_EQ(node[0].key(), "");
        ASSERT_EQ(node[0].num_children(), 2);
        EXPECT_EQ(node[0][0].val(), "1");
        EXPECT_EQ(node[0][1].val(), "2");
    };
    test_bom(GetParam(), make_yaml, check);
}
|
||||
|
||||
// The bytes of the case lead a block sequence whose only entry is a
// block map with two keys.
TEST_P(TestBOM, github559_7)
{
    auto mkyaml = [](bomspec const& bom1){
        std::string yaml(bom1.bom.str, bom1.bom.len);
        // 'a' starts at column 2 (after "- "), so 'b' must be indented
        // by two columns to belong to the same nested map, which is what
        // the expectations below require
        yaml.append("- a: 1\n  b: 2\n");
        return yaml;
    };
    auto test = [](Parser const& parser, ConstNodeRef const& node, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_TRUE(node.is_seq());
        ASSERT_EQ(node.num_children(), 1);
        ASSERT_TRUE(node[0].is_map());
        ASSERT_EQ(node[0].num_children(), 2);
        EXPECT_EQ(node[0]["a"].val(), "1");
        EXPECT_EQ(node[0]["b"].val(), "2");
    };
    test_bom(GetParam(), mkyaml, test);
}
|
||||
|
||||
// github issue 559: like github559_7, but the first key of the nested map
// is empty, so the entries are checked by position.
TEST_P(TestBOM, github559_8)
{
    auto make_input = [](bomspec const& spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "- : 1\n b: 2\n";
        return text;
    };
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
        ASSERT_TRUE(root.is_seq());
        ASSERT_EQ(root.num_children(), 1);
        ConstNodeRef const map = root[0];
        ASSERT_TRUE(map.is_map());
        ASSERT_EQ(map.num_children(), 2);
        EXPECT_EQ(map[0].key(), "");
        EXPECT_EQ(map[0].val(), "1");
        EXPECT_EQ(map[1].key(), "b");
        EXPECT_EQ(map[1].val(), "2");
    };
    test_bom(GetParam(), make_input, verify);
}
|
||||
|
||||
// github issue 559: a sequence holding one map written with explicit
// "? key" / ": val" entries, parsed after an optional BOM.
TEST_P(TestBOM, github559_9)
{
    auto make_input = [](bomspec const& spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "- ? key\n : val\n ? key1\n : val1\n";
        return text;
    };
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
        ASSERT_TRUE(root.is_seq());
        ASSERT_EQ(root.num_children(), 1);
        ConstNodeRef const map = root[0];
        ASSERT_TRUE(map.is_map());
        ASSERT_EQ(map.num_children(), 2);
        EXPECT_EQ(map[0].key(), "key");
        EXPECT_EQ(map[0].val(), "val");
        EXPECT_EQ(map[1].key(), "key1");
        EXPECT_EQ(map[1].val(), "val1");
    };
    test_bom(GetParam(), make_input, verify);
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// First BOM on its own line, then a document marker, then the second BOM
// immediately followed by the scalar "abc".
TEST_P(TestBOM2, bom_doc_bom_0)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}\n---\n{}abc", first.bom, second.bom);
    };
    // the reported encoding is the one of the first BOM
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& first, bomspec const& /*second*/){
        EXPECT_EQ(parser.encoding(), first.encoding);
        EXPECT_EQ(root[0].val(), "abc");
    };
    test_bom2(GetParam(), make_input, verify);
}
|
||||
|
||||
// First BOM directly before the document marker; the second BOM sits on
// the next line, and the scalar "abc" on the line after that.
TEST_P(TestBOM2, bom_doc_bom_1)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}---\n{}\nabc", first.bom, second.bom);
    };
    // the reported encoding is the one of the first BOM
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& first, bomspec const& /*second*/){
        EXPECT_EQ(parser.encoding(), first.encoding);
        EXPECT_EQ(root[0].val(), "abc");
    };
    test_bom2(GetParam(), make_input, verify);
}
|
||||
|
||||
// BOM, document marker, second BOM on its own line, then the scalar "abc".
// NOTE(review): as written, the input format string is the same as in
// bom_doc_bom_1 -- confirm whether a different layout was intended here.
TEST_P(TestBOM2, bom_doc_bom_2)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}---\n{}\nabc", first.bom, second.bom);
    };
    // the reported encoding is the one of the first BOM
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& first, bomspec const& /*second*/){
        EXPECT_EQ(parser.encoding(), first.encoding);
        EXPECT_EQ(root[0].val(), "abc");
    };
    test_bom2(GetParam(), make_input, verify);
}
|
||||
|
||||
// The second BOM is glued to the "---" token. When the second BOM is
// non-empty the whole "---<bom> abc" text is expected as a plain scalar
// at the root; otherwise "abc" is expected as the first child's value.
TEST_P(TestBOM2, bom_doc_bom_3)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}---{} abc", first.bom, second.bom);
    };
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& first, bomspec const& second){
        EXPECT_EQ(parser.encoding(), first.encoding);
        if(!(second.name == "NOBOM"))
        {
            std::string expected = mkstr("---{} abc", second.bom);
            EXPECT_EQ(root.val(), expected);
            return;
        }
        EXPECT_EQ(root[0].val(), "abc");
    };
    test_bom2(GetParam(), make_input, verify, bom2_err_none);
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Two documents, each made of an optional BOM directly followed by a
// scalar: "abc" in the first document, "def" in the second.
TEST_P(TestBOM2, bom_scalar_doc_bom_scalar_1)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}abc\n---\n{}def\n", first.bom, second.bom);
    };
    // the reported encoding is the one of the first BOM
    auto verify = [](Parser const& parser, ConstNodeRef const& root, bomspec const& first, bomspec const& /*second*/){
        EXPECT_EQ(parser.encoding(), first.encoding);
        ASSERT_EQ(root.num_children(), 2);
        EXPECT_EQ(root[0].val(), "abc");
        EXPECT_EQ(root[1].val(), "def");
    };
    test_bom2(GetParam(), make_input, verify);
}
|
||||
|
||||
namespace {
|
||||
static void test_bom_scalar_doc_bom_scalar_2_fn(Parser const& parser, ConstNodeRef const& node, bomspec const& bom1, bomspec const& bom2)
|
||||
{
|
||||
EXPECT_EQ(parser.encoding(), bom1.encoding);
|
||||
if(bom1.name == "NOBOM")
|
||||
{
|
||||
if(bom2.name == "NOBOM")
|
||||
{
|
||||
ASSERT_EQ(node.num_children(), 2);
|
||||
EXPECT_EQ(node[0].val(), "abc");
|
||||
EXPECT_EQ(node[1].val(), "def");
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_EQ(node.num_children(), 1);
|
||||
EXPECT_EQ(node[0].val(), mkstr("abc ---{} def", bom2.bom));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(bom2.name == "NOBOM")
|
||||
{
|
||||
ASSERT_EQ(node.num_children(), 2);
|
||||
EXPECT_EQ(node[0].val(), mkstr("---{} abc", bom1.bom));
|
||||
EXPECT_EQ(node[1].val(), "def");
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT_TRUE(node.is_val());
|
||||
EXPECT_EQ(node.val(), mkstr("---{} abc ---{} def", bom1.bom, bom2.bom));
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace anon
|
||||
|
||||
// BOMs glued to the "---" tokens, scalars on the same line as the token.
// The first BOM is emitted twice: at the start of the buffer and again
// right after the first "---".
TEST_P(TestBOM2, bom_scalar_doc_bom_scalar_2)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}---{} abc\n---{} def\n", first.bom, first.bom, second.bom);
    };
    test_bom2(GetParam(), make_input, test_bom_scalar_doc_bom_scalar_2_fn, bom2_err_none);
}
|
||||
|
||||
// BOMs glued to the "---" tokens, scalars on the line after the token.
// The first BOM is emitted twice: at the start of the buffer and again
// right after the first "---". Checked with the same function as
// bom_scalar_doc_bom_scalar_2.
TEST_P(TestBOM2, bom_scalar_doc_bom_scalar_3)
{
    auto make_input = [](bomspec const& first, bomspec const& second){
        return mkstr("{}---{}\nabc\n---{}\ndef\n", first.bom, first.bom, second.bom);
    };
    test_bom2(GetParam(), make_input, test_bom_scalar_doc_bom_scalar_2_fn, bom2_err_none);
}
|
||||
|
||||
|
||||
//-------------------------------------------
|
||||
// this is needed to use the test case library
|
||||
Case const* get_case(csubstr /*name*/)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace yml
|
||||
} // namespace c4
|
||||
@@ -48,326 +48,6 @@ scalar
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
struct bomspec
|
||||
{
|
||||
csubstr name; Encoding_e encoding; csubstr bom;
|
||||
void checkchars() const
|
||||
{
|
||||
if(name == "NOBOM")
|
||||
return;
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(c4::first_non_bom(bom), bom.len);
|
||||
EXPECT_EQ(c4::get_bom(bom), bom);
|
||||
EXPECT_EQ(c4::skip_bom(bom), "");
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(c4::first_non_bom(bom), 0u);
|
||||
EXPECT_EQ(c4::get_bom(bom), "");
|
||||
EXPECT_EQ(c4::skip_bom(bom), bom);
|
||||
}
|
||||
switch(encoding)
|
||||
{
|
||||
case UTF32BE:
|
||||
ASSERT_EQ(bom.len, 4u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\xfe');
|
||||
EXPECT_EQ(bom.str[3], '\xff');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_GE(bom.str[3], 0);
|
||||
EXPECT_LE(bom.str[3], 0x7f);
|
||||
}
|
||||
break;
|
||||
case UTF32LE:
|
||||
ASSERT_EQ(bom.len, 4u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xff');
|
||||
EXPECT_EQ(bom.str[1], '\xfe');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_EQ(bom.str[3], '\x00');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_GE(bom.str[0], 0);
|
||||
EXPECT_LE(bom.str[0], 0x7f);
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_EQ(bom.str[3], '\x00');
|
||||
}
|
||||
break;
|
||||
case UTF16BE:
|
||||
ASSERT_EQ(bom.len, 2u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xfe');
|
||||
EXPECT_EQ(bom.str[1], '\xff');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_GE(bom.str[1], 0);
|
||||
EXPECT_LE(bom.str[1], 0x7f);
|
||||
}
|
||||
break;
|
||||
case UTF16LE:
|
||||
ASSERT_EQ(bom.len, 2u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xff');
|
||||
EXPECT_EQ(bom.str[1], '\xfe');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_GE(bom.str[0], 0);
|
||||
EXPECT_LE(bom.str[0], 0x7f);
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
}
|
||||
break;
|
||||
case UTF8:
|
||||
ASSERT_EQ(bom.len, 3u);
|
||||
EXPECT_EQ(bom.str[0], '\xef');
|
||||
EXPECT_EQ(bom.str[1], '\xbb');
|
||||
EXPECT_EQ(bom.str[2], '\xbf');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
// Table of the specs iterated by the test_boms*() helpers.
//
// Every entry that contains NUL bytes is built with csubstr(literal, len):
// a bare string causes problems in gcc5 and earlier.
const bomspec boms[] = {
    {"NOBOM", UTF8, ""},
    {"UTF8", UTF8, "\xef\xbb\xbf"},
    // UTF16, big endian
    {"UTF16BE", UTF16BE, "\xfe\xff"},
    {"!UTF16BE-a", UTF16BE, csubstr("\x00""a", 2)},
    {"!UTF16BE-b", UTF16BE, csubstr("\x00""b", 2)},
    {"!UTF16BE-0", UTF16BE, csubstr("\x00""0", 2)},
    // UTF16, little endian
    {"UTF16LE", UTF16LE, "\xff\xfe"},
    {"!UTF16LE-a", UTF16LE, csubstr("a""\x00", 2)},
    {"!UTF16LE-b", UTF16LE, csubstr("b""\x00", 2)},
    {"!UTF16LE-0", UTF16LE, csubstr("0""\x00", 2)},
    // UTF32, big endian
    {"UTF32BE", UTF32BE, csubstr("\x00\x00\xfe\xff", 4)},
    {"!UTF32BE-a", UTF32BE, csubstr("\x00\x00\x00""a", 4)},
    {"!UTF32BE-b", UTF32BE, csubstr("\x00\x00\x00""b", 4)},
    {"!UTF32BE-0", UTF32BE, csubstr("\x00\x00\x00""0", 4)},
    // UTF32, little endian
    {"UTF32LE", UTF32LE, csubstr("\xff\xfe\x00\x00", 4)},
    {"!UTF32LE-a", UTF32LE, csubstr("a""\x00\x00\x00", 4)},
    {"!UTF32LE-b", UTF32LE, csubstr("b""\x00\x00\x00", 4)},
    {"!UTF32LE-0", UTF32LE, csubstr("0""\x00\x00\x00", 4)},
};
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
for(const bomspec bom : boms)
|
||||
{
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(bom.name);
|
||||
SCOPED_TRACE(buf);
|
||||
bom.checkchars();
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom);
|
||||
}
|
||||
}
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms_json(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
for(const bomspec bom : boms)
|
||||
{
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(bom.name);
|
||||
SCOPED_TRACE(buf);
|
||||
bom.checkchars();
|
||||
Tree tree = parse_json_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom);
|
||||
}
|
||||
}
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms2(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
for(const bomspec bom1 : boms)
|
||||
{
|
||||
SCOPED_TRACE(bom1.name);
|
||||
bom1.checkchars();
|
||||
for(const bomspec bom2 : boms)
|
||||
{
|
||||
SCOPED_TRACE(bom2.name);
|
||||
bom2.checkchars();
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom1, bom2);
|
||||
SCOPED_TRACE(buf);
|
||||
if(bom1.encoding == bom2.encoding || bom2.bom.empty())
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom1);
|
||||
}
|
||||
else
|
||||
{
|
||||
pfn_error orig = get_callbacks().m_error;
|
||||
ExpectError::check_error([&]{
|
||||
Tree tree;
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
ASSERT_EQ((pfn_error)tree.callbacks().m_error, (pfn_error)parser.callbacks().m_error);
|
||||
ASSERT_NE((pfn_error)tree.callbacks().m_error, orig);
|
||||
parse_in_arena(&parser, to_csubstr(buf), &tree);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The buffer holds nothing but the BOM bytes; only the detected encoding
// is checked.
TEST(byte_order_mark, only_bom)
{
    auto make_input = [](bomspec spec){
        return std::string(spec.bom.str, spec.bom.len);
    };
    auto verify = [](Parser const& parser, Tree const&, bomspec spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
    };
    test_boms(make_input, verify);
}
|
||||
|
||||
// BOM bytes directly followed by a plain scalar: the BOM must set the
// encoding and must not be part of the scalar value.
TEST(byte_order_mark, bom_and_scalar)
{
    auto make_input = [](bomspec spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "this is a scalar";
        return text;
    };
    auto verify = [](Parser const& parser, Tree const& tree, bomspec spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
        EXPECT_EQ(tree.rootref().val(), "this is a scalar");
    };
    test_boms(make_input, verify);
}
|
||||
|
||||
// BOM bytes placed after the scalar text: the encoding is expected to be
// UTF8, and the bytes are expected to remain part of the scalar value.
TEST(byte_order_mark, scalar_and_bom)
{
    auto make_input = [](bomspec spec){
        std::string text("this is a scalar");
        text.append(spec.bom.str, spec.bom.len);
        return text;
    };
    auto verify = [&make_input](Parser const& parser, Tree const& tree, bomspec spec){
        EXPECT_EQ(parser.encoding(), UTF8);
        // the expected value is the full input buffer, trailing bytes included
        EXPECT_EQ(tree.rootref().val(), make_input(spec));
    };
    test_boms(make_input, verify);
}
|
||||
|
||||
// BOM bytes placed in the middle of the scalar text: the encoding is
// expected to be UTF8, and the bytes are expected to remain part of the
// scalar value.
TEST(byte_order_mark, scalar_bom_scalar)
{
    auto make_input = [](bomspec spec){
        std::string text("this is a scalar");
        text.append(spec.bom.str, spec.bom.len);
        text += "and it continues";
        return text;
    };
    auto verify = [&make_input](Parser const& parser, Tree const& tree, bomspec spec){
        EXPECT_EQ(parser.encoding(), UTF8);
        // the expected value is the full input buffer, embedded bytes included
        EXPECT_EQ(tree.rootref().val(), make_input(spec));
    };
    test_boms(make_input, verify);
}
|
||||
|
||||
// BOM bytes followed by an empty flow sequence, parsed both as YAML and as
// JSON; only the detected encoding is checked.
TEST(byte_order_mark, bom_and_seq)
{
    auto make_input = [](bomspec spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "[]";
        return text;
    };
    auto verify = [](Parser const& parser, Tree const&, bomspec spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
    };
    test_boms(make_input, verify);
    test_boms_json(make_input, verify);
}
|
||||
|
||||
// BOM bytes followed by an empty flow map, parsed both as YAML and as
// JSON; only the detected encoding is checked.
TEST(byte_order_mark, bom_and_map)
{
    auto make_input = [](bomspec spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "{}";
        return text;
    };
    auto verify = [](Parser const& parser, Tree const&, bomspec spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
    };
    test_boms(make_input, verify);
    test_boms_json(make_input, verify);
}
|
||||
|
||||
// BOM bytes directly followed by a document marker and the scalar "abc".
TEST(byte_order_mark, bom_and_doc)
{
    auto make_input = [](bomspec spec){
        std::string text(spec.bom.str, spec.bom.len);
        text += "---\nabc";
        return text;
    };
    auto verify = [](Parser const& parser, Tree const& tree, bomspec spec){
        EXPECT_EQ(parser.encoding(), spec.encoding);
        EXPECT_EQ(tree.docref(0).val(), "abc");
    };
    test_boms(make_input, verify);
}
|
||||
|
||||
// First BOM, document marker, then a second BOM directly before the scalar
// "abc". Run for every pair of specs through test_boms2(), which calls the
// checker with the first spec.
//
// The input builder no longer prints each buffer to std::cout: that was
// debugging output emitting raw BOM bytes for every pair, and test_boms2()
// already attaches both spec names and the buffer via SCOPED_TRACE.
TEST(byte_order_mark, bom_doc_bom)
{
    auto mkyaml = [](bomspec const& bom1, bomspec const& bom2){
        std::string yaml(bom1.bom.str, bom1.bom.len);
        yaml.append("---\n");
        yaml.append(bom2.bom.str, bom2.bom.len);
        yaml.append("abc");
        return yaml;
    };
    auto test = [](Parser const& parser, Tree const& tree, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(tree.docref(0).val(), "abc");
    };
    test_boms2(mkyaml, test);
}
|
||||
|
||||
// Two documents, each an optional BOM directly followed by a scalar
// ("abc", then "def"). Run for every pair of specs through test_boms2(),
// which calls the checker with the first spec.
//
// The input builder no longer prints each buffer to std::cout: that was
// debugging output emitting raw BOM bytes for every pair, and test_boms2()
// already attaches both spec names and the buffer via SCOPED_TRACE.
TEST(byte_order_mark, bom_scalar_doc_bom_scalar)
{
    auto mkyaml = [](bomspec const& bom1, bomspec const& bom2){
        std::string yaml(bom1.bom.str, bom1.bom.len);
        yaml.append("abc\n");
        yaml.append("---\n");
        yaml.append(bom2.bom.str, bom2.bom.len);
        yaml.append("def\n");
        return yaml;
    };
    auto test = [](Parser const& parser, Tree const& tree, bomspec const& bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_EQ(tree.rootref().num_children(), 2);
        EXPECT_EQ(tree.docref(0).val(), "abc");
        EXPECT_EQ(tree.docref(1).val(), "def");
    };
    test_boms2(mkyaml, test);
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user