mirror of
https://github.com/biojppm/rapidyaml.git
synced 2026-01-18 21:41:18 +01:00
re #476: handle byte order marks
This commit is contained in:
@@ -761,6 +761,11 @@ following situations:
|
||||
reflects the usual practice of having at most 1 or 2 tag directives;
|
||||
also, be aware that this feature is under consideration for removal
|
||||
in YAML 1.3.
|
||||
* Byte Order Marks: while ryml correctly handles BOMs at the beginning
|
||||
of the stream or documents (as per the standard), BOMs inside
|
||||
scalars are not processed and are kept verbatim in the scalar. The [standard mandates that they should be
|
||||
quoted](https://yaml.org/spec/1.2.2/#52-character-encodings) when
|
||||
emitted, but ryml does not do this.
|
||||
* ryml tends to be on the permissive side in several cases where the
|
||||
YAML standard dictates that there should be an error; in many of these
|
||||
cases, ryml will tolerate the input. This may be good or bad, but in
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
- add workarounds for problems with codegen of gcc 11,12,13.
|
||||
- improve CI coverage of gcc and clang optimization levels.
|
||||
- [BREAKING] Fix [#477](https://github.com/biojppm/rapidyaml/issues/477) ([PR#479](https://github.com/biojppm/rapidyaml/pull/479)): changed `read<std::map>()` to overwrite existing entries. The provided implementations had an inconsistency between `std::map` (which wasn't overwriting) and `std::vector` (which *was* overwriting).
|
||||
- Fix [#476](https://github.com/biojppm/rapidyaml/issues/476) ([PR#493](https://github.com/biojppm/rapidyaml/pull/493)): add handling of Byte Order Marks.
|
||||
- [PR#492](https://github.com/biojppm/rapidyaml/pull/492): fix emit of explicit keys when indented:
|
||||
```yaml
|
||||
fixed:
|
||||
|
||||
@@ -419,6 +419,20 @@ struct RYML_EXPORT Callbacks
|
||||
/** @} */
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
/** Character encodings that the parser can report for a source
 * buffer. NOBOM is the value the parser starts from, before any byte
 * order mark has been handled. */
typedef enum {
    NOBOM = 0,
    UTF8 = 1,
    UTF16LE = 2,
    UTF16BE = 3,
    UTF32LE = 4,
    UTF32BE = 5,
} Encoding_e;
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
@@ -523,7 +537,7 @@ namespace detail {
|
||||
/** Compile-time constant holding a byte value in the integer type that
 * matches the signedness of plain `char` on the target: an int8_t when
 * char is signed, an uint8_t otherwise.
 *
 * Both alternatives are derived from @p unsignedval, so they always
 * denote the same byte. @p signedval is kept in the parameter list so
 * that existing instantiations (eg through _RYML_CHCONST) still
 * compile; it does not take part in the result. */
template<int8_t signedval, uint8_t unsignedval>
struct _charconstant_t
    : public std::conditional<std::is_signed<char>::value,
                              std::integral_constant<int8_t, static_cast<int8_t>(unsignedval)>,
                              std::integral_constant<uint8_t, unsignedval>>::type
{};
|
||||
#define _RYML_CHCONST(signedval, unsignedval) ::c4::yml::detail::_charconstant_t<INT8_C(signedval), UINT8_C(unsignedval)>::value
|
||||
|
||||
@@ -73,7 +73,7 @@ inline void _dbg_dumper(csubstr s)
|
||||
/** Get a writable view spanning the whole of a function-local static
 * buffer of 2048 chars. Every call returns a view over the same
 * storage. */
inline substr _dbg_buf() noexcept
{
    static char writebuf[2048];
    // build the view explicitly: g++-5 has trouble with `return writebuf;`
    return substr{writebuf, sizeof(writebuf)};
}
|
||||
template<class ...Args>
|
||||
C4_NO_INLINE void _dbg_printf(c4::csubstr fmt, Args const& ...args)
|
||||
|
||||
@@ -5,13 +5,13 @@
|
||||
#include "c4/error.hpp"
|
||||
#include "c4/charconv.hpp"
|
||||
#include "c4/utf.hpp"
|
||||
#include <c4/dump.hpp>
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "c4/yml/detail/parser_dbg.hpp"
|
||||
#include "c4/yml/filter_processor.hpp"
|
||||
#ifdef RYML_DBG
|
||||
#include <c4/dump.hpp>
|
||||
#include "c4/yml/detail/print.hpp"
|
||||
#endif
|
||||
|
||||
@@ -261,6 +261,9 @@ ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions
|
||||
, m_evt_handler(evt_handler)
|
||||
, m_pending_anchors()
|
||||
, m_pending_tags()
|
||||
, m_was_inside_qmrk(false)
|
||||
, m_doc_empty(false)
|
||||
, m_encoding(NOBOM)
|
||||
, m_newline_offsets()
|
||||
, m_newline_offsets_size(0)
|
||||
, m_newline_offsets_capacity(0)
|
||||
@@ -277,6 +280,9 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine &&that) noexcept
|
||||
, m_evt_handler(that.m_evt_handler)
|
||||
, m_pending_anchors(that.m_pending_anchors)
|
||||
, m_pending_tags(that.m_pending_tags)
|
||||
, m_was_inside_qmrk(false)
|
||||
, m_doc_empty(false)
|
||||
, m_encoding(NOBOM)
|
||||
, m_newline_offsets(that.m_newline_offsets)
|
||||
, m_newline_offsets_size(that.m_newline_offsets_size)
|
||||
, m_newline_offsets_capacity(that.m_newline_offsets_capacity)
|
||||
@@ -293,6 +299,9 @@ ParseEngine<EventHandler>::ParseEngine(ParseEngine const& that)
|
||||
, m_evt_handler(that.m_evt_handler)
|
||||
, m_pending_anchors(that.m_pending_anchors)
|
||||
, m_pending_tags(that.m_pending_tags)
|
||||
, m_was_inside_qmrk(false)
|
||||
, m_doc_empty(false)
|
||||
, m_encoding(NOBOM)
|
||||
, m_newline_offsets()
|
||||
, m_newline_offsets_size()
|
||||
, m_newline_offsets_capacity()
|
||||
@@ -317,6 +326,9 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine &&th
|
||||
m_evt_handler = that.m_evt_handler;
|
||||
m_pending_anchors = that.m_pending_anchors;
|
||||
m_pending_tags = that.m_pending_tags;
|
||||
m_was_inside_qmrk = that.m_was_inside_qmrk;
|
||||
m_doc_empty = that.m_doc_empty;
|
||||
m_encoding = that.m_encoding;
|
||||
m_newline_offsets = (that.m_newline_offsets);
|
||||
m_newline_offsets_size = (that.m_newline_offsets_size);
|
||||
m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
|
||||
@@ -337,6 +349,9 @@ ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine cons
|
||||
m_evt_handler = that.m_evt_handler;
|
||||
m_pending_anchors = that.m_pending_anchors;
|
||||
m_pending_tags = that.m_pending_tags;
|
||||
m_was_inside_qmrk = that.m_was_inside_qmrk;
|
||||
m_doc_empty = that.m_doc_empty;
|
||||
m_encoding = that.m_encoding;
|
||||
if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
|
||||
_resize_locations(that.m_newline_offsets_capacity);
|
||||
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
|
||||
@@ -357,6 +372,9 @@ void ParseEngine<EventHandler>::_clr()
|
||||
m_evt_handler = {};
|
||||
m_pending_anchors = {};
|
||||
m_pending_tags = {};
|
||||
m_was_inside_qmrk = false;
|
||||
m_doc_empty = true;
|
||||
m_encoding = NOBOM;
|
||||
m_newline_offsets = {};
|
||||
m_newline_offsets_size = {};
|
||||
m_newline_offsets_capacity = {};
|
||||
@@ -385,11 +403,12 @@ void ParseEngine<EventHandler>::_reset()
|
||||
m_pending_anchors = {};
|
||||
m_pending_tags = {};
|
||||
m_doc_empty = true;
|
||||
m_was_inside_qmrk = false;
|
||||
m_encoding = NOBOM;
|
||||
if(m_options.locations())
|
||||
{
|
||||
_prepare_locations();
|
||||
}
|
||||
m_was_inside_qmrk = false;
|
||||
}
|
||||
|
||||
|
||||
@@ -4351,6 +4370,72 @@ void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
|
||||
}
|
||||
}
|
||||
|
||||
template<class EventHandler>
|
||||
bool ParseEngine<EventHandler>::_handle_bom()
|
||||
{
|
||||
const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
|
||||
if(rem.len)
|
||||
{
|
||||
const csubstr rest = rem.sub(1);
|
||||
// https://yaml.org/spec/1.2.2/#52-character-encodings
|
||||
#define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
|
||||
if(rem.begins_with({"\x00\x00\xfe\xff", 4}) || (rem.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF32BE");
|
||||
_handle_bom(UTF32BE);
|
||||
_line_progressed(4);
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xff\xfe\x00\x00") || (rest.begins_with({"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF32LE");
|
||||
_handle_bom(UTF32LE);
|
||||
_line_progressed(4);
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF16BE");
|
||||
_handle_bom(UTF16BE);
|
||||
_line_progressed(2);
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF16LE");
|
||||
_handle_bom(UTF16LE);
|
||||
_line_progressed(2);
|
||||
return true;
|
||||
}
|
||||
else if(rem.begins_with("\xef\xbb\xbf"))
|
||||
{
|
||||
_c4dbgp("byte order mark: UTF8");
|
||||
_handle_bom(UTF8);
|
||||
_line_progressed(3);
|
||||
return true;
|
||||
}
|
||||
#undef _rymlisascii
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class EventHandler>
|
||||
void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
|
||||
{
|
||||
if(m_encoding == NOBOM)
|
||||
{
|
||||
const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
|
||||
if(enc == UTF8 || is_beginning_of_file)
|
||||
m_encoding = enc;
|
||||
else
|
||||
_c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
|
||||
}
|
||||
else if(enc != m_encoding)
|
||||
{
|
||||
_c4err("byte order mark can only be set once");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
@@ -7202,6 +7287,10 @@ void ParseEngine<EventHandler>::_handle_unk_json()
|
||||
_set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
|
||||
_line_progressed(1);
|
||||
}
|
||||
else if(_handle_bom())
|
||||
{
|
||||
_c4dbgp("byte order mark");
|
||||
}
|
||||
else
|
||||
{
|
||||
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL));
|
||||
@@ -7288,8 +7377,15 @@ void ParseEngine<EventHandler>::_handle_unk()
|
||||
|
||||
if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
|
||||
{
|
||||
const char first = rem.str[0];
|
||||
_c4dbgp("rtop: zero indent + at line begin");
|
||||
if(_handle_bom())
|
||||
{
|
||||
_c4dbgp("byte order mark!");
|
||||
rem = m_evt_handler->m_curr->line_contents.rem;
|
||||
if(!rem.len)
|
||||
return;
|
||||
}
|
||||
const char first = rem.str[0];
|
||||
if(first == '-')
|
||||
{
|
||||
_c4dbgp("rtop: suspecting doc");
|
||||
|
||||
@@ -366,6 +366,10 @@ public:
|
||||
/** Get the latest YAML buffer parsed by this object. */
|
||||
csubstr source() const { return m_buf; }
|
||||
|
||||
/** Get the encoding of the latest YAML buffer parsed by this object.
|
||||
* If no encoding was specified, UTF8 is assumed as per the YAML standard. */
|
||||
Encoding_e encoding() const { return m_encoding != NOBOM ? m_encoding : UTF8; }
|
||||
|
||||
id_type stack_capacity() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.capacity(); }
|
||||
size_t locations_capacity() const { return m_newline_offsets_capacity; }
|
||||
|
||||
@@ -714,6 +718,8 @@ private:
|
||||
void _handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line);
|
||||
size_t _select_indentation_from_annotations(size_t val_indentation, size_t val_line);
|
||||
void _handle_directive(csubstr rem);
|
||||
bool _handle_bom();
|
||||
void _handle_bom(Encoding_e enc);
|
||||
|
||||
void _check_tag(csubstr tag);
|
||||
|
||||
@@ -738,6 +744,8 @@ private:
|
||||
bool m_was_inside_qmrk;
|
||||
bool m_doc_empty = true;
|
||||
|
||||
/** encoding recorded for the buffer being parsed. NOBOM means that no
 * byte order mark was recorded; encoding() reports this as UTF8. */
Encoding_e m_encoding = NOBOM;
|
||||
|
||||
private:
|
||||
|
||||
size_t *m_newline_offsets;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "./test_lib/test_group.hpp"
|
||||
#include "./test_lib/test_group.def.hpp"
|
||||
#include <c4/utf.hpp>
|
||||
|
||||
namespace c4 {
|
||||
namespace yml {
|
||||
@@ -47,6 +48,326 @@ scalar
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
struct bomspec
|
||||
{
|
||||
csubstr name; Encoding_e encoding; csubstr bom;
|
||||
void checkchars() const
|
||||
{
|
||||
if(name == "NOBOM")
|
||||
return;
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(c4::first_non_bom(bom), bom.len);
|
||||
EXPECT_EQ(c4::get_bom(bom), bom);
|
||||
EXPECT_EQ(c4::skip_bom(bom), "");
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(c4::first_non_bom(bom), 0u);
|
||||
EXPECT_EQ(c4::get_bom(bom), "");
|
||||
EXPECT_EQ(c4::skip_bom(bom), bom);
|
||||
}
|
||||
switch(encoding)
|
||||
{
|
||||
case UTF32BE:
|
||||
ASSERT_EQ(bom.len, 4u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\xfe');
|
||||
EXPECT_EQ(bom.str[3], '\xff');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_GE(bom.str[3], 0);
|
||||
EXPECT_LE(bom.str[3], 0x7f);
|
||||
}
|
||||
break;
|
||||
case UTF32LE:
|
||||
ASSERT_EQ(bom.len, 4u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xff');
|
||||
EXPECT_EQ(bom.str[1], '\xfe');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_EQ(bom.str[3], '\x00');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_GE(bom.str[0], 0);
|
||||
EXPECT_LE(bom.str[0], 0x7f);
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
EXPECT_EQ(bom.str[2], '\x00');
|
||||
EXPECT_EQ(bom.str[3], '\x00');
|
||||
}
|
||||
break;
|
||||
case UTF16BE:
|
||||
ASSERT_EQ(bom.len, 2u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xfe');
|
||||
EXPECT_EQ(bom.str[1], '\xff');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\x00');
|
||||
EXPECT_GE(bom.str[1], 0);
|
||||
EXPECT_LE(bom.str[1], 0x7f);
|
||||
}
|
||||
break;
|
||||
case UTF16LE:
|
||||
ASSERT_EQ(bom.len, 2u);
|
||||
if(!name.begins_with('!'))
|
||||
{
|
||||
EXPECT_EQ(bom.str[0], '\xff');
|
||||
EXPECT_EQ(bom.str[1], '\xfe');
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_GE(bom.str[0], 0);
|
||||
EXPECT_LE(bom.str[0], 0x7f);
|
||||
EXPECT_EQ(bom.str[1], '\x00');
|
||||
}
|
||||
break;
|
||||
case UTF8:
|
||||
ASSERT_EQ(bom.len, 3u);
|
||||
EXPECT_EQ(bom.str[0], '\xef');
|
||||
EXPECT_EQ(bom.str[1], '\xbb');
|
||||
EXPECT_EQ(bom.str[2], '\xbf');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
/** All the cases exercised by the byte order mark tests.
 *
 * Entries whose bytes contain NUL are built with an explicit length,
 * because a bare string literal causes problems in gcc5 and earlier. */
const bomspec boms[] = {
    // no leading bytes at all
    {"NOBOM"      , UTF8   , ""},
    // UTF8
    {"UTF8"       , UTF8   , "\xef\xbb\xbf"},
    // UTF16, big endian
    {"UTF16BE"    , UTF16BE, "\xfe\xff"},
    {"!UTF16BE-a" , UTF16BE, csubstr("\x00""a", 2)},
    {"!UTF16BE-b" , UTF16BE, csubstr("\x00""b", 2)},
    {"!UTF16BE-0" , UTF16BE, csubstr("\x00""0", 2)},
    // UTF16, little endian
    {"UTF16LE"    , UTF16LE, "\xff\xfe"},
    {"!UTF16LE-a" , UTF16LE, csubstr("a""\x00" , 2)},
    {"!UTF16LE-b" , UTF16LE, csubstr("b""\x00" , 2)},
    {"!UTF16LE-0" , UTF16LE, csubstr("0""\x00" , 2)},
    // UTF32, big endian
    {"UTF32BE"    , UTF32BE, csubstr("\x00\x00\xfe\xff", 4)},
    {"!UTF32BE-a" , UTF32BE, csubstr("\x00\x00\x00""a" , 4)},
    {"!UTF32BE-b" , UTF32BE, csubstr("\x00\x00\x00""b" , 4)},
    {"!UTF32BE-0" , UTF32BE, csubstr("\x00\x00\x00""0" , 4)},
    // UTF32, little endian
    {"UTF32LE"    , UTF32LE, csubstr("\xff\xfe\x00\x00", 4)},
    {"!UTF32LE-a" , UTF32LE, csubstr("a""\x00\x00\x00" , 4)},
    {"!UTF32LE-b" , UTF32LE, csubstr("b""\x00\x00\x00" , 4)},
    {"!UTF32LE-0" , UTF32LE, csubstr("0""\x00\x00\x00" , 4)},
};
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
for(const bomspec bom : boms)
|
||||
{
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(bom.name);
|
||||
SCOPED_TRACE(buf);
|
||||
bom.checkchars();
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom);
|
||||
}
|
||||
}
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms_json(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
for(const bomspec bom : boms)
|
||||
{
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom);
|
||||
SCOPED_TRACE(bom.name);
|
||||
SCOPED_TRACE(buf);
|
||||
bom.checkchars();
|
||||
Tree tree = parse_json_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom);
|
||||
}
|
||||
}
|
||||
template<class CreateFn, class TestFn>
|
||||
void test_boms2(CreateFn &&createfn, TestFn &&testfn)
|
||||
{
|
||||
for(const bomspec bom1 : boms)
|
||||
{
|
||||
SCOPED_TRACE(bom1.name);
|
||||
bom1.checkchars();
|
||||
for(const bomspec bom2 : boms)
|
||||
{
|
||||
SCOPED_TRACE(bom2.name);
|
||||
bom2.checkchars();
|
||||
std::string buf = std::forward<CreateFn>(createfn)(bom1, bom2);
|
||||
SCOPED_TRACE(buf);
|
||||
if(bom1.encoding == bom2.encoding || bom2.bom.empty())
|
||||
{
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
Tree tree = parse_in_arena(&parser, to_csubstr(buf));
|
||||
std::forward<TestFn>(testfn)(parser, tree, bom1);
|
||||
}
|
||||
else
|
||||
{
|
||||
pfn_error orig = get_callbacks().m_error;
|
||||
ExpectError::check_error([&]{
|
||||
Tree tree;
|
||||
Parser::handler_type handler;
|
||||
Parser parser(&handler);
|
||||
ASSERT_EQ((pfn_error)tree.callbacks().m_error, (pfn_error)parser.callbacks().m_error);
|
||||
ASSERT_NE((pfn_error)tree.callbacks().m_error, orig);
|
||||
parse_in_arena(&parser, to_csubstr(buf), &tree);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(byte_order_mark, only_bom)
{
    // the buffer consists of nothing but the leading bytes
    auto mkyaml = [](bomspec bom){
        return std::string(bom.bom.str, bom.bom.len);
    };
    auto check = [](Parser const& parser, Tree const&, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
    };
    test_boms(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, bom_and_scalar)
{
    // leading bytes, immediately followed by a plain scalar
    auto mkyaml = [](bomspec bom){
        return std::string(bom.bom.str, bom.bom.len) + "this is a scalar";
    };
    auto check = [](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(tree.rootref().val(), "this is a scalar");
    };
    test_boms(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, scalar_and_bom)
{
    // the bytes come after the scalar text: they are expected to stay
    // in the scalar, and the reported encoding to remain UTF8
    auto mkyaml = [](bomspec bom){
        return std::string("this is a scalar") + std::string(bom.bom.str, bom.bom.len);
    };
    auto check = [&mkyaml](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), UTF8);
        EXPECT_EQ(tree.rootref().val(), mkyaml(bom));
    };
    test_boms(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, scalar_bom_scalar)
{
    // the bytes sit in the middle of the scalar text: they are expected
    // to stay in the scalar, and the reported encoding to remain UTF8
    auto mkyaml = [](bomspec bom){
        return std::string("this is a scalar")
            + std::string(bom.bom.str, bom.bom.len)
            + "and it continues";
    };
    auto check = [&mkyaml](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), UTF8);
        EXPECT_EQ(tree.rootref().val(), mkyaml(bom));
    };
    test_boms(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, bom_and_seq)
{
    // leading bytes followed by an empty flow sequence; run through
    // both the YAML and the JSON entry points
    auto mkyaml = [](bomspec bom){
        return std::string(bom.bom.str, bom.bom.len) + "[]";
    };
    auto check = [](Parser const& parser, Tree const&, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
    };
    test_boms(mkyaml, check);
    test_boms_json(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, bom_and_map)
{
    // leading bytes followed by an empty flow map; run through both
    // the YAML and the JSON entry points
    auto mkyaml = [](bomspec bom){
        return std::string(bom.bom.str, bom.bom.len) + "{}";
    };
    auto check = [](Parser const& parser, Tree const&, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
    };
    test_boms(mkyaml, check);
    test_boms_json(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, bom_and_doc)
{
    // leading bytes followed by an explicit document holding a scalar
    auto mkyaml = [](bomspec bom){
        return std::string(bom.bom.str, bom.bom.len) + "---\nabc";
    };
    auto check = [](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(tree.docref(0).val(), "abc");
    };
    test_boms(mkyaml, check);
}
|
||||
|
||||
TEST(byte_order_mark, bom_doc_bom)
{
    // first bytes at the start of the buffer, second bytes right after
    // the document marker. The buffer is not printed here: it holds raw
    // NUL and non-ASCII bytes, and test_boms2() already attaches both
    // names and the buffer to the failure trace.
    auto mkyaml = [](bomspec bom1, bomspec bom2){
        std::string yaml(bom1.bom.str, bom1.bom.len);
        yaml.append("---\n");
        yaml.append(bom2.bom.str, bom2.bom.len);
        yaml.append("abc");
        return yaml;
    };
    auto test = [&](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        EXPECT_EQ(tree.docref(0).val(), "abc");
    };
    test_boms2(mkyaml, test);
}
|
||||
|
||||
TEST(byte_order_mark, bom_scalar_doc_bom_scalar)
{
    // two documents, each one a scalar preceded by its own leading
    // bytes. The buffer is not printed here: it holds raw NUL and
    // non-ASCII bytes, and test_boms2() already attaches both names and
    // the buffer to the failure trace.
    auto mkyaml = [](bomspec bom1, bomspec bom2){
        std::string yaml(bom1.bom.str, bom1.bom.len);
        yaml.append("abc\n");
        yaml.append("---\n");
        yaml.append(bom2.bom.str, bom2.bom.len);
        yaml.append("def\n");
        return yaml;
    };
    auto test = [&](Parser const& parser, Tree const& tree, bomspec bom){
        EXPECT_EQ(parser.encoding(), bom.encoding);
        ASSERT_EQ(tree.rootref().num_children(), 2);
        EXPECT_EQ(tree.docref(0).val(), "abc");
        EXPECT_EQ(tree.docref(1).val(), "def");
    };
    test_boms2(mkyaml, test);
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
@@ -216,7 +216,7 @@ ExpectError::ExpectError(Tree *tree, Location loc)
|
||||
: m_got_an_error(false)
|
||||
, m_tree(tree)
|
||||
, m_glob_prev(get_callbacks())
|
||||
, m_tree_prev(tree ? tree->callbacks() : Callbacks{})
|
||||
, m_tree_prev(tree ? tree->callbacks() : m_glob_prev)
|
||||
, expected_location(loc)
|
||||
{
|
||||
auto err = [](const char* msg, size_t len, Location errloc, void *this_) {
|
||||
@@ -230,20 +230,23 @@ ExpectError::ExpectError(Tree *tree, Location loc)
|
||||
);
|
||||
C4_UNREACHABLE_AFTER_ERR();
|
||||
};
|
||||
pfn_error perr = err;
|
||||
#ifdef RYML_NO_DEFAULT_CALLBACKS
|
||||
c4::yml::Callbacks tcb((void*)this, nullptr, nullptr, err);
|
||||
c4::yml::Callbacks gcb((void*)this, nullptr, nullptr, err);
|
||||
c4::yml::Callbacks tcb((void*)this, nullptr, nullptr, perr);
|
||||
c4::yml::Callbacks gcb((void*)this, nullptr, nullptr, perr);
|
||||
#else
|
||||
c4::yml::Callbacks tcb((void*)this, tree ? m_tree_prev.m_allocate : nullptr, tree ? m_tree_prev.m_free : nullptr, err);
|
||||
c4::yml::Callbacks gcb((void*)this, m_glob_prev.m_allocate, m_glob_prev.m_free, err);
|
||||
c4::yml::Callbacks tcb((void*)this, tree ? m_tree_prev.m_allocate : nullptr, tree ? m_tree_prev.m_free : nullptr, perr);
|
||||
c4::yml::Callbacks gcb((void*)this, m_glob_prev.m_allocate, m_glob_prev.m_free, perr);
|
||||
#endif
|
||||
if(tree)
|
||||
{
|
||||
_c4dbgp("setting error callback: tree");
|
||||
_c4dbgpf("setting error callback: tree err={}", c4::fmt::hex(perr));
|
||||
tree->callbacks(tcb);
|
||||
EXPECT_EQ(tree->callbacks().m_error, perr);
|
||||
}
|
||||
_c4dbgp("setting error callback: global");
|
||||
_c4dbgpf("setting error callback: global err={}", c4::fmt::hex(perr));
|
||||
set_callbacks(gcb);
|
||||
EXPECT_EQ(get_callbacks().m_error, perr);
|
||||
}
|
||||
|
||||
ExpectError::~ExpectError()
|
||||
|
||||
Reference in New Issue
Block a user