mirror of
https://github.com/biojppm/rapidyaml.git
synced 2026-01-18 21:41:18 +01:00
ryml-parse-emit: improve command-line options
This commit is contained in:
@@ -73,7 +73,7 @@ void ReferenceResolver::gather_anchors_and_refs__(id_type n)
|
||||
}
|
||||
else if(m_tree->is_key_ref(n))
|
||||
{
|
||||
_c4dbgpf("node[{}]: key ref: '{}'", n, m_tree->key_ref(n));
|
||||
_c4dbgpf("node[{}]: instance[{}]: key ref: '{}', key='{}'", n, m_refs.size(), m_tree->key_ref(n), m_tree->has_key(n) ? m_tree->key(n) : csubstr{"-"});
|
||||
_RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<");
|
||||
_RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)));
|
||||
m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
|
||||
@@ -150,6 +150,9 @@ id_type ReferenceResolver::lookup_(RefData *C4_RESTRICT ra)
|
||||
while(ra->prev_anchor != NONE)
|
||||
{
|
||||
ra = &m_refs[ra->prev_anchor];
|
||||
_c4dbgpf("instance[{}:node{}]: lookup '{}' at [{}:node{}]: keyref='{}' valref='{}'", instance, node, refname, ra-m_refs.m_stack, ra->node,
|
||||
(m_tree->has_key_anchor(ra->node) ? m_tree->key_anchor(ra->node) : csubstr("~")),
|
||||
(m_tree->has_val_anchor(ra->node) ? m_tree->val_anchor(ra->node) : csubstr("~")));
|
||||
if(m_tree->has_anchor(ra->node, refname))
|
||||
return ra->node;
|
||||
}
|
||||
@@ -167,18 +170,8 @@ void ReferenceResolver::reset_(Tree *t_)
|
||||
m_tree = t_;
|
||||
}
|
||||
|
||||
void ReferenceResolver::resolve(Tree *t_)
|
||||
void ReferenceResolver::resolve_()
|
||||
{
|
||||
_c4dbgp("resolving references...");
|
||||
|
||||
reset_(t_);
|
||||
|
||||
_c4dbg_tree("unresolved tree", *m_tree);
|
||||
|
||||
gather_anchors_and_refs_();
|
||||
if(m_refs.empty())
|
||||
return;
|
||||
|
||||
/* from the specs: "an alias node refers to the most recent
|
||||
* node in the serialization having the specified anchor". So
|
||||
* we need to start looking upward from ref nodes.
|
||||
@@ -273,17 +266,46 @@ void ReferenceResolver::resolve(Tree *t_)
|
||||
}
|
||||
}
|
||||
}
|
||||
_c4dbg_tree("after insertion", *m_tree);
|
||||
}
|
||||
_c4dbgp("modifying tree: finished");
|
||||
}
|
||||
|
||||
void ReferenceResolver::resolve(Tree *t_)
|
||||
{
|
||||
_c4dbgp("resolving references...");
|
||||
|
||||
reset_(t_);
|
||||
|
||||
_c4dbg_tree("unresolved tree", *m_tree);
|
||||
|
||||
gather_anchors_and_refs_();
|
||||
if(m_refs.empty())
|
||||
return;
|
||||
resolve_();
|
||||
_c4dbg_tree("resolved tree", *m_tree);
|
||||
|
||||
// clear anchors and refs
|
||||
_c4dbgp("clearing anchors/refs");
|
||||
for(auto const& C4_RESTRICT ar : m_refs)
|
||||
{
|
||||
m_tree->rem_anchor_ref(ar.node);
|
||||
if(ar.parent_ref != NONE)
|
||||
if(m_tree->type(ar.parent_ref) != NOTYPE)
|
||||
m_tree->remove(ar.parent_ref);
|
||||
_c4dbgp("clearing anchors/refs");
|
||||
auto clear_ = [this]{
|
||||
for(auto const& C4_RESTRICT ar : m_refs)
|
||||
{
|
||||
m_tree->rem_anchor_ref(ar.node);
|
||||
if(ar.parent_ref != NONE)
|
||||
if(m_tree->type(ar.parent_ref) != NOTYPE)
|
||||
m_tree->remove(ar.parent_ref);
|
||||
}
|
||||
};
|
||||
clear_();
|
||||
// some of the elements injected during the resolution may
|
||||
// have nested anchors; these anchors will have been newly
|
||||
// injected during the resolution; collect again, and clear
|
||||
// again, to ensure those are also cleared:
|
||||
gather_anchors_and_refs_();
|
||||
clear_();
|
||||
_c4dbgp("clearing anchors/refs: finished");
|
||||
}
|
||||
_c4dbgp("clearing anchors/refs: finished");
|
||||
|
||||
|
||||
@@ -29,9 +29,19 @@ struct RYML_EXPORT ReferenceResolver
|
||||
*
|
||||
* So, depending on the number of anchor/alias nodes, this is a
|
||||
* potentially expensive operation, with a best-case linear
|
||||
* complexity (from the initial traversal).
|
||||
* complexity (from the initial traversal). This potential cost is
|
||||
* one of the reasons for requiring an explicit call.
|
||||
*
|
||||
* The @ref Tree has a `Tree::resolve()` overload set forwarding
|
||||
* here. Previously this operation was done there, using a
|
||||
* discarded object; using this separate class offers opportunity
|
||||
* for reuse of the object.
|
||||
*
|
||||
* @warning resolving references opens an attack vector when the
|
||||
* data is malicious or severely malformed, as the tree can expand
|
||||
* exponentially. See for example the [Billion Laughs
|
||||
* Attack](https://en.wikipedia.org/wiki/Billion_laughs_attack).
|
||||
*
|
||||
* @todo verify sanity against anchor-ref attacks (https://en.wikipedia.org/wiki/Billion_laughs_attack )
|
||||
*/
|
||||
void resolve(Tree *t_);
|
||||
|
||||
@@ -50,6 +60,7 @@ public:
|
||||
};
|
||||
|
||||
void reset_(Tree *t_);
|
||||
void resolve_();
|
||||
void gather_anchors_and_refs_();
|
||||
void gather_anchors_and_refs__(id_type n);
|
||||
id_type count_anchors_and_refs_(id_type n);
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <ryml_all.hpp>
|
||||
#else
|
||||
#include <c4/yml/std/std.hpp>
|
||||
#include <c4/yml/detail/print.hpp>
|
||||
#include <c4/yml/parse.hpp>
|
||||
#include <c4/yml/emit.hpp>
|
||||
#include <c4/yml/common.hpp>
|
||||
@@ -20,46 +21,32 @@ c4::csubstr jmp_msg = {};
|
||||
#endif
|
||||
|
||||
|
||||
using namespace c4;
|
||||
|
||||
|
||||
C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
|
||||
C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
|
||||
|
||||
|
||||
bool quiet = false;
|
||||
bool emit_as_json = false;
|
||||
bool timed_sections = false;
|
||||
bool emit_to_string = false;
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
struct timed_section
|
||||
{
|
||||
using myclock = std::chrono::steady_clock;
|
||||
using msecs = std::chrono::duration<double, std::milli>;
|
||||
|
||||
csubstr name;
|
||||
myclock::time_point start;
|
||||
|
||||
msecs since() const { return myclock::now() - start; }
|
||||
timed_section(csubstr n) : name(n), start(myclock::now()) {}
|
||||
~timed_section()
|
||||
{
|
||||
if(timed_sections)
|
||||
{
|
||||
fprintf(stderr, "%.6fms: %.*s\n", since().count(), (int)name.len, name.str);
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#define TS(name) timed_section C4_XCAT(__, C4_XCAT(name, __LINE__))(#name)
|
||||
using namespace c4;
|
||||
|
||||
// LCOV_EXCL_START
|
||||
|
||||
constexpr const char *usage = R"(usage:
|
||||
struct Args
|
||||
{
|
||||
c4::csubstr filename = "-";
|
||||
c4::yml::id_type reserve_size = false;
|
||||
bool resolve_refs = false;
|
||||
bool keep_refs = false;
|
||||
bool print_tree = false;
|
||||
bool quiet = false;
|
||||
bool emit_as_json = false;
|
||||
bool emit_to_string = false;
|
||||
bool timed_sections = false;
|
||||
};
|
||||
void print_usage(const char *exename)
|
||||
{
|
||||
const Args defs = {};
|
||||
fprintf(stderr, R"(usage:
|
||||
|
||||
%s <options> <path/to/file.yaml>
|
||||
|
||||
@@ -67,44 +54,75 @@ Parse yaml from file (or stdin when file is `-`) and emit to stdout.
|
||||
|
||||
Options:
|
||||
|
||||
-q,--quiet do not emit (default: emit yaml)
|
||||
-d,--debug print rapidyaml tree
|
||||
-j,--json emit json instead of yaml (default: emit yaml)
|
||||
-s,--string emit to string before dumping to stdout.
|
||||
otherwise, emit directly to stdout
|
||||
-t,--timed time sections (print timings to stderr)
|
||||
-e [N],--reserve [N] reserve tree size before parsing (default: N=%d):
|
||||
0=do not reserve
|
||||
1=reserve by estimating size
|
||||
all other values=reserve with value
|
||||
-r,--resolve resolve references (default: %s)
|
||||
-k,--keep-refs keep refs and anchors after resolving (default: %s)
|
||||
-p,--print-tree print parsed rapidyaml tree before emitting (default: %s)
|
||||
-q,--quiet do not emit (default: %s)
|
||||
-j,--json emit json instead of yaml (default: %s)
|
||||
-s,--string emit to string before dumping to stdout.
|
||||
otherwise, emit directly to stdout (default: %s)
|
||||
-t,--timed time sections (print timings to stderr) (default: %s)
|
||||
|
||||
)";
|
||||
|
||||
csubstr parse_args(int argc, const char *argv[])
|
||||
)",
|
||||
exename,
|
||||
(int)defs.reserve_size,
|
||||
defs.resolve_refs ? "resolve refs" : "do not resolve refs",
|
||||
defs.keep_refs ? "keep refs" : "remove refs",
|
||||
defs.print_tree ? "print tree" : "do not print tree",
|
||||
defs.quiet ? "do not emit" : "emit",
|
||||
defs.emit_as_json ? "emit as json" : "emit as yaml",
|
||||
defs.emit_to_string ? "emit to string" : "no",
|
||||
defs.timed_sections ? "show timings" : "no"
|
||||
);
|
||||
}
|
||||
bool timing_enabled = false;
|
||||
Args parse_args(int argc, const char *argv[])
|
||||
{
|
||||
if(argc < 2)
|
||||
{
|
||||
printf(usage, argv[0]);
|
||||
yml::error("unknown argument");
|
||||
print_usage(argv[0]);
|
||||
c4::yml::error("missing filename (use - to read from stdin)");
|
||||
}
|
||||
csubstr file = to_csubstr(argv[argc - 1]);
|
||||
Args args = {};
|
||||
args.filename = c4::to_csubstr(argv[argc - 1]);
|
||||
for(int i = 1; i+1 < argc; ++i)
|
||||
{
|
||||
csubstr arg = to_csubstr(argv[i]);
|
||||
if(arg == "-q" || arg == "--quiet")
|
||||
quiet = true;
|
||||
else if(arg == "-t" || arg == "--timed")
|
||||
timed_sections = true;
|
||||
else if(arg == "-s" || arg == "--string")
|
||||
emit_to_string = true;
|
||||
else if(arg == "-j" || arg == "--json")
|
||||
emit_as_json = true;
|
||||
c4::csubstr arg = c4::to_csubstr(argv[i]);
|
||||
auto arg0_is = [&](c4::csubstr argshort, c4::csubstr arglong){
|
||||
return (arg == argshort) || (arg == arglong);
|
||||
};
|
||||
auto arg1_is = [&](c4::csubstr argshort, c4::csubstr arglong){
|
||||
if(arg0_is(argshort, arglong))
|
||||
{
|
||||
if(i + 1 >= argc)
|
||||
c4::yml::error("missing argument value");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if /**/(arg1_is("-e", "--reserve")) C4_CHECK(c4::from_chars(c4::to_csubstr(argv[++i]), &args.reserve_size));
|
||||
else if(arg1_is("-r", "--resolve")) args.resolve_refs = true;
|
||||
else if(arg0_is("-k", "--keep-refs")) args.keep_refs = true;
|
||||
else if(arg0_is("-p", "--print-tree")) args.print_tree = true;
|
||||
else if(arg0_is("-q", "--quiet")) args.quiet = true;
|
||||
else if(arg0_is("-j", "--json")) args.emit_as_json = true;
|
||||
else if(arg0_is("-s", "--string")) args.emit_to_string = true;
|
||||
else if(arg0_is("-t", "--timed")) args.timed_sections = true;
|
||||
else
|
||||
{
|
||||
printf(usage, argv[0]);
|
||||
yml::error("unknown argument");
|
||||
print_usage(argv[0]);
|
||||
c4::yml::error("unknown argument");
|
||||
}
|
||||
}
|
||||
return file;
|
||||
timing_enabled = args.timed_sections;
|
||||
return args;
|
||||
}
|
||||
|
||||
void load_file(csubstr filename, std::string *buf)
|
||||
void read_file(csubstr filename, std::string *buf)
|
||||
{
|
||||
buf->clear();
|
||||
if(filename == "-") // read from stdin
|
||||
@@ -120,7 +138,7 @@ void load_file(csubstr filename, std::string *buf)
|
||||
{
|
||||
if(!fs::path_exists(filename.str))
|
||||
{
|
||||
std::fprintf(stderr, "cannot find file: %s (cwd=%s)\n", filename.str, fs::cwd<std::string>().c_str());
|
||||
std::fprintf(stderr, "cannot find file: %s (cwd=%s)\n", filename.str, fs::cwd<std::string>().c_str()); // NOLINT
|
||||
yml::error("file not found");
|
||||
}
|
||||
fs::file_get_contents<std::string>(filename.str, buf);
|
||||
@@ -138,6 +156,7 @@ void emit_json_docs(yml::Tree const& tree, std::string *dst=nullptr)
|
||||
else
|
||||
{
|
||||
emit_json(node, stdout);
|
||||
printf("\n");
|
||||
}
|
||||
};
|
||||
yml::ConstNodeRef root = tree.rootref();
|
||||
@@ -152,17 +171,17 @@ void report_error(const char* msg, size_t length, yml::Location loc, FILE *f)
|
||||
{
|
||||
if(!loc.name.empty())
|
||||
{
|
||||
fwrite(loc.name.str, 1, loc.name.len, f);
|
||||
fputc(':', f);
|
||||
fwrite(loc.name.str, 1, loc.name.len, f); // NOLINT
|
||||
fputc(':', f); // NOLINT
|
||||
}
|
||||
fprintf(f, "%zu:", loc.line);
|
||||
fprintf(f, "%zu:", loc.line); // NOLINT
|
||||
if(loc.col)
|
||||
fprintf(f, "%zu:", loc.col);
|
||||
fprintf(f, "%zu:", loc.col); // NOLINT
|
||||
if(loc.offset)
|
||||
fprintf(f, " (%zuB):", loc.offset);
|
||||
fputc(' ', f);
|
||||
fprintf(f, "%.*s\n", static_cast<int>(length), msg);
|
||||
fflush(f);
|
||||
fprintf(f, " (%zuB):", loc.offset); // NOLINT
|
||||
fputc(' ', f); // NOLINT
|
||||
fprintf(f, "%.*s\n", static_cast<int>(length), msg); // NOLINT
|
||||
fflush(f); // NOLINT
|
||||
}
|
||||
|
||||
yml::Callbacks create_custom_callbacks()
|
||||
@@ -181,60 +200,95 @@ yml::Callbacks create_custom_callbacks()
|
||||
return callbacks;
|
||||
}
|
||||
|
||||
#define TS(name) timed_section C4_XCAT(__, C4_XCAT(name, __LINE__))(#name)
|
||||
struct timed_section
|
||||
{
|
||||
using myclock = std::chrono::steady_clock;
|
||||
using msecs = std::chrono::duration<double, std::milli>;
|
||||
|
||||
csubstr name;
|
||||
myclock::time_point start;
|
||||
|
||||
msecs since() const { return myclock::now() - start; }
|
||||
timed_section(csubstr n)
|
||||
: name(n)
|
||||
, start(timing_enabled ? myclock::now() : myclock::time_point{})
|
||||
{}
|
||||
~timed_section()
|
||||
{
|
||||
if(timing_enabled)
|
||||
{
|
||||
fprintf(stderr, "%.6fms: %.*s\n", since().count(), (int)name.len, name.str); // NOLINT
|
||||
fflush(stderr); // NOLINT
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void process_file(csubstr file)
|
||||
void process_file(Args const& args)
|
||||
{
|
||||
TS(objects);
|
||||
std::string contents;
|
||||
yml::Tree tree(yml::get_callbacks());
|
||||
{
|
||||
TS(read_file);
|
||||
load_file(file, &contents);
|
||||
read_file(args.filename, &contents);
|
||||
}
|
||||
if(args.reserve_size)
|
||||
{
|
||||
TS(tree_reserve);
|
||||
yml::id_type cap;
|
||||
yml::id_type cap = args.reserve_size;
|
||||
if(args.reserve_size)
|
||||
{
|
||||
TS(estimate_capacity);
|
||||
cap = yml::estimate_tree_capacity(to_csubstr(contents));
|
||||
}
|
||||
fprintf(stderr, "reserving capacity=%zu\n", (size_t)cap);
|
||||
if(timing_enabled)
|
||||
fprintf(stderr, "reserving capacity=%zu\n", (size_t)cap); // NOLINT
|
||||
tree.reserve(cap);
|
||||
}
|
||||
{
|
||||
TS(parse_yml);
|
||||
yml::parse_in_place(file, to_substr(contents), &tree);
|
||||
yml::parse_in_place(args.filename, to_substr(contents), &tree);
|
||||
}
|
||||
if(emit_as_json)
|
||||
if(args.print_tree)
|
||||
{
|
||||
print_tree(args.filename.str, tree); // safe because we are getting from argv which is zero-terminated
|
||||
}
|
||||
if(args.resolve_refs || args.emit_as_json)
|
||||
{
|
||||
TS(resolve_refs);
|
||||
tree.resolve();
|
||||
if(args.print_tree)
|
||||
{
|
||||
print_tree("resolved tree", tree);
|
||||
}
|
||||
}
|
||||
if(emit_to_string)
|
||||
if(args.emit_to_string)
|
||||
{
|
||||
std::string output;
|
||||
{
|
||||
TS(emit_to_buffer);
|
||||
output.resize(contents.size()); // resize, not just reserve
|
||||
if(!emit_as_json)
|
||||
if(!args.emit_as_json)
|
||||
yml::emitrs_yaml(tree, &output);
|
||||
else
|
||||
emit_json_docs(tree, &output);
|
||||
}
|
||||
if(!quiet)
|
||||
if(!args.quiet)
|
||||
{
|
||||
TS(print_stdout);
|
||||
fwrite(output.data(), 1, output.size(), stdout);
|
||||
fwrite(output.data(), 1, output.size(), stdout); // NOLINT
|
||||
}
|
||||
}
|
||||
else if(!quiet)
|
||||
else if(!args.quiet)
|
||||
{
|
||||
TS(emit_to_stdout);
|
||||
if(!emit_as_json)
|
||||
if(!args.emit_as_json)
|
||||
yml::emit_yaml(tree);
|
||||
else
|
||||
emit_json_docs(tree);
|
||||
@@ -244,16 +298,16 @@ void process_file(csubstr file)
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
const Args args = parse_args(argc, argv);
|
||||
TS(TOTAL);
|
||||
set_callbacks(create_custom_callbacks());
|
||||
C4_IF_EXCEPTIONS_(try, if(setjmp(jmp_env) == 0))
|
||||
{
|
||||
csubstr file = parse_args(argc, argv);
|
||||
process_file(file);
|
||||
process_file(args);
|
||||
}
|
||||
C4_IF_EXCEPTIONS_(catch(std::exception const&), else)
|
||||
C4_IF_EXCEPTIONS_(catch(std::exception const&), else) // LCOV_EXCL_LINE
|
||||
{
|
||||
return 1;
|
||||
return 1; // LCOV_EXCL_LINE
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user