python API: unit tests, benchmark

This commit is contained in:
Joao Paulo Magalhaes
2019-03-19 00:26:21 +00:00
parent 6d0c042647
commit 7afb30a524
7 changed files with 284 additions and 13 deletions

View File

@@ -116,6 +116,14 @@ if(RYML_BUILD_API_PYTHON3)
endfunction()
add_python_test(parse.py)
# Disabled registration of the Python API benchmarks, one command per case file.
# NOTE(review): each COMMAND below hands the YAML/JSON case file directly to
# ${Python3_EXECUTABLE}; no Python script is named before it. Presumably the
# benchmark script should come first -- confirm before re-enabling.
#if(RYML_BUILD_BENCHMARKS)
# c4_add_benchmark_cmd(ryml ryml-python3-api-bm-travis-ryml
# COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/../bm/cases/travis.yml ryml)
# c4_add_benchmark_cmd(ryml ryml-python3-api-bm-appveyor-ryml
# COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/../bm/cases/appveyor.yml ryml)
# c4_add_benchmark_cmd(ryml ryml-python3-api-bm-compile_commands-ryml
# COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/../bm/cases/compile_commands.json ryml)
#endif()
endif()

View File

@@ -7,8 +7,18 @@ class SimpleHardcoded:
yaml = "{HELLO: a, foo: b, bar: c, baz: d, seq: [0, 1, 2, 3]}"
def check(self, ut, t):
for i, sib in enumerate(ryml.siblings(t, 5)):
s = t.key(sib)
r = [b"HELLO", b"foo", b"bar", b"baz", b"seq"][i]
print("'{}' vs '{}': {}, {}".format(s, r, s == r, s is not r))
s = str(t.key(sib), "utf8")
r = ["HELLO", "foo", "bar", "baz", "seq"][i]
print("'{}' vs '{}': {}, {}".format(s, r, s == r, s is not r))
# some convenient shorthands
eq = ut.assertEqual
ne = ut.assertNotEqual
fs = ut.assertFalse
tr = ut.assertTrue
#
@@ -95,24 +105,45 @@ class SimpleHardcoded:
eq(num, 5)
eq(num, t.num_siblings(t.first_child(t.root_id())))
#
for i, ch in enumerate(ryml.children(t, 5)):
eq(t.val(ch), [b"0", b"1", b"2", b"3"][i])
sibs = [b"HELLO", b"foo", b"bar", b"baz", b"seq"]
sibs_s = ["HELLO", "foo", "bar", "baz", "seq"]
for i, sib in enumerate(ryml.siblings(t, 5)):
k = t.key(sib)
k_s = str(k, "utf8")
eq(k, sibs[i])
eq(k_s, sibs_s[i])
ne(k, sibs_s[i])
ne(k_s, sibs[i])
k_s = str(k)
ne(k_s, sibs_s[i])
ne(k_s, sibs[i])
num = 0
for id in ryml.siblings(t, 0):
num += 1
eq(num, 1)
#
num = 0
for id in ryml.walk(t):
for id, level in ryml.walk(t):
num += 1
if t.is_root(id):
eq(id, 0)
eq(level, 0)
if t.is_map(id):
eq(id, 0)
eq(level, 0)
if t.is_seq(id):
eq(id, 5)
eq(level, 1)
if t.is_keyval(id):
tr(id > 0 and id < 5)
if t.is_val(id):
tr(id > 5)
eq(level, 2)
eq(num, t.size())
#
num = 0

128
api/python/parse_bm.py Normal file
View File

@@ -0,0 +1,128 @@
import ryml
import ruamel.yaml
import yaml
import timeit
import time
import prettytable
from collections import OrderedDict as odict
class RunResults:
    """Summary of one benchmark run.

    Stores the values handed to the constructor together with two derived
    figures: ``avg`` (``time / count``) and ``MBps`` (``MB / time / 1000.0``).
    As called from ``BmCase.run``, ``time`` is the elapsed time in
    milliseconds and ``MB`` is the accumulated input size.
    """

    __slots__ = ('name', 'count', 'time', 'avg', 'MBps', 'timeit')

    def __init__(self, name, time, count, MB, timeit):
        """Record the raw measurements and compute the derived ones.

        Raises ZeroDivisionError when ``count`` or ``time`` is zero.
        """
        self.name, self.time, self.count = name, time, count
        # derived figures
        self.avg = time / count
        self.MBps = MB / self.time / 1000.0
        # raw result returned by the timer, kept for reference
        self.timeit = timeit

    def __str__(self):
        """Return a one-line, human-readable summary of the run."""
        template = "{}: count={} time={:.3f}ms avg={:.3f}ms MB/s={:.3f}"
        fields = (self.name, self.count, self.time, self.avg, self.MBps)
        return template.format(*fields)
class BmCase:
    """A benchmark input file, kept in every form the parsers consume.

    Attributes set by the constructor:
      src_as_str       -- file contents as ``str``
      src_as_bytes     -- the same contents encoded as utf8 ``bytes``
      src_as_bytearray -- the same contents as a mutable utf8 ``bytearray``

    ``run`` additionally sets ``count`` (number of calls made) and ``MB``
    (accumulated number of input bytes processed).
    """

    def __init__(self, filename):
        """Read ``filename`` and prepare the str/bytes/bytearray views."""
        # Decode explicitly as utf8 so the str form always agrees with the
        # utf8-encoded byte forms, whatever the locale's default encoding is.
        with open(filename, "r", encoding="utf8") as f:
            src = f.read()
        self.src_as_str = src
        self.src_as_bytes = bytes(src, "utf8")
        self.src_as_bytearray = bytearray(src, "utf8")

    def run(self, bm_name, cls):
        """Benchmark method ``bm_name`` of a fresh ``cls()`` on this case.

        The method is called as ``method(self)`` repeatedly under
        ``timeit.Timer.autorange``. Returns a ``RunResults`` named
        ``"<bm_name>:<cls.__name__>"`` carrying the elapsed time in
        milliseconds, the call count, the accumulated byte count and the raw
        ``autorange`` result.
        """
        obj = cls()
        method = getattr(obj, bm_name)
        self.count = 0
        self.MB = 0
        # Throughput is measured in bytes, not characters; the immutable
        # bytes form gives the size independent of any in-place parsing.
        nbytes = len(self.src_as_bytes)

        def fn():
            method(self)
            self.count += 1
            self.MB += nbytes

        t = timeit.Timer(fn)
        # perf_counter is monotonic and high-resolution, unlike the wall clock
        start = time.perf_counter()
        result = t.autorange()
        delta = 1000. * (time.perf_counter() - start)
        name = bm_name + ":" + cls.__name__
        return RunResults(name, delta, self.count, self.MB, result)
class RymlRo:
    """Benchmark approach: ``ryml.parse`` with no tree supplied by the caller."""

    def parse(self, case):
        """Parse ``case.src_as_bytearray`` with ``ryml.parse``; result is discarded."""
        source = case.src_as_bytearray
        parsed = ryml.parse(source)
class RymlRoReuse:
    """Benchmark approach: ``ryml.parse`` into a single tree reused across calls."""

    def __init__(self):
        # the tree that every parse() call writes into
        self.tree = ryml.Tree()

    def parse(self, case):
        """Parse ``case.src_as_bytearray`` into the reused tree.

        The tree and its arena are emptied first, the same way
        ``RymlInSituReuse.parse`` does, so each call starts from a clean tree
        instead of allocating a new one.
        """
        self.tree.clear()
        self.tree.clear_arena()
        ryml.parse(case.src_as_bytearray, tree=self.tree)
class RymlInSitu:
    """Benchmark approach: ``ryml.parse_in_situ`` with no tree supplied by the caller."""

    def parse(self, case):
        """Parse ``case.src_as_bytearray`` with ``ryml.parse_in_situ``; result is discarded."""
        buffer = case.src_as_bytearray
        parsed = ryml.parse_in_situ(buffer)
class RymlInSituReuse:
    """Benchmark approach: ``ryml.parse_in_situ`` into a single tree reused across calls."""

    def __init__(self):
        # the tree that every parse() call writes into
        self.tree = ryml.Tree()

    def parse(self, case):
        """Empty the reused tree and its arena, then parse the case into it."""
        tree = self.tree
        tree.clear()
        tree.clear_arena()
        ryml.parse_in_situ(case.src_as_bytearray, tree=tree)
class RuamelYaml:
    """Benchmark approach: ``ruamel.yaml.load`` on the ``str`` form of the case."""

    def parse(self, case):
        """Load ``case.src_as_str`` with ruamel.yaml; result is discarded."""
        # NOTE(review): this uses the full ``Loader``; only benchmark files
        # you trust -- confirm whether a safe loader would be a fairer match
        # for the PyYaml case.
        text = case.src_as_str
        loaded = ruamel.yaml.load(text, Loader=ruamel.yaml.Loader)
class PyYaml:
    """Benchmark approach: PyYAML's ``yaml.safe_load`` on the ``str`` form of the case."""

    def parse(self, case):
        """Load ``case.src_as_str`` with ``yaml.safe_load``; result is discarded."""
        text = case.src_as_str
        loaded = yaml.safe_load(text)
def run(filename):
    """Benchmark every parsing approach on ``filename`` and print the results.

    For each benchmark name, each approach class is run through
    ``BmCase.run``; its result is printed as soon as it is available, and a
    summary table of all approaches is printed afterwards.
    """
    case = BmCase(filename)
    approaches = (
        RuamelYaml,
        PyYaml,
        RymlRo,
        RymlRoReuse,
        RymlInSitu,
        RymlInSituReuse,
    )
    benchmarks = ('parse', )
    for bm in benchmarks:
        # results keyed by run name, in execution order
        results = odict()
        for cls in approaches:
            outcome = case.run(bm, cls)
            results[outcome.name] = outcome
            print(outcome)
        table = prettytable.PrettyTable()
        table.field_names = ["case", "count", "time(ms)", "avg(ms)", "avg_read(MB/s)"]
        table.align["case"] = "l"
        for res in results.values():
            # floating-point columns are shown with three decimals
            timings = ["{:.3f}".format(x) for x in (res.time, res.avg, res.MBps)]
            table.add_row([res.name, res.count] + timings)
        print(table)
# Script entry point: benchmark the file named by the first command-line argument.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # Tell the user what is missing instead of raising a blank exception;
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: {} <yaml-or-json-file>".format(sys.argv[0]))
    filename = sys.argv[1]
    run(filename)

View File

@@ -0,0 +1,3 @@
ruamel.yaml
pyyaml
prettytable

View File

@@ -100,16 +100,16 @@ using csubstr = c4::csubstr;
/** Parse the read-only buffer `s` into the tree `t` via c4::yml::parse().
 *
 * The debug printf calls are kept commented out: printing the whole source
 * buffer on every call floods stdout and distorts benchmark timings. */
void parse_csubstr(c4::csubstr s, c4::yml::Tree *t)
{
    //printf("PARSE READONLY: s=%.*s\n", (int)s.len, s.str);
    c4::yml::parse(s, t);
    //printf("PARSE READONLY OK: tree size=%zu\n", t->size());
}
/** Parse the mutable buffer `s` into the tree `t` via c4::yml::parse().
 *
 * The debug printf calls are kept commented out: printing the whole source
 * buffer on every call floods stdout and distorts benchmark timings. */
void parse_substr(c4::substr s, c4::yml::Tree *t)
{
    //printf("PARSE INPLACE: s=%.*s\n", (int)s.len, s.str);
    c4::yml::parse(s, t);
    //printf("PARSE INPLACE OK: tree size=%zu\n", t->size());
}
%}
@@ -137,14 +137,14 @@ def siblings(tree, node):
ch = tree.next_sibling(ch)
def walk(tree, node=None, indentation_level=0):
    """Yield ``(node, indentation_level)`` for ``node`` and all its descendants.

    Traversal is depth-first: a node is yielded before its children, and the
    children are visited in sibling order.

    tree              -- the tree to traverse; must not be None
    node              -- id of the node to start from; defaults to the tree root
    indentation_level -- level reported for ``node``; each generation of
                         children is reported one level deeper
    """
    assert tree is not None
    if node is None:
        node = tree.root_id()
    yield node, indentation_level
    ch = tree.first_child(node)
    while ch != NONE:
        # recurse into the child's subtree, one level deeper
        yield from walk(tree, ch, indentation_level + 1)
        ch = tree.next_sibling(ch)