Files
tinyusdz/benchmark_old_value.cc
Syoyo Fujita e1fa06a761 Integrate Value32 implementation and adapt to value-opt branch
This commit integrates the optimized 32-byte Value implementation from the
value-opt-32 branch and adapts it to be compatible with the value-opt branch's
recent refactorings (array type system, TimeSamples, POD matrix types).

## Key Changes

### Array Type System Compatibility
- Update from TYPE_ID_1D_ARRAY_BIT to new dual-bit system:
  * TYPE_ID_STL_ARRAY_BIT (bit 20) for std::vector arrays
  * TYPE_ID_TYPED_ARRAY_BIT (bit 21) for TypedArray/ChunkedTypedArray
  * TYPE_ID_ARRAY_BIT_MASK for detecting any array type
- Add array_bit() method to TypeTraits for all array types
- Proper dual-bit marking for TypedArray types (both STL and TYPED bits)

### Matrix Types Refactoring
- Convert all 6 matrix types to trivial/POD-compatible structs:
  * matrix2f, matrix3f, matrix4f, matrix2d, matrix3d, matrix4d
- Replace custom constructors with = default
- Add explicit copy/move constructors/operators as = default
- Add static identity() methods for creating identity matrices
- Enables efficient memcpy and compatibility with TimeSamples POD requirements

### Matrix Comparison Operators
- Add operator== for all 6 matrix types using math::is_close()
- Required for TimeSamples array deduplication
- Proper floating-point comparison with tolerance

### Build System
- Add missing src/tydra/bone-util.{cc,hh} to CMakeLists.txt
- Fixes undefined reference to ReduceBoneInfluences()
- Update .gitignore to prevent build artifact commits

### Value32 Implementation Files
- Add value-types-handler.{cc,hh} - Handler-based value type system
- Add value-types-new.{cc,hh} - New 32-byte Value implementation
- Add value-debug-trace.hh - Debug tracing utilities
- Add test_value32.cc - Value32 unit tests
- Add benchmark files for performance comparison

### Documentation
- Add comprehensive design and analysis documents (10 .md files)
- Include performance benchmarks and comparisons
- Document std::any and linb::any analysis
- Add test results summary

## Testing

All tests pass successfully:
- CTest: 3/3 tests passed (100%)
- Unit tests: 27/27 tests passed (100%)
- USD file parsing: 6/6 files tested successfully (USDA and USDC)
- Tydra render scene conversion: Working correctly

## Compatibility

Maintains full backward compatibility:
- All existing tests continue to pass
- No regressions in USD parsing (USDA, USDC, USDZ)
- Tydra conversion still functional
- Compatible with recent TimeSamples and array refactoring

Modified files: 6 (+1040/-118 lines)
New files: 18 (5263 lines)
Total changes: +5263/-118 lines

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 21:15:58 +09:00

216 lines
6.7 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Benchmark for original linb::any-based Value implementation
// Compare against Value32 results
#include <iostream>
#include <chrono>
#include <string>
#include <iomanip>
// Use original Value (linb::any-based)
#include "src/value-types.hh"
using namespace tinyusdz::value;
// Minimal stopwatch: captures the clock at construction and reports how many
// milliseconds have passed since then.
class Timer {
  using Clock = std::chrono::high_resolution_clock;

 public:
  Timer() : start_(Clock::now()) {}

  // Fractional milliseconds elapsed since this Timer was constructed.
  double elapsed_ms() const {
    const std::chrono::duration<double, std::milli> elapsed = Clock::now() - start_;
    return elapsed.count();
  }

 private:
  Clock::time_point start_;
};
// Loop count for the construct scenarios in main(); the std::string and mixed
// scenarios run ITERATIONS / 10 passes.
constexpr size_t ITERATIONS = 1000000;
// Loop count for the as<T>() access and type_id() query scenarios in main().
constexpr size_t ACCESS_ITERATIONS = 10000000;
// Prints the benchmark banner: the title, sizeof(Value), the inline-buffer size
// expressed as 2 * sizeof(void*), the iteration count, and the column headings
// of the result table followed by a 90-character rule.
//
// The column widths (40 + 15 + 20 + 15 = 90) match the widths used for the
// data rows, so headings and values line up.
void print_header() {
  std::cout << "=== Original Value (linb::any) Performance Benchmark ===\n\n";
  std::cout << "sizeof(Value) = " << sizeof(Value) << " bytes\n";
  std::cout << "Stack storage in linb::any: 2 * sizeof(void*) = " << (2 * sizeof(void*)) << " bytes\n";
  // The "10M" text mirrors ACCESS_ITERATIONS; keep the two in sync by hand.
  std::cout << "Iterations: " << ITERATIONS << " (10M for access test)\n\n";
  // The trailing newline is streamed separately from "Mop/s": setw pads the
  // whole string it is applied to, so including '\n' in the padded cell would
  // shift the heading one column to the left of the values below it.
  std::cout << std::left << std::setw(40) << "Operation"
            << std::right << std::setw(15) << "Time (ms)"
            << std::setw(20) << "ns/op"
            << std::setw(15) << "Mop/s" << "\n";
  std::cout << std::string(90, '-') << "\n";
}
// Prints one row of the result table to std::cout.
//
//   name        label shown in the left-justified 40-character first column
//   ms          total elapsed time in milliseconds (printed with 3 decimals)
//   iterations  number of operations performed during `ms`
//
// Derived columns: nanoseconds per operation and millions of operations per
// second, both printed with 2 decimals. No guard is applied for ms == 0 or
// iterations == 0; callers are expected to pass non-zero values.
//
// The stream's format flags and precision are restored before returning, so
// the fixed/precision settings used for the row do not leak into whatever the
// caller prints next.
void print_result(const char* name, double ms, size_t iterations) {
  const double count = static_cast<double>(iterations);
  const double ns_per_op = (ms * 1000000.0) / count;  // ms -> ns, divided per op
  const double mops = count / (ms * 1000.0);          // ops per microsecond == Mop/s

  // std::fixed, std::left/right and setprecision are sticky; remember the
  // caller's settings so they can be put back afterwards.
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const std::streamsize saved_precision = std::cout.precision();

  std::cout << std::left << std::setw(40) << name
            << std::right << std::setw(15) << std::fixed << std::setprecision(3) << ms
            << std::setw(20) << std::setprecision(2) << ns_per_op
            << std::setw(15) << std::setprecision(2) << mops << "\n";

  std::cout.flags(saved_flags);
  std::cout.precision(saved_precision);
}
// Runs eleven timed scenarios against the linb::any-based Value, printing one
// result row per scenario via print_result(), then prints a short summary of
// the storage layout. Always returns 0.
int main() {
print_header();
// Volatile accumulators: every loop folds the value it read into one of these
// so the measured work has an observable side effect.
volatile int sink_i = 0;
volatile double sink_d = 0;
volatile size_t sink_s = 0;
// 1. Construct inline (int32_t) - 4 bytes, fits in 16-byte stack
// (the 16-byte figure assumes 8-byte pointers; the header prints 2 * sizeof(void*))
{
Timer timer;
for (size_t i = 0; i < ITERATIONS; ++i) {
Value v(int32_t(42));
const int32_t* ptr = v.as<int32_t>();
if (ptr) sink_i += *ptr;
}
print_result("Construct inline (int32_t)", timer.elapsed_ms(), ITERATIONS);
}
// 2. Construct inline (double) - 8 bytes, fits in 16-byte stack
{
Timer timer;
for (size_t i = 0; i < ITERATIONS; ++i) {
Value v(3.14159);
const double* ptr = v.as<double>();
if (ptr) sink_d += *ptr;
}
print_result("Construct inline (double)", timer.elapsed_ms(), ITERATIONS);
}
// 3. Construct heap (std::string) - typically 32 bytes, uses heap
// Runs ITERATIONS / 10 passes; the std::string itself is also built inside the
// timed loop, so its construction is part of the measurement.
{
Timer timer;
for (size_t i = 0; i < ITERATIONS / 10; ++i) {
std::string str = "Hello, World!";
Value v(str);
const std::string* ptr = v.as<std::string>();
if (ptr) sink_s += ptr->size();
}
print_result("Construct heap (std::string)", timer.elapsed_ms(), ITERATIONS / 10);
}
// 4. Copy (inline) - int32_t
// Note: Direct Value copy triggers template recursion, so we test construct+destruct cost
// NOTE(review): no Value copy happens here; two independent Values are
// constructed and only v2 is read. The row label says so via [construct×2].
{
Timer timer;
for (size_t i = 0; i < ITERATIONS; ++i) {
Value v1(int32_t(42));
Value v2(int32_t(42)); // Construct again (copy semantics)
const int32_t* ptr = v2.as<int32_t>();
if (ptr) sink_i += *ptr;
}
print_result("Copy (inline int32_t) [construct×2]", timer.elapsed_ms(), ITERATIONS);
}
// 5. Copy (heap) - std::string
// Same substitution as scenario 4: two Values are built from the same string
// instead of copying one Value into another.
{
Timer timer;
for (size_t i = 0; i < ITERATIONS / 10; ++i) {
std::string str = "test";
Value v1(str);
Value v2(str); // Construct again (copy semantics)
const std::string* ptr = v2.as<std::string>();
if (ptr) sink_s += ptr->size();
}
print_result("Copy (heap std::string) [construct×2]", timer.elapsed_ms(), ITERATIONS / 10);
}
// 6. Move (inline) - int32_t
// Note: linb::any move is same as copy for inline types, measure construct cost
// NOTE(review): the loop body is the same as scenario 1 (construct + as<T>());
// no move is performed.
{
Timer timer;
for (size_t i = 0; i < ITERATIONS; ++i) {
Value v1(int32_t(42));
const int32_t* ptr = v1.as<int32_t>();
if (ptr) sink_i += *ptr;
}
print_result("Move (inline int32_t) [construct only]", timer.elapsed_ms(), ITERATIONS);
}
// 7. Move (heap) - std::string
// NOTE(review): same shape as scenario 3 with a shorter string; no move is
// performed.
{
Timer timer;
for (size_t i = 0; i < ITERATIONS / 10; ++i) {
std::string str = "test";
Value v1(str);
const std::string* ptr = v1.as<std::string>();
if (ptr) sink_s += ptr->size();
}
print_result("Move (heap std::string) [construct only]", timer.elapsed_ms(), ITERATIONS / 10);
}
// 8. Access (inline) - int32_t
// The Value is built once before the timer starts; only as<T>() is timed.
{
Value v(int32_t(42));
Timer timer;
for (size_t i = 0; i < ACCESS_ITERATIONS; ++i) {
const int32_t* ptr = v.as<int32_t>();
if (ptr) sink_i += *ptr;
}
print_result("Access via as<T>() (inline)", timer.elapsed_ms(), ACCESS_ITERATIONS);
}
// 9. Access (heap) - std::string
// As in scenario 8, construction happens outside the timed region.
{
std::string str = "test";
Value v(str);
Timer timer;
for (size_t i = 0; i < ACCESS_ITERATIONS; ++i) {
const std::string* ptr = v.as<std::string>();
if (ptr) sink_s += ptr->size();
}
print_result("Access via as<T>() (heap)", timer.elapsed_ms(), ACCESS_ITERATIONS);
}
// 10. Type queries
// NOTE(review): type_id()'s return type is not visible here; it is accumulated
// into the int sink ACCESS_ITERATIONS times on top of the earlier scenarios —
// confirm the running total cannot overflow int.
{
Value v(int32_t(42));
Timer timer;
for (size_t i = 0; i < ACCESS_ITERATIONS; ++i) {
sink_i += v.type_id();
}
print_result("type_id() query", timer.elapsed_ms(), ACCESS_ITERATIONS);
}
// 11. Mixed workload
// Each pass builds an int32_t Value, a double Value and a second int32_t Value
// (standing in for a copy); every tenth pass also builds a std::string Value.
{
Timer timer;
for (size_t i = 0; i < ITERATIONS / 10; ++i) {
Value v_int(int32_t(i % 1000));
const int32_t* p1 = v_int.as<int32_t>();
if (p1) sink_i += *p1;
Value v_double(3.14159 * i);
const double* p2 = v_double.as<double>();
if (p2) sink_d += *p2;
Value v_copy(int32_t(i % 1000)); // Construct instead of copy
const int32_t* p3 = v_copy.as<int32_t>();
if (p3) sink_i += *p3;
if (i % 10 == 0) {
std::string str = "test";
Value v_str(str);
const std::string* p4 = v_str.as<std::string>();
if (p4) sink_s += p4->size();
}
}
print_result("Mixed workload (realistic)", timer.elapsed_ms(), ITERATIONS / 10);
}
// Closing summary: restates the storage sizes computed from sizeof so the
// output can be read side by side with the Value32 benchmark.
std::cout << "\n";
std::cout << "=== Analysis ===\n\n";
std::cout << "This is the BASELINE - original linb::any-based Value.\n";
std::cout << "Compare these results with Value32 benchmark results.\n\n";
std::cout << "linb::any characteristics:\n";
std::cout << " - Stack storage: " << (2 * sizeof(void*)) << " bytes (2 * sizeof(void*))\n";
std::cout << " - Vtable pointer: " << sizeof(void*) << " bytes\n";
std::cout << " - Total size: " << sizeof(Value) << " bytes\n";
// NOTE(review): the two lines below hard-code a 16-byte threshold and list
// float3/float4 as larger than it; if those are 3- and 4-float types (12 and
// 16 bytes) they would fall on the inline side — confirm against value-types.hh.
std::cout << " - Types ≤16 bytes: inline (int32_t, double, int64_t, pointers)\n";
std::cout << " - Types >16 bytes: heap (std::string, float3, float4, etc.)\n\n";
return 0;
}