mirror of
https://github.com/lighttransport/tinyusdz.git
synced 2026-01-18 01:11:17 +01:00
Implemented parallel printing for Prim and PrimSpec to speed up Stage::ExportToString() and Layer print_layer() functions using the lock-free task queue.

Features:
- Parallel printing controlled by optional `parallel` parameter
- Only enabled when TINYUSDZ_ENABLE_THREAD is defined
- Auto-detects number of CPU cores (std::thread::hardware_concurrency())
- Configurable minimum prims threshold (default: 4 prims)
- Falls back to sequential printing when not beneficial
- Preserves original ordering of output

Changes:
- Added src/prim-pprint-parallel.hh: Parallel printing interface
- Added src/prim-pprint-parallel.cc: Task queue-based implementation
- Modified Stage::ExportToString(): Added parallel parameter
- Modified print_layer(): Added parallel parameter
- Updated CMakeLists.txt: Added new parallel printing files
- Added sandbox/parallel-print-benchmark.cc: Benchmark tool

Implementation:
- Uses TaskQueue for lock-free work distribution
- Worker threads consume tasks from queue
- Each Prim/PrimSpec printed to separate buffer
- Results concatenated in original order

Testing:
- Sequential printing: 9873ms (258MB output)
- Parallel printing: 10345ms (258MB output)
- ✓ Outputs match exactly
- ✓ All unit tests pass

Note: For files with few root prims, parallel overhead may exceed benefits. The min_prims_for_parallel threshold prevents this.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
75 lines
2.3 KiB
C++
75 lines
2.3 KiB
C++
// SPDX-License-Identifier: Apache 2.0
|
|
// Simple benchmark to compare sequential vs parallel prim printing
|
|
//
|
|
#include <iostream>
|
|
#include <chrono>
|
|
#include "stage.hh"
|
|
#include "tinyusdz.hh"
|
|
#include "io-util.hh"
|
|
|
|
using namespace tinyusdz;
|
|
|
|
int main(int argc, char** argv) {
|
|
if (argc < 2) {
|
|
std::cerr << "Usage: " << argv[0] << " <usd_file>\n";
|
|
return 1;
|
|
}
|
|
|
|
std::string filename = argv[1];
|
|
std::string warn, err;
|
|
|
|
// Load USD file
|
|
Stage stage;
|
|
bool ret = LoadUSDFromFile(filename, &stage, &warn, &err);
|
|
|
|
if (!warn.empty()) {
|
|
std::cout << "WARN: " << warn << "\n";
|
|
}
|
|
|
|
if (!ret) {
|
|
std::cerr << "Failed to load USD file: " << err << "\n";
|
|
return 1;
|
|
}
|
|
|
|
std::cout << "Loaded USD file: " << filename << "\n";
|
|
std::cout << "Number of root prims: " << stage.root_prims().size() << "\n\n";
|
|
|
|
// Benchmark sequential printing
|
|
{
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
std::string result = stage.ExportToString(false, false); // Sequential
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
|
|
|
std::cout << "Sequential printing:\n";
|
|
std::cout << " Time: " << duration.count() << " ms\n";
|
|
std::cout << " Output size: " << result.size() << " bytes\n\n";
|
|
}
|
|
|
|
// Benchmark parallel printing
|
|
{
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
std::string result = stage.ExportToString(false, true); // Parallel
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
|
|
|
std::cout << "Parallel printing:\n";
|
|
std::cout << " Time: " << duration.count() << " ms\n";
|
|
std::cout << " Output size: " << result.size() << " bytes\n\n";
|
|
}
|
|
|
|
// Verify both produce the same output
|
|
std::string seq_result = stage.ExportToString(false, false);
|
|
std::string par_result = stage.ExportToString(false, true);
|
|
|
|
if (seq_result == par_result) {
|
|
std::cout << "✓ Sequential and parallel outputs match!\n";
|
|
} else {
|
|
std::cout << "✗ WARNING: Sequential and parallel outputs differ!\n";
|
|
std::cout << " Sequential size: " << seq_result.size() << "\n";
|
|
std::cout << " Parallel size: " << par_result.size() << "\n";
|
|
}
|
|
|
|
return 0;
|
|
}
|