Improve MCP tools.

This commit is contained in:
Syoyo Fujita
2025-07-28 14:55:03 +09:00
parent b209e8679f
commit 1b03cf0dc0
6 changed files with 484 additions and 45 deletions

BIN
models/teapot-pbr.usdc Executable file

Binary file not shown.

View File

@@ -12,6 +12,15 @@ namespace tinyusdz {
namespace tydra {
namespace mcp {
/// Generic asset (USD, textures, etc.) kept in the MCP context.
struct MCPAsset {
  std::string name;         ///< Asset name.
  std::string data;         ///< Asset payload as a base64 encoded string.
  std::string description;  ///< Free-form description (optional).
  std::string uuid;         ///< UUID string assigned to the asset.
};
struct USDLayer
{
std::string uri;
@@ -34,8 +43,10 @@ struct Context
// key = UUID
std::unordered_map<std::string, USDLayer> layers;
// key = URI, value = UUID
std::unordered_map<std::string, std::string> resources;
// key = name
std::unordered_map<std::string, MCPAsset> assets;
std::vector<std::string> selected_assets;
// key = name
std::unordered_map<std::string, Screenshot> screenshots;

View File

@@ -24,17 +24,11 @@ static bool ListResourcesImpl(const Context &ctx, json &result) {
result["resources"] = nlohmann::json::array();
for (const auto &res : ctx.resources) {
for (const auto &res : ctx.assets) {
if (!ctx.layers.count(res.second)) {
continue;
}
json res_j;
res_j["uri"] = ctx.layers.at(res.second).uri;
res_j["name"] = ctx.layers.at(res.second).uri; // FIXME
res_j["name"] = res.second.name;
res_j["mimeType"] = "application/octet-stream"; // FIXME
// TODO: size, title, description
@@ -51,33 +45,20 @@ bool GetResourcesList(const Context &ctx, nlohmann::json &result) {
return ListResourcesImpl(ctx, result);
}
bool ReadResource(const Context &ctx, const std::string &uri, nlohmann::json &result) {
bool ReadResource(const Context &ctx, const std::string &name, nlohmann::json &result) {
// TODO: multiple resources
if (!ctx.resources.count(uri)) {
if (!ctx.assets.count(name)) {
// TODO: report error
return false;
}
const std::string &uuid = ctx.resources.at(uri);
if (!ctx.layers.count(uuid)) {
// This should not happen though.
return false;
}
const auto &asset = ctx.assets.at(name);
json res;
res["uri"] = uri;
res["name"] = uri; // FIXME
res["mimeType"] = "text/plain";
// TODO: title
const Layer &layer = ctx.layers.at(uuid).layer;
// TODO: binary
std::string str = to_string(layer); // to USDA
res["text"] = str;
res["type"] = "text"; // FIXME
res["text"] = asset.data;
result["contents"] = json::array();
result["contents"].push_back(res);

View File

@@ -74,6 +74,10 @@ bool GetAllUSDDescriptions(Context &ctx, const nlohmann::json &args, nlohmann::j
bool LoadUSDLayerFromFile(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
#endif
bool LoadUSDLayerFromData(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
// Asset tool handlers. Each one receives the tool arguments in `args`, writes
// the tool response into `result["content"]`, and reports failures by
// returning false with a message in `err`.

// Stores an asset built from the `name`, `data` and `description` args into
// ctx.assets and returns the generated UUID as a text content item.
bool StoreAsset(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
// Returns the stored data string of the asset named by the `name` arg.
bool ReadAsset(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
// Returns the description of the asset selected by the `name` arg.
bool GetAssetDescription(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
// Returns one "name:description" text content item per entry of ctx.assets.
bool GetAllAssetDescriptions(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err);
bool GetVersion(nlohmann::json &result) {
@@ -142,7 +146,6 @@ bool LoadUSDLayerFromFile(Context &ctx, const nlohmann::json &args, nlohmann::js
usd_layer.description = description;
ctx.layers.emplace(uuid, std::move(usd_layer));
ctx.resources.emplace(uri, uuid);
DCOUT("loaded USD as Layer");
@@ -203,7 +206,6 @@ bool LoadUSDLayerFromData(Context &ctx, const nlohmann::json &args, nlohmann::js
usd_layer.layer = std::move(layer);
ctx.layers.emplace(uuid, std::move(usd_layer));
ctx.resources.emplace(name, uuid);
DCOUT("loaded USD as Layer");
@@ -217,6 +219,69 @@ bool LoadUSDLayerFromData(Context &ctx, const nlohmann::json &args, nlohmann::js
return true;
}
// Tool handler for `read_asset`.
//
// Looks up the asset named by the `name` argument in ctx.assets and returns
// its stored data string as a single text content item in `result`.
// Returns false and fills `err` when `name` is missing from `args` or when no
// asset with that name exists.
bool ReadAsset(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  DCOUT("args " << args);

  if (!args.contains("name")) {
    DCOUT("name param not found");
    err = "`name` param not found.\n";
    return false;
  }

  const std::string asset_name = args["name"];

  // Single lookup: the iterator doubles as the existence check.
  const auto found = ctx.assets.find(asset_name);
  if (found == ctx.assets.end()) {
    err = "Asset not found: " + asset_name + "\n";
    return false;
  }

  nlohmann::json item;
  item["type"] = "text";
  item["text"] = found->second.data;

  result["content"] = nlohmann::json::array();
  result["content"].push_back(item);

  return true;
}
// Tool handler for `store_asset`.
//
// Arguments (in `args`):
//   - `name`        (string, required): key under which the asset is stored.
//   - `data`        (string, required): asset payload; stored as given.
//   - `description` (string, optional): defaults to an empty string when the
//                   key is absent or null.
//
// On success the asset is stored in ctx.assets under `name` (replacing any
// asset previously stored under the same name) and the newly generated UUID is
// returned as a single text content item in `result`.
// Returns false and fills `err` when a required argument is missing or when an
// argument has a non-string type.
bool StoreAsset(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  DCOUT("args " << args);

  if (!args.contains("data")) {
    DCOUT("data param not found");
    err = "`data` param not found.\n";
    return false;
  }

  if (!args.contains("name")) {
    DCOUT("name param not found");
    err = "`name` param not found.\n";
    return false;
  }

  // Validate types up front: converting a non-string JSON value to
  // std::string would throw instead of producing a tool error.
  if (!args.at("name").is_string()) {
    err = "`name` param must be a string.\n";
    return false;
  }

  if (!args.at("data").is_string()) {
    err = "`data` param must be a string.\n";
    return false;
  }

  // `description` is optional, so it must not be read unconditionally:
  // const operator[] on a missing key is not a valid access.
  std::string description;
  const auto desc_it = args.find("description");
  if ((desc_it != args.end()) && !desc_it->is_null()) {
    if (!desc_it->is_string()) {
      err = "`description` param must be a string.\n";
      return false;
    }
    description = desc_it->get<std::string>();
  }

  const std::string name = args.at("name").get<std::string>();
  const std::string uuid = generateUUID();

  MCPAsset asset;
  asset.name = name;
  asset.data = args.at("data").get<std::string>();
  asset.description = std::move(description);
  asset.uuid = uuid;

  // Assign rather than emplace: emplace() keeps an existing entry untouched,
  // which would silently drop the new data while still reporting a fresh UUID.
  ctx.assets[name] = std::move(asset);

  nlohmann::json content;
  content["type"] = "text";
  content["text"] = uuid;

  result["content"] = nlohmann::json::array();
  result["content"].push_back(content);

  return true;
}
bool ListPrimSpecs(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
DCOUT("args " << args);
@@ -388,22 +453,62 @@ bool GetAllUSDDescriptions(Context &ctx, const nlohmann::json &args, nlohmann::j
return true;
}
// Tool handler for `get_asset_description`.
//
// Returns the description of the asset named by the `name` argument as a
// single text content item in `result`.
// Returns false and fills `err` when `name` is missing, is not a string, or
// does not match any stored asset.
bool GetAssetDescription(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  DCOUT("args " << args);

  if (!args.contains("name")) {
    DCOUT("name param not found");
    err = "`name` param not found.\n";
    return false;
  }

  if (!args.at("name").is_string()) {
    err = "`name` param must be a string.\n";
    return false;
  }

  const std::string name = args.at("name").get<std::string>();

  // ctx.assets is keyed by asset name, so look the name up directly. Resolving
  // it through ctx.layers first yields a Layer UUID, which is never a key of
  // ctx.assets.
  const auto it = ctx.assets.find(name);
  if (it == ctx.assets.end()) {
    err = "Asset not found: " + name + "\n";
    return false;
  }

  nlohmann::json content;
  content["type"] = "text";
  content["text"] = it->second.description;

  result["content"] = nlohmann::json::array();
  result["content"].push_back(content);

  return true;
}
// Tool handler for `get_all_asset_descriptions`.
//
// Emits one text content item per entry of ctx.assets, formatted as
// "<name>:<description>". `args` and `err` are not used; always returns true.
bool GetAllAssetDescriptions(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  (void)args;
  (void)err;

  result["content"] = nlohmann::json::array();
  nlohmann::json &items = result["content"];

  for (const auto &entry : ctx.assets) {
    const MCPAsset &asset = entry.second;

    nlohmann::json item;
    item["type"] = "text";
    item["text"] = asset.name + ":" + asset.description;

    items.push_back(item);
  }

  return true;
}
bool ToUSDA(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
DCOUT("args " << args);
if (!args.contains("uri")) {
DCOUT("uri param not found");
err = "`uri` param not found.\n";
DCOUT("name param not found");
err = "`name` param not found.\n";
return false;
}
std::string uri = args.at("uri");
std::string name = args.at("name");
if (!ctx.resources.count(uri)) {
err = "Resource not found: " + uri + "\n";
return false;
}
std::string uuid = ctx.resources.at(uri);
std::string uuid = FindUUID(name, ctx.layers);
if (!ctx.layers.count(uuid)) {
// This should not happen though.
@@ -425,6 +530,51 @@ bool ToUSDA(Context &ctx, const nlohmann::json &args, nlohmann::json &result, st
return true;
}
// Tool handler for `select_assets`.
//
// Replaces ctx.selected_assets with the entries of the `names` argument that
// exist in ctx.assets; names without a matching asset are left out.
// The response carries an empty content array.
// Returns false and fills `err` when `names` is missing from `args`.
bool SelectAssets(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  DCOUT("args " << args);

  if (!args.contains("names")) {
    DCOUT("names param not found");
    err = "`names` param not found.\n";
    return false;
  }

  const std::vector<std::string> requested = args.at("names");

  // Start from an empty selection; only known asset names are kept.
  ctx.selected_assets.clear();
  for (const std::string &candidate : requested) {
    if (ctx.assets.find(candidate) == ctx.assets.end()) {
      continue;
    }
    ctx.selected_assets.push_back(candidate);
  }

  // No payload is reported for this tool.
  result["content"] = nlohmann::json::array();

  return true;
}
// Tool handler for `get_selected_assets`.
//
// Emits one text content item per name in ctx.selected_assets, in stored
// order. `args` is only logged and `err` is not used; always returns true.
bool GetSelectedAssets(Context &ctx, const nlohmann::json &args, nlohmann::json &result, std::string &err) {
  (void)err;
  (void)args;
  DCOUT("args " << args);

  result["content"] = nlohmann::json::array();
  nlohmann::json &items = result["content"];

  for (const std::string &selected : ctx.selected_assets) {
    nlohmann::json item;
    item["type"] = "text";
    item["text"] = selected;

    items.push_back(item);
  }

  return true;
}
} // namespace
bool GetToolsList(Context &ctx, nlohmann::json &result) {
@@ -450,7 +600,7 @@ bool GetToolsList(Context &ctx, nlohmann::json &result) {
{
nlohmann::json j;
j["name"] = "get_all_usd_descriptions";
j["description"] = "Get description of all USD asset";
j["description"] = "Get description of all loaded USD Layers";
nlohmann::json schema;
schema["type"] = "object";
@@ -465,7 +615,39 @@ bool GetToolsList(Context &ctx, nlohmann::json &result) {
{
nlohmann::json j;
j["name"] = "get_usd_description";
j["description"] = "Get description of USD asset";
j["description"] = "Get description of loaded USD Layer";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
schema["properties"]["name"] ={{"type", "string"}}; // TODO: accept multiple names
schema["required"] = nlohmann::json::array({"name"});
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "get_all_asset_descriptions";
j["description"] = "Get description of all Assets";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
//schena["required"] = nlohmann::json::array();
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "get_asset_description";
j["description"] = "Get description of Asset";
nlohmann::json schema;
schema["type"] = "object";
@@ -518,6 +700,62 @@ bool GetToolsList(Context &ctx, nlohmann::json &result) {
}
{
nlohmann::json j;
j["name"] = "load_usd_layer_from_asset";
j["description"] = "Load USD as Layer from Asset";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
schema["properties"]["name"] ={{"type", "string"}};
schema["required"] = nlohmann::json::array({"name"});
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "read_asset";
j["description"] = "Read asset as base64 string";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
schema["properties"]["name"] ={{"type", "string"}};
schema["required"] = nlohmann::json::array({"name"});
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "store_asset";
j["description"] = "Store asset(e.g. USD, texture). `data` is base64 encoded string.";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
schema["properties"]["data"] ={{"type", "string"}};
schema["properties"]["name"] ={{"type", "string"}};
schema["properties"]["description"] ={{"type", "string"}}; // optional
schema["required"] = nlohmann::json::array({"data", "name"});
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "list_primspecs";
@@ -608,7 +846,37 @@ bool GetToolsList(Context &ctx, nlohmann::json &result) {
}
{
  // Tool entry: `select_assets`. Takes `names`, an array of asset-name strings.
  nlohmann::json j;
  j["name"] = "select_assets";
  j["description"] = "Select assets. Specify by the array of asset names.";

  nlohmann::json schema;
  schema["type"] = "object";
  schema["properties"] = nlohmann::json::object();

  // string[]
  // Built field by field on purpose: with brace initialization,
  // `{"items", {"type", "string"}}` makes `items` the JSON array
  // ["type", "string"] rather than the object {"type": "string"}.
  nlohmann::json names_schema;
  names_schema["type"] = "array";
  names_schema["items"]["type"] = "string";
  schema["properties"]["names"] = names_schema;

  schema["required"] = nlohmann::json::array({"names"});

  j["inputSchema"] = schema;
  result["tools"].push_back(j);
}
{
nlohmann::json j;
j["name"] = "get_selected_assets";
j["description"] = "Get selected asset names";
nlohmann::json schema;
schema["type"] = "object";
schema["properties"] = nlohmann::json::object();
j["inputSchema"] = schema;
result["tools"].push_back(j);
}
std::cout << result << "\n";
return true;
@@ -644,6 +912,20 @@ bool CallTool(Context &ctx, const std::string &tool_name, const nlohmann::json &
return SaveScreenshot(ctx, args, result, err);
} else if (tool_name == "read_screenshot") {
return ReadScreenshot(ctx, args, result, err);
} else if (tool_name == "read_asset") {
DCOUT("read_asset");
return ReadAsset(ctx, args, result, err);
} else if (tool_name == "store_asset") {
DCOUT("store_asset");
return StoreAsset(ctx, args, result, err);
} else if (tool_name == "get_all_asset_descriptions") {
return GetAllAssetDescriptions(ctx, args, result, err);
} else if (tool_name == "get_asset_description") {
return GetAssetDescription(ctx, args, result, err);
} else if (tool_name == "select_assets") {
return SelectAssets(ctx, args, result, err);
} else if (tool_name == "get_selected_assets") {
return GetSelectedAssets(ctx, args, result, err);
#if 0
} else if (tool_name == "get_texture_asset") {
return GetTextureAsset(ctx, args, result, err);

View File

@@ -30,6 +30,7 @@ ui_state['mcpServerConnected'] = "Not connected";
ui_state['mcpClient'] = null;
ui_state['screenshot'] = null;
ui_state['usdLoader'] = null;
// Create a parameters object
@@ -41,7 +42,9 @@ const params = {
connectMcpServer: connectMCPServer,
mcpServerConnected: ui_state['mcpServerConnected'],
take_screenshot: takeScreenshot,
send_screenshot_to_mcp: sendScreenshotToMCP
send_screenshot_to_mcp: sendScreenshotToMCP,
read_selected_assets: readSelectedAssets,
clear_scene: clearScene
};
// Add controls
@@ -64,6 +67,8 @@ gui.add(params, 'connectMcpServer').name('Connect MCP Server');
gui.add(params, 'mcpServerConnected').name('MCP Server Connected').listen();
gui.add(params, 'take_screenshot').name('Take Screenshot');
gui.add(params, 'send_screenshot_to_mcp').name('Send screenshot to MCP');
gui.add(params, 'read_selected_assets').name('Read selected assets');
gui.add(params, 'clear_scene').name('Clear Scene');
function takeScreenshot() {
@@ -111,6 +116,33 @@ function sendScreenshotToMCP() {
}
/**
 * Query the MCP server for the currently selected asset names (tool
 * `get_selected_assets`) and reload the scene with those assets.
 *
 * Logs an error and returns without touching the scene when the MCP client or
 * the USD loader is not available. The returned promise settles only after
 * the scene reload has finished, so callers may await it.
 *
 * @returns {Promise<void>}
 */
async function readSelectedAssets() {
  const client = ui_state['mcpClient'];
  if (!client) {
    console.error('MCP client is not connected');
    return;
  }

  const loader = ui_state['usdLoader'];
  if (!loader) {
    console.error('USD loader is not initialized');
    return;
  }

  let names;
  try {
    const response = await client.callTool({
      name: 'get_selected_assets',
      arguments: {}
    });

    // Each content item carries one asset name in `text`.
    const items = (response && Array.isArray(response.content)) ? response.content : [];
    names = items.map((item) => item.text);
    console.log('Selected assets:', names);
  } catch (error) {
    console.error('Error getting selected assets:', error);
    return;
  }

  // Awaited so that a failure while reloading is reported here instead of
  // surfacing as an unhandled promise rejection.
  try {
    await reloadScenes(loader, names);
  } catch (error) {
    console.error('Error reloading scenes:', error);
  }
}
async function connectMCPServer() {
const mcpServerUrl = ui_state['mcpServer'];
console.log('Connecting to MCP server:', mcpServerUrl);
@@ -165,6 +197,29 @@ async function connectMCPServer() {
params.mcpServerConnected = ui_state['mcpServerConnected']; // Update GUI parameter
}
/**
 * Fetch an asset from the MCP server (tool `read_asset`) and wrap it as a
 * data URI.
 *
 * @param {string} name Asset name to read.
 * @returns {Promise<string|undefined>} A `data:application/octet-stream;base64,...`
 *   URI, or `undefined` when the client is not connected, the tool call fails,
 *   or the response carries no text payload (an error is logged in each case).
 */
async function getAsset(name) {
  const client = ui_state['mcpClient'];
  if (!client) {
    console.error('MCP client is not connected');
    return;
  }

  try {
    const response = await client.callTool({
      name: 'read_asset',
      arguments: {
        name: name
      }
    });
    console.log('Asset retrieved:', response);

    // Validate the shape before indexing into it; a tool error or an empty
    // content array must not be turned into a bogus data URI.
    const item = (response && Array.isArray(response.content)) ? response.content[0] : undefined;
    if (!item || typeof item.text !== 'string' || response.isError) {
      console.error('Error retrieving asset:', name, response);
      return;
    }

    // data is base64 encoded
    // add mime type prefix (the payload must follow the comma directly, no space)
    return 'data:application/octet-stream;base64,' + item.text;
  } catch (error) {
    console.error('Error retrieving asset:', error);
  }
}
async function loadScenes() {
const loader = new TinyUSDZLoader();
@@ -173,6 +228,8 @@ async function loadScenes() {
// (wait loading/compiling wasm module in the early stage))
await loader.init();
ui_state['usdLoader'] = loader; // Store loader in ui_state
// Use zstd compressed tinyusdz.wasm to save the bandwidth.
//await loader.init({useZstdCompressedWasm: true});
@@ -221,6 +278,114 @@ async function loadScenes() {
}
/**
 * Remove every direct child of `scene` that is neither a light nor a camera,
 * and dispose the geometries, materials and material textures found anywhere
 * below the removed objects.
 *
 * Only the `map`, `normalMap`, `roughnessMap` and `metalnessMap` texture slots
 * are disposed. `envMap` is deliberately not in that list: reloadScenes()
 * passes `defaultMtl.envMap` on to newly built nodes.
 */
function clearScene() {
  // Snapshot the children first: scene.remove() mutates scene.children.
  const objectsToRemove = scene.children.filter(
    (object) => !object.isLight && !object.isCamera
  );

  const textureSlots = ['map', 'normalMap', 'roughnessMap', 'metalnessMap'];

  const disposeMaterial = (material) => {
    for (const slot of textureSlots) {
      if (material[slot]) material[slot].dispose();
    }
    material.dispose();
  };

  objectsToRemove.forEach((object) => {
    scene.remove(object);

    // Dispose of geometries and materials to free memory. Walk the whole
    // subtree: a removed root may hold its meshes as descendants, and they
    // would otherwise never be disposed.
    object.traverse((node) => {
      if (node.geometry) {
        node.geometry.dispose();
      }

      if (node.material) {
        const materials = Array.isArray(node.material) ? node.material : [node.material];
        materials.forEach(disposeMaterial);
      }
    });
  });

  console.log('Scene cleared');
}
/**
 * Replace the current scene content with the given assets.
 *
 * Clears the scene, fetches each asset through getAsset(), loads it with
 * `loader`, builds a three.js node per loaded USD scene and adds the nodes to
 * `scene`, laid out along X with a spacing of 3.0 and centered around x = 0.
 * Assets that cannot be retrieved are skipped with an error log.
 *
 * @param {object} loader USD loader providing `loadAsync(uri)`.
 * @param {string[]} asset_names Names of the assets to load.
 * @returns {Promise<void>}
 */
async function reloadScenes(loader, asset_names) {
  if (!loader) {
    console.error('USD loader is not initialized');
    return;
  }

  // Clear existing scenes first
  clearScene();

  const usd_scenes = [];

  for (const asset_name of (asset_names || [])) {
    console.log('Loading asset:', asset_name);

    const datauri = await getAsset(asset_name);
    if (!datauri) {
      // getAsset() already logged the reason; do not hand `undefined` to the loader.
      console.error('Skipping asset (could not be retrieved):', asset_name);
      continue;
    }
    // Log the size only: the URI embeds the whole base64 payload.
    console.log('Data URI length for asset:', asset_name, datauri.length);

    const usd_scene = await loader.loadAsync(datauri);
    console.log('Loaded USD scene:', usd_scene);
    usd_scenes.push(usd_scene);
  }

  const defaultMtl = ui_state['defaultMtl'];

  const options = {
    overrideMaterial: false, // override USD material with defaultMtl(default 'false')
    envMap: defaultMtl.envMap, // reuse envmap from defaultMtl
    envMapIntensity: ui_state['envMapIntensity'], // default envmap intensity
  };

  // The X offset is applied exactly once per node. Applying it in two separate
  // passes doubles both the spacing and the starting offset.
  let offset = -(usd_scenes.length - 1) * 1.5;
  for (const usd_scene of usd_scenes) {
    const usdRootNode = usd_scene.getDefaultRootNode();

    const threeNode = TinyUSDZLoaderUtils.buildThreeNode(usdRootNode, defaultMtl, usd_scene, options);

    if (usd_scene.getURI().includes('UsdCookie')) {
      // Add extra scaling
      threeNode.scale.x *= 2.5;
      threeNode.scale.y *= 2.5;
      threeNode.scale.z *= 2.5;
    }

    threeNode.position.x += offset;
    offset += 3.0;

    // HACK. upAxis
    threeNode.rotation.x = -Math.PI / 2; // Rotate to match Y-up axis
    //threeNode.rotation.z = Math.PI/2; // Rotate to match Y-up axis

    scene.add(threeNode);
  }
}
const scene = new THREE.Scene();

View File

@@ -50,7 +50,7 @@ for (const [filename, description] of Object.entries(descriptions)) {
console.log(`base64data: ${base64data.substring(0, 100)}...`);
await client.callTool({
name: "load_usd_layer_from_data",
name: "store_asset",
arguments: {
"name": filename,
"data": base64data,
@@ -66,7 +66,7 @@ for (const [filename, description] of Object.entries(descriptions)) {
const descs = await client.callTool({
name: "get_all_usd_descriptions",
name: "get_all_asset_descriptions",
arguments: {
}});
console.log("Descriptions:", descs);
console.log("Descriptions:", descs);