Benchmark Splits
| Split | Purpose | GT | Leaderboard |
|---|---|---|---|
| native_seen_category_dev | method integration and debugging; excluded from final ranking | yes | no |
| native_seen_category_test | held-out instances from known categories | yes | yes |
| native_unseen_category_test | held-out categories for category generalization | yes | yes |
| native_hard_motion_test | multi-joint, narrow-range, non-standard-axis, and high-collision-risk samples | yes | yes |
| static_part_to_articulated_test | part mesh to articulated asset generation | optional | yes |
| static_single_mesh_to_articulated_test | single static mesh to segmented articulated asset generation | optional | yes |
| image_conditioned_real_smoke | in-the-wild readiness and failure analysis | no | no |
Fixed Split Files
All records
6352
Smoke records
20
Unseen categories
small_furniture, storage
Split summary
manifests/splits_v1.0_20260509/split_summary.json
Input Modes
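category_text
Must be declared in run_manifest.json; during evaluation, only the input fields permitted by this mode may be read.
single_static_mesh
Must be declared in run_manifest.json; during evaluation, only the input fields permitted by this mode may be read.
part_mesh
Must be declared in run_manifest.json; during evaluation, only the input fields permitted by this mode may be read.
image_conditioned
Must be declared in run_manifest.json; during evaluation, only the input fields permitted by this mode may be read.
native_reconstruction
Must be declared in run_manifest.json; during evaluation, only the input fields permitted by this mode may be read.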
Metric Groups
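geometry
Missing items are kept as not_computed and are never silently filled with zero.
part
Missing items are kept as not_computed and are never silently filled with zero.
structure_tree
Missing items are kept as not_computed and are never silently filled with zero.
motion_axis_range_direction
Missing items are kept as not_computed and are never silently filled with zero.
physics_simulation
Missing items are kept as not_computed and are never silently filled with zero.
texture_appearance
Missing items are kept as not_computed and are never silently filled with zero.
semantic
Missing items are kept as not_computed and are never silently filled with zero.
generalization
Missing items are kept as not_computed and are never silently filled with zero.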
Leaderboard Policy
- No real method score is published unless a run_manifest and per-sample metrics exist.
- A/B/C/D grid-position labels are never treated as method names.
- Native GT, generated assets, static-to-articulated outputs, and in-the-wild smoke outputs are reported separately.
- Failures stay in the denominator and must be represented by failure.json.
- not_computed is preserved explicitly and is not silently converted to zero or full credit.
Smoke Evaluator
Script
evaluate_submission_smoke_v1.py
Example status
smoke_pass
Example samples
2
Score policy
readiness_only_no_real_method_scores
Benchmark Manifest
{
"schema_version": "articulated_benchmark_manifest_v1.0",
"benchmark_id": "articulated_objects_generation_v1.0_20260509",
"created_utc": "2026-05-09T00:00:00Z",
"status": "schema_ready_no_method_scores",
"native_gt": {
"manifest": "/data/250010098/Unified_dataset/benchmarks/articulated_objects_v1.0_final_20260508/benchmark_manifest.json",
"asset_count": 6352,
"policy": "read_only_native_or_manually_cleaned_assets"
},
"split_plan": [
{
"split": "native_seen_category_dev",
"purpose": "method integration and debugging; excluded from final ranking",
"gt_available": true,
"leaderboard": false
},
{
"split": "native_seen_category_test",
"purpose": "held-out instances from known categories",
"gt_available": true,
"leaderboard": true
},
{
"split": "native_unseen_category_test",
"purpose": "held-out categories for category generalization",
"gt_available": true,
"leaderboard": true
},
{
"split": "native_hard_motion_test",
"purpose": "multi-joint, narrow-range, non-standard-axis, and high-collision-risk samples",
"gt_available": true,
"leaderboard": true
},
{
"split": "static_part_to_articulated_test",
"purpose": "part mesh to articulated asset generation",
"gt_available": "optional",
"leaderboard": true
},
{
"split": "static_single_mesh_to_articulated_test",
"purpose": "single static mesh to segmented articulated asset generation",
"gt_available": "optional",
"leaderboard": true
},
{
"split": "image_conditioned_real_smoke",
"purpose": "in-the-wild readiness and failure analysis",
"gt_available": false,
"leaderboard": false
}
],
"fixed_splits": {
"generator": "/data/250010098/Unified_dataset/articulated_generation_benchmark/build_route3_splits_v1_20260509.py",
"summary": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509/split_summary.json",
"all_splits": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509/all_splits.jsonl",
"smoke_splits": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509/smoke_splits.jsonl",
"split_dir": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509",
"generated_record_count": 6352,
"smoke_count": 20,
"generation_rule": "Stable SHA1 assignment from articulated_objects_v1.0_final_20260508/all_native_release.jsonl; see split_summary.json for the full rule."
},
"input_modes": [
"category_text",
"single_static_mesh",
"part_mesh",
"image_conditioned",
"native_reconstruction"
],
"required_submission_files": [
"run_manifest.json",
"outputs/<sample_id_safe>/asset.sdf",
"outputs/<sample_id_safe>/properties.json",
"outputs/<sample_id_safe>/motion_tree.json",
"outputs/<sample_id_safe>/parts.json",
"outputs/<sample_id_safe>/semantics.json",
"outputs/<sample_id_safe>/renders/closed.png",
"outputs/<sample_id_safe>/renders/mid.png",
"outputs/<sample_id_safe>/renders/open.png"
],
"failure_record": "outputs/<sample_id_safe>/failure.json is required when a sample does not produce a complete asset.",
"metric_groups": [
"geometry",
"part",
"structure_tree",
"motion_axis_range_direction",
"physics_simulation",
"texture_appearance",
"semantic",
"generalization"
],
"schemas": {
"submission_schema": "/data/250010098/Unified_dataset/articulated_generation_benchmark/schemas/submission_schema_v1.0.json",
"leaderboard_entry_schema": "/data/250010098/Unified_dataset/articulated_generation_benchmark/schemas/leaderboard_entry_schema_v1.0.json"
},
"smoke_evaluator": {
"script": "/data/250010098/Unified_dataset/articulated_generation_benchmark/evaluate_submission_smoke_v1.py",
"example_submission": "/data/250010098/Unified_dataset/articulated_generation_benchmark/examples/example_empty_submission/run_manifest.json",
"example_report_dir": "/data/250010098/Unified_dataset/articulated_generation_benchmark/evaluations/smoke_reports/example_empty_submission_20260509",
"score_policy": "readiness_only_no_real_method_scores"
},
"leaderboard_policy": [
"No real method score is published unless a run_manifest and per-sample metrics exist.",
"A/B/C/D grid-position labels are never treated as method names.",
"Native GT, generated assets, static-to-articulated outputs, and in-the-wild smoke outputs are reported separately.",
"Failures stay in the denominator and must be represented by failure.json.",
"not_computed is preserved explicitly and is not silently converted to zero or full credit."
],
"portal": {
"local_html": "/data/250010098/Unified_dataset/articulated_generation_benchmark/portals/route3_benchmark_schema_20260509/index.html",
"visual_portal_url": "http://106.14.105.96:28080/experiments/ud4-route3-perfect-benchmark-schema-20260509/index.html"
}
}
Split Summary
{
"benchmark_id": "articulated_objects_generation_v1.0_20260509",
"by_split_category_counts": {
"native_hard_motion_test": {
"Clock": 10,
"Dispenser": 9,
"architectural_fixtures": 255,
"electronics": 2,
"household_fixtures": 180,
"household_items": 19,
"large_furniture": 15,
"major_appliances": 127,
"small_appliances": 97
},
"native_seen_category_dev": {
"Clock": 2,
"Dispenser": 2,
"architectural_fixtures": 6,
"electronics": 4,
"household_fixtures": 6,
"household_items": 8,
"large_furniture": 8,
"major_appliances": 8,
"small_appliances": 8
},
"native_seen_category_test": {
"Clock": 19,
"Dispenser": 45,
"architectural_fixtures": 826,
"electronics": 88,
"household_fixtures": 539,
"household_items": 697,
"large_furniture": 1594,
"major_appliances": 456,
"small_appliances": 337
},
"native_unseen_category_test": {
"small_furniture": 840,
"storage": 145
}
},
"created_utc": "2026-05-09T00:00:00Z",
"full_generation_rule": [
"Read benchmark_manifest.json from articulated_objects_v1.0_final_20260508.",
"Read all_native_release.jsonl and derive sample_id as source_dataset/object_id.",
"Select 20% of categories, at least 2, by stable SHA1 hash as native_unseen_category_test.",
"Select up to 2 non-unseen samples per source_dataset/category by stable SHA1 hash as native_seen_category_dev.",
"From remaining seen categories, assign hard-motion candidates with quality/category motion hints and stable hash bucket <25 to native_hard_motion_test.",
"Assign remaining seen-category records to native_seen_category_test.",
"Build smoke_splits.jsonl from up to 5 deterministic records per generated split."
],
"generated_record_count": 6352,
"outputs": {
"all_splits": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509/all_splits.jsonl",
"per_split_dir": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509",
"smoke_splits": "/data/250010098/Unified_dataset/articulated_generation_benchmark/manifests/splits_v1.0_20260509/smoke_splits.jsonl"
},
"schema_version": "articulated_benchmark_split_summary_v1.0",
"seed": "route3_articulated_generation_benchmark_v1_20260509",
"smoke_count": 20,
"source_all_records": "/data/250010098/Unified_dataset/benchmarks/articulated_objects_v1.0_final_20260508/all_native_release.jsonl",
"source_asset_count": 6352,
"source_manifest": "/data/250010098/Unified_dataset/benchmarks/articulated_objects_v1.0_final_20260508/benchmark_manifest.json",
"split_counts": {
"native_hard_motion_test": 714,
"native_seen_category_dev": 52,
"native_seen_category_test": 4601,
"native_unseen_category_test": 985
},
"unseen_categories": [
"small_furniture",
"storage"
]
}
Example Smoke Summary
{
"benchmark_id": "articulated_objects_generation_v1.0_20260509",
"failed_check_count": 0,
"generated_utc": "2026-05-09T17:51:54+00:00",
"manifest_errors": [],
"output_dir": "/data/250010098/Unified_dataset/articulated_generation_benchmark/evaluations/smoke_reports/example_empty_submission_20260509",
"sample_count": 2,
"schema_version": "articulated_benchmark_smoke_report_v1.0",
"score_policy": "readiness_only_no_real_method_scores",
"scores": {
"appearance_semantic_score": null,
"generalization_score": null,
"geometry_part_score": null,
"overall_score": null,
"simulation_score": null,
"status": "not_computed",
"structure_motion_score": null,
"validity_score": null
},
"smoke_ok_count": 2,
"status": "smoke_pass",
"status_counts": {
"not_attempted": 2
},
"submission_manifest": "/data/250010098/Unified_dataset/articulated_generation_benchmark/examples/example_empty_submission/run_manifest.json"
}
Submission Schema
{
"schema_version": "articulated_benchmark_submission_v1.0",
"required_top_level_file": "run_manifest.json",
"run_manifest_required_fields": [
"schema_version",
"benchmark_id",
"method",
"run",
"input_policy",
"outputs"
],
"method_required_fields": [
"method_id",
"display_name",
"version",
"code_ref",
"weights_ref",
"input_modes"
],
"run_required_fields": [
"run_id",
"created_utc",
"random_seed",
"sample_count",
"environment"
],
"per_sample_output_required_fields": [
"sample_id",
"split",
"status",
"output_dir",
"asset_sdf",
"properties_json",
"motion_tree_json",
"parts_json",
"semantics_json",
"mesh_files",
"collision_files",
"render_files",
"failure_json"
],
"motion_tree_required_fields": [
"links",
"joints",
"root_link",
"coordinate_frame",
"unit"
],
"joint_required_fields": [
"joint_id",
"joint_type",
"parent",
"child",
"origin_xyz",
"axis_xyz",
"limit_lower",
"limit_upper",
"default_state",
"positive_direction_semantic"
],
"status_values": [
"success",
"failed",
"timeout",
"not_attempted"
],
"failure_required_fields": [
"sample_id",
"stage",
"reason",
"recoverable",
"log_path"
]
}
Leaderboard Entry Schema
{
"schema_version": "articulated_benchmark_leaderboard_entry_v1.0",
"required_fields": [
"schema_version",
"benchmark_id",
"method",
"run",
"scores",
"metric_paths",
"status"
],
"method_fields": [
"method_id",
"display_name",
"version",
"code_ref",
"input_modes"
],
"run_fields": [
"run_id",
"created_utc",
"split",
"sample_count",
"success_count",
"failure_count"
],
"score_fields": [
"overall_score",
"validity_score",
"geometry_part_score",
"structure_motion_score",
"simulation_score",
"appearance_semantic_score",
"generalization_score"
],
"status_values": [
"not_submitted",
"readiness_only",
"not_computed",
"computed",
"invalid_submission"
],
"rules": [
"Do not publish a ranked method entry without a path-backed run manifest.",
"Do not infer method names from grid cell labels.",
"Preserve not_computed as null plus an explicit status.",
"Report native, generated, static-to-articulated, and in-the-wild results in separate tables."
]
}