diff --git a/.gitignore b/.gitignore index 7405b279..315ba238 100644 --- a/.gitignore +++ b/.gitignore @@ -176,11 +176,11 @@ cache *.csv materials -embodichain/toolkits/outputs/* embodichain/toolkits/outputs/* embodichain/database/tmp/* embodichain/database/train/* +embodichain/database/agent_generated_content/ 3rdparty/ Log/ @@ -200,4 +200,8 @@ wandb/ embodichain/VERSION # benchmark results -scripts/benchmark/rl/reports/* \ No newline at end of file +scripts/benchmark/rl/reports/* + +# generated scene assets +*_gym_project/ +backup_*/ diff --git a/configs/gym/pour_water/gym_config.json b/configs/gym/pour_water/gym_config.json index 1c3e2876..c64de34e 100644 --- a/configs/gym/pour_water/gym_config.json +++ b/configs/gym/pour_water/gym_config.json @@ -278,8 +278,7 @@ "use_videos": true } } - }, - "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"] + } }, "robot": { "uid": "CobotMagic", diff --git a/configs/gym/pour_water/gym_config_simple.json b/configs/gym/pour_water/gym_config_simple.json index bcce5bc4..ca45e80b 100644 --- a/configs/gym/pour_water/gym_config_simple.json +++ b/configs/gym/pour_water/gym_config_simple.json @@ -203,7 +203,7 @@ "mode": "modify", "name": "robot/qpos", "params": { - "joint_ids": [6, 13] + "joint_ids": [12, 13, 14, 15] } } }, @@ -227,8 +227,7 @@ "use_videos": true } } - }, - "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"] + } }, "robot": { "uid": "CobotMagic", diff --git a/docs/source/features/failure_monitor_recovery.md b/docs/source/features/failure_monitor_recovery.md new file mode 100644 index 00000000..bb23677d --- /dev/null +++ b/docs/source/features/failure_monitor_recovery.md @@ -0,0 +1,150 @@ +# Failure-Monitor-Recovery Graph + +This document describes the graph-based monitor and recovery pipeline in +EmbodiChain. The runtime assumes failures come from the outside world or the +environment. The agent does not execute error-injection functions. Error +descriptions are prompt context only, used by the recovery agent to choose +useful monitors and recovery policies. + +The core flow is: + +```text +TaskAgent + -> nominal atomic-action graph +RecoveryAgent + -> lightweight recovery bindings +CompileAgent + -> explicit compiled recovery graph +AgentTaskGraph + -> execute nominal edges, switch to recovery edges when monitors trigger +``` + +## Agent Roles + +`TaskAgent` produces one deterministic nominal graph. It writes +`agent_task_graph.json` with semantic nodes, executable edges, a start node, and +a goal node. + +`RecoveryAgent` writes `agent_recovery_spec.json`. This is the LLM-facing file. +It contains only `recovery_bindings`: compact mappings from a nominal edge to a +failure monitor template and an ordered recovery policy. It does not contain +recovery node ids, recovery edge ids, recovery branches, or Python code. + +`CompileAgent` expands the task graph and recovery spec into +`agent_compiled_graph.json`. The compiled artifact contains: + +- `task_graph`: the nominal graph from `TaskAgent` +- `recovery_spec`: the compact authoring spec from `RecoveryAgent` +- `recovery_graph`: explicit recovery nodes, recovery edges, and recovery + branches generated by the compiler +- `metadata`: schema and recovery flags + +Before expansion, `CompileAgent` runs a deterministic normalization pass over +the recovery spec. It canonicalizes common aliases, fills simple fields from +the nominal edge when unambiguous, and only uses one LLM canonicalization call +when the compact spec still contains unresolved semantic intent. + +`AgentTaskGraph` executes the compiled graph through `drive(...)`. + +## Recovery Spec + +The LLM-facing recovery spec is intentionally small: + +```json +{ + "task": "dual_pour_water", + "recovery_bindings": [ + { + "edge_id": "e01_grasp_objects", + "failure_name": "object_moved_before_grasp", + "monitors": [ + {"type": "object_moved", "objects": ["cup", "bottle"], "threshold": 0.02} + ], + "recovery": [ + { + "type": "regrasp_both", + "arms": {"left_arm": "cup", "right_arm": "bottle"}, + "pre_grasp_dis": 0.1, + "force_valid": true + } + ], + "merge": "target", + "repeat_until_success": true + } + ] +} +``` + +Supported monitor templates are: + +- `object_moved`: for stationary objects that unexpectedly shift +- `hold_lost`: for objects that should already be held by an arm + +Supported recovery steps are: + +- `regrasp` +- `regrasp_both` +- `retry_failed_edge` +- `replay_edge` +- `action`, for a direct atomic-action spec when no template is enough + +The compiler converts these bindings into explicit graph topology. It generates +stable recovery nodes and edges, attaches monitor sequences, and adds reusable +self-recovery branches when `repeat_until_success` is true. + +## Runtime Semantics + +`AgentTaskGraph.run(...)` follows the single nominal start-to-goal chain. Each +edge is executed by one `drive(...)` call. + +During any monitored edge, nominal or recovery, `drive(...)` checks monitor +sequences after each environment step. Monitor functions use unified semantics: +`True` means the failure condition has triggered. + +If no monitor triggers, the graph moves to the edge target. If a monitor +triggers, the graph switches to the matching recovery branch and executes its +recovery edges. + +Recovery is repeatable. The compiler can make a recovery edge monitor itself and +point back to itself. That creates a reusable recovery edge: the same recovery +behavior repeats until its monitor no longer triggers or the graph reaches +`max_transitions`. + +Recovery-edge monitors are chosen by the compiler from the recovery step type. +For example, a `hold_lost` binding that recovers by `regrasp` will not monitor +the regrasp edge with `hold_lost`, because the object may not be held until the +grasp completes. The compiler uses object-displacement monitors for regrasp +steps, then uses hold-loss monitors again for movement/retry steps where the +object should already be held. + +For hold-loss failures inside a later recovery step, the compiler can insert a +small repair edge, such as regrasping the cup, then retry the failed recovery +edge and resume the remaining recovery path. + +## Validation Rules + +`graph_spec.py` validates the compiled graph before execution: + +- the nominal graph is one deterministic start-to-goal chain +- every edge id is unique +- every node reference is defined +- every nominal and recovery edge has at least one arm action +- recovery branches reference existing nominal and recovery edges +- branch recovery edges are path-contiguous +- a branch merges only to the failed edge source or target +- executable error functions and legacy recovery action lists are rejected + +`expand_recovery_spec(...)` separately validates the LLM-facing spec. It rejects +compiled graph keys such as `recovery_nodes`, `recovery_edges`, and +`recovery_branches`, because those are compiler output, not LLM output. + +## Current Artifacts + +For a generated task such as `DualPourWater`, the graph workflow produces: + +- `agent_task_graph.json` +- `agent_recovery_spec.json` +- `agent_compiled_graph.json` + +The old Python-code generation artifact is no longer part of the active +pipeline. diff --git a/docs/source/features/generative_sim/agents.md b/docs/source/features/generative_sim/agents.md index 213050a7..72f1f470 100644 --- a/docs/source/features/generative_sim/agents.md +++ b/docs/source/features/generative_sim/agents.md @@ -1,83 +1,39 @@ -# EmbodiAgent(aborted) +# EmbodiAgent -EmbodiAgent is a hierarchical multi-agent system that enables robots to perform complex manipulation tasks through closed-loop planning, code generation, and validation. The system combines vision-language models (VLMs) and large language models (LLMs) to translate high-level goals into executable robot actions. +EmbodiAgent is a hierarchical multi-agent system that enables robots to perform complex manipulation tasks through graph planning, graph compilation, runtime recovery, and environment-based success evaluation. The system combines vision-language models (VLMs) and large language models (LLMs) to translate high-level goals into executable robot actions. ## Quick Start ### Prerequisites -Ensure you have access to Azure OpenAI or a compatible LLM endpoint. +Ensure you have access to OpenAI or an OpenAI-compatible LLM endpoint. Do not +commit credentials to the repository. -```bash -# Set environment variables -export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" -export AZURE_OPENAI_API_KEY="your-api-key" -``` - -### Using Different LLM/VLM APIs - -The system uses LangChain's `AzureChatOpenAI` by default. To use different LLM/VLM providers, you can modify the `create_llm` function in `embodichain/agents/hierarchy/llm.py`. - -#### Azure OpenAI -```bash -export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" -export AZURE_OPENAI_API_KEY="your-api-key" -export OPENAI_API_VERSION="2024-10-21" # Optional, defaults to "2024-10-21" -``` - -#### OpenAI -To use OpenAI directly instead of Azure, modify `llm.py`: -```python -from langchain_openai import ChatOpenAI - -def create_llm(*, temperature=0.0, model="gpt-4o"): - return ChatOpenAI( - temperature=temperature, - model=model, - api_key=os.getenv("OPENAI_API_KEY"), - ) -``` +EmbodiAgent uses the shared gen-sim MLLM entry point +`embodichain.gen_sim.mllm`. Configure it with shell environment variables: -Then set: -```bash -export OPENAI_API_KEY="your-api-key" +```shell +OPENAI_API_KEY="" +OPENAI_BASE_URL="" +OPENAI_MODEL="gpt-4o" +EMBODICHAIN_LLM_PROXY="" ``` -#### Other Providers -You can use other LangChain-compatible providers by modifying the `create_llm` function, for example: - -**Anthropic Claude:** -```python -from langchain_anthropic import ChatAnthropic +`OPENAI_BASE_URL` is optional for the default OpenAI endpoint. Set it when using +an OpenAI-compatible proxy or vendor endpoint. -def create_llm(*, temperature=0.0, model="claude-3-opus-20240229"): - return ChatAnthropic( - temperature=temperature, - model=model, - anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"), - ) -``` - -**Google Gemini:** -```python -from langchain_google_genai import ChatGoogleGenerativeAI - -def create_llm(*, temperature=0.0, model="gemini-pro"): - return ChatGoogleGenerativeAI( - temperature=temperature, - model=model, - google_api_key=os.getenv("GOOGLE_API_KEY"), - ) -``` +The same entry point also reads the gen-sim LLM config at +`embodichain/gen_sim/simready_pipeline/configs/gen_config.json`, plus local +ignored `.env` files when present. ### Run the System Run the agent system with the following command: ```bash -python embodichain/lab/scripts/run_agent.py \ +python -m embodichain.gen_sim.action_agent_pipeline.cli.run_agent \ --task_name YourTask \ --gym_config configs/gym/your_task/gym_config.yaml \ - --agent_config configs/gym/agent/your_agent/agent_config.json \ + --agent_config embodichain/gen_sim/action_agent_pipeline/configs/your_agent/agent_config.json \ --regenerate False ``` @@ -85,51 +41,63 @@ python embodichain/lab/scripts/run_agent.py \ - `--task_name`: Name identifier for the task - `--gym_config`: Path to the gym environment configuration file (``.json``, ``.yaml``, or ``.yml``) - `--agent_config`: Path to the agent configuration file (defines prompts and agent behavior) -- `--regenerate`: If `True`, forces regeneration of plans/code even if cached +- `--regenerate`: If `True`, forces regeneration of graph artifacts even if cached ## System Architecture The system operates on a closed-loop control cycle: - **Observe**: The `TaskAgent` perceives the environment via multi-view camera inputs. -- **Plan**: It decomposes the goal into natural language steps. -- **Code**: The `CodeAgent` translates steps into executable Python code using atomic actions. -- **Execute**: The code runs in the environment; runtime errors are caught immediately. -- **Validate**: The `ValidationAgent` analyzes the result images, selects the best camera angle, and judges success. -- **Refine**: If validation fails, feedback is sent back to the agents to regenerate the plan or code. +- **Plan**: It decomposes the goal into a nominal atomic-action graph. +- **Recover**: The `RecoveryAgent` writes lightweight monitor-to-recovery bindings. +- **Compile**: The `CompileAgent` expands bindings into an executable recovery graph artifact. +- **Execute**: The graph runtime executes atomic actions and switches to recovery branches when monitors trigger. +- **Evaluate**: After execution, the environment reports task success through predicates such as `env.is_task_success()` and configurable success specs. +- **Regenerate**: When graph artifacts are stale or need replacement, run with `--regenerate True` to rebuild them. --- ## Core Components ### TaskAgent -*Located in:* `embodichain/agents/hierarchy/task_agent.py` +*Located in:* `embodichain/gen_sim/action_agent_pipeline/agents/task_agent.py` + +Responsible for high-level graph planning. It parses visual observations and +outputs a nominal atomic-action graph. + +* Generates `agent_task_graph.json` +* Uses atomic actions exactly as described in the prompt context +* Leaves recovery bindings to `RecoveryAgent` + +### RecoveryAgent +*Located in:* `embodichain/gen_sim/action_agent_pipeline/agents/recovery_agent.py` -Responsible for high-level reasoning. It parses visual observations and outputs a structured plan. +Generates compact monitor-to-recovery bindings for the nominal graph. -* For every step, it generates a specific condition (e.g., "The cup must be held by the gripper") which is used later by the ValidationAgent. -* Prompt Strategies: - * `one_stage_prompt`: Direct VLM-to-Plan generation. - * `two_stage_prompt`: Separates visual analysis from planning logic. +* Generates `agent_recovery_spec.json` +* Uses possible external failures only as prompt context +* Outputs `recovery_bindings` only; it does not write recovery nodes, recovery edges, or recovery branches -### CodeAgent -*Located in:* `embodichain/agents/hierarchy/code_agent.py` +### CompileAgent +*Located in:* `embodichain/gen_sim/action_agent_pipeline/agents/compile_agent.py` -Translates natural language plans into executable Python code using atomic actions from the action bank. +Compiles generated atomic-action graph specs into an executable graph artifact. -* Generates Python code that follows strict coding guidelines (no loops, only provided APIs) -* Executes code in a sandboxed environment with immediate error detection -* Uses Abstract Syntax Tree (AST) parsing to ensure code safety and correctness -* Supports few-shot learning through code examples in the configuration +* Expands `agent_recovery_spec.json` into explicit recovery nodes, edges, and branches +* Canonicalizes aliases and simple omissions before expansion; uses at most one LLM call only when a recovery spec contains unresolved semantic intent +* Packages the nominal task graph, recovery spec, and compiled recovery graph into `agent_compiled_graph.json` +* Executes compiled graph JSON through the graph runtime +* Does not call an LLM to generate Python control code -### ValidationAgent -*Located in:* `embodichain/agents/hierarchy/validation_agent.py` +### Success Evaluation +*Located in:* `embodichain/lab/gym/envs/tasks/tableware/configurable_success.py` -Closes the loop by verifying if the robot actually achieved what it planned. +Reports whether the executed graph achieved the task goal. The current baseline uses environment predicates instead of an image-based validation agent. -* Uses a specialized LLM call (`select_best_view_dir`) to analyze images from all cameras and pick the single best angle that proves the action's outcome, ignoring irrelevant views. -* If an error occurs (runtime or logic), it generates a detailed explanation which is fed back to the `TaskAgent` or `CodeAgent` for the next attempt. +* Calls task-level checks such as `env.is_task_success()` after graph execution +* Supports config-driven object/container/pose predicates through `configurable_success` +* Logs success status from the runner; it does not run an LLM-based image validation loop --- @@ -137,32 +105,34 @@ Closes the loop by verifying if the robot actually achieved what it planned. The `Agent` configuration block controls the context provided to the LLMs. Prompt files are resolved in the following order: -1. **Config directory**: Task-specific prompt files in the same directory as the agent configuration file (e.g., `configs/gym/agent/pour_water_agent/`) -2. **Default prompts directory**: Reusable prompt templates in `embodichain/agents/prompts/` +1. **Config directory**: Task-specific prompt files in the same directory as the agent configuration file (e.g., `embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/`) +2. **Default prompts directory**: Reusable prompt templates in `embodichain/gen_sim/action_agent_pipeline/prompts/` | Parameter | Description | Typical Use | | :--- | :--- | :--- | | `task_prompt` | Task-specific goal description | "Pour water from the red cup to the blue cup." | | `basic_background` | Physical rules & constraints | World coordinate system definitions, safety rules. | | `atom_actions` | API Documentation | List of available functions (e.g., `drive(action='pick', ...)`). | -| `code_prompt` | Coding guidelines | "Use provided APIs only. Do not use loops." | -| `code_example` | Few-shot examples | Previous successful code snippets to guide style. | +| `error_functions` | Failure context only | Possible external failure descriptions for recovery spec design. | +| `monitor_functions` | Monitor API documentation | Available runtime monitor functions. | +| `recovery_rules` | Recovery spec rules | Constraints for lightweight monitor and recovery bindings. | --- ## File Structure ```text -embodichain/agents/ -├── hierarchy/ +embodichain/gen_sim/action_agent_pipeline/ +├── agents/ │ ├── agent_base.py # Abstract base handling prompts & images -│ ├── task_agent.py # Plan generation logic -│ ├── code_agent.py # Code generation & AST execution engine -│ ├── validation_agent.py # Visual analysis & view selection +│ ├── task_agent.py # Nominal graph generation logic +│ ├── recovery_agent.py # Recovery binding generation logic +│ ├── compile_agent.py # Graph compilation & execution interface │ └── llm.py # LLM configuration and instances -├── mllm/ -│ └── prompt/ # Prompt templates (LangChain) -└── prompts/ # Agent prompt templates +├── env_adapters/tableware/ # Tableware task adapters and success predicates +├── prompt_builders/ # Prompt templates +├── prompts/ # Reusable agent prompt context +└── graph_spec.py # Graph schema and normalization helpers ``` --- @@ -171,5 +141,5 @@ embodichain/agents/ - [Online Data Streaming](../online_data.md) — Streaming live simulation data for training - [RL Architecture](../../overview/rl/index.rst) — RL training pipeline and algorithms -- [Atomic Actions Tutorial](../../tutorial/atomic_actions.rst) — Action primitives used by the CodeAgent +- [Atomic Actions Tutorial](../../tutorial/atomic_actions.rst) — Action primitives used by the graph runtime - [Supported Tasks](../../resources/task/index.rst) — Available task environments diff --git a/docs/source/features/generative_sim/index.rst b/docs/source/features/generative_sim/index.rst index 1f7c759f..c6fa9426 100644 --- a/docs/source/features/generative_sim/index.rst +++ b/docs/source/features/generative_sim/index.rst @@ -7,3 +7,5 @@ Generative Simulation collects EmbodiChain features for generating simulation-re :maxdepth: 2 SimReady Asset Pipeline + EmbodiAgent + Failure Monitor Recovery <../failure_monitor_recovery.md> diff --git a/embodichain/agents/hierarchy/code_agent.py b/embodichain/agents/hierarchy/code_agent.py deleted file mode 100644 index 1a4c84d2..00000000 --- a/embodichain/agents/hierarchy/code_agent.py +++ /dev/null @@ -1,288 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -from embodichain.agents.hierarchy.agent_base import AgentBase -from langchain_core.prompts import ChatPromptTemplate -import os -import numpy as np -from typing import Dict, Tuple -from embodichain.agents.mllm.prompt import CodePrompt -from embodichain.data import database_agent_prompt_dir -from pathlib import Path -import re -import importlib.util -from datetime import datetime - - -def format_execution_history(execution_history): - if not execution_history or len(execution_history) == 0: - return "None." - - return "\n\n".join(f"{i}. {entry}" for i, entry in enumerate(execution_history, 1)) - - -def extract_python_code_and_text(llm_response: str) -> Tuple[str, str]: - """ - Extract exactly ONE python code block from the LLM response, - and return: - - code: the content inside the python block - - text: all remaining explanation text (outside the code block) - - Raises ValueError if zero or multiple python blocks are found. - """ - - pattern = r"```python\s*(.*?)\s*```" - matches = list(re.finditer(pattern, llm_response, re.DOTALL)) - - if len(matches) == 0: - raise ValueError("No python code block found in LLM response.") - if len(matches) > 1: - raise ValueError("Multiple python code blocks found in LLM response.") - - match = matches[0] - code = match.group(1).strip() - - # Optional sanity check - if not code.startswith("#") and not code.startswith("drive("): - raise ValueError( - f"Invalid code block content. Expected `drive(...)` or `# TASK_COMPLETE`, got:\n{code}" - ) - - # Extract remaining text (before + after the code block) - text_before = llm_response[: match.start()].strip() - text_after = llm_response[match.end() :].strip() - - explanation_text = "\n\n".join(part for part in [text_before, text_after] if part) - - return code, explanation_text - - -def format_llm_response_md( - llm_analysis: str, # plain-text explanation (NO code) - extracted_code: str, # validated executable code - step_id: int = None, - execution_history: str = None, - obs_image_path: Path = None, - md_file_path: Path = None, -) -> str: - ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - header = f"## Step: {step_id if step_id is not None else '-'} | {ts}\n\n" - - history_block = "" - if execution_history: - history_block = ( - "### Execution History (Input to LLM)\n\n" - "```\n" - f"{execution_history}\n" - "```\n\n" - ) - - image_block = "" - if obs_image_path is not None and md_file_path is not None: - try: - rel_path = obs_image_path.relative_to(md_file_path.parent) - except ValueError: - # Fallback: just use filename - rel_path = obs_image_path.name - - image_block = ( - "### Observation Image\n\n" f"![]({Path(rel_path).as_posix()})\n\n" - ) - - body = ( - image_block + history_block + "### LLM Analysis\n\n" - f"{llm_analysis.strip()}\n\n" - "### Executed Code\n\n" - "```python\n" - f"{extracted_code.strip()}\n" - "```\n\n" - "---\n\n" - ) - - return header + body - - -class CodeAgent(AgentBase): - query_prefix = "# " - query_suffix = "." - prompt: ChatPromptTemplate - prompt_kwargs: Dict[str, Dict] - - def __init__(self, llm, **kwargs) -> None: - super().__init__(**kwargs) - if llm is None: - raise ValueError( - "LLM is None. Please set the following environment variables:\n" - " - AZURE_OPENAI_ENDPOINT\n" - " - AZURE_OPENAI_API_KEY\n" - "Example:\n" - " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" - " export AZURE_OPENAI_API_KEY='your-api-key'" - ) - self.llm = llm - - def generate(self, **kwargs): - log_dir = kwargs.get( - "log_dir", Path(database_agent_prompt_dir) / self.task_name - ) - file_path = log_dir / "agent_generated_code.py" - - # Check if the file already exists - if not kwargs.get("regenerate", False): - if file_path.exists(): - print(f"Code file already exists at {file_path}, skipping writing.") - return file_path, kwargs, None - - # Generate code via LLM - prompt = getattr(CodePrompt, self.prompt_name)( - **kwargs, - ) - - # insert feedback if exists - if len(kwargs.get("error_messages", [])) != 0: - # just use the last one - last_code = kwargs["generated_codes"][-1] - last_error = kwargs["error_messages"][-1] - last_observation = ( - kwargs.get("observation_feedbacks")[-1] - if kwargs.get("observation_feedbacks") - else None - ) - - # Add extra human message with feedback - feedback_msg = self.build_feedback_message( - last_code, last_error, last_observation - ) - prompt.messages.append(feedback_msg) - - llm_code = self.llm.invoke(prompt) - - # Normalize content - llm_code = getattr(llm_code, "content", str(llm_code)) - - print(f"\033[92m\nCode agent output:\n{llm_code}\n\033[0m") - - # Write the code to the file if it does not exist - match = re.search(r"```python\n(.*?)\n```", llm_code, re.DOTALL) - if match: - code_to_save = match.group(1).strip() - else: - code_to_save = llm_code.strip() - - file_path.parent.mkdir(parents=True, exist_ok=True) - with open(file_path, "w") as f: - f.write(code_to_save) - print(f"Generated function code saved to {file_path}") - - return file_path, kwargs, code_to_save - - def act(self, code_file_path, **kwargs): - """Execute generated code with proper execution environment. - - Supports two modes: - 1. If code defines 'create_agent_action_list' function, call it - 2. If code contains module-level drive() calls, execute them directly - """ - import ast - - # Read the generated code file - with open(code_file_path, "r") as f: - code_content = f.read() - - # Build execution namespace with necessary imports - ns = { - "__builtins__": __builtins__, - "__name__": "__main__", - "__file__": str(code_file_path), - "kwargs": kwargs, # Make kwargs available for injection - } - - # Import atom action functions into namespace - try: - exec( - "from embodichain.lab.sim.atom_actions import *", - ns, - ns, - ) - except Exception as e: - raise RuntimeError( - "Failed to import embodichain.lab.sim.atom_actions" - ) from e - - # Parse code to check if it defines a function or contains module-level calls - tree = ast.parse(code_content) - - # Check if code defines create_agent_action_list function - has_function = any( - isinstance(node, ast.FunctionDef) - and node.name == "create_agent_action_list" - for node in tree.body - ) - - if has_function: - # Execute code (function will be defined in namespace) - exec(code_content, ns, ns) - - # Call the function if it exists - if "create_agent_action_list" in ns: - result = ns["create_agent_action_list"](**kwargs) - print("Function executed successfully.") - return result - else: - raise AttributeError( - "The function 'create_agent_action_list' was not found after execution." - ) - else: - # Code contains module-level drive() calls - # AST transformer to inject **kwargs into function calls - class InjectKwargs(ast.NodeTransformer): - def visit_Call(self, node): - self.generic_visit(node) - # Inject **kwargs if not present - has_kwargs = any( - kw.arg is None - and isinstance(kw.value, ast.Name) - and kw.value.id == "kwargs" - for kw in node.keywords - ) - if not has_kwargs: - node.keywords.append( - ast.keyword( - arg=None, value=ast.Name(id="kwargs", ctx=ast.Load()) - ) - ) - return node - - # Transform AST to inject kwargs - tree = InjectKwargs().visit(tree) - ast.fix_missing_locations(tree) - - # Compile and execute transformed code - compiled_code = compile(tree, filename=str(code_file_path), mode="exec") - exec(compiled_code, ns, ns) - - # Collect actions from drive() calls if they were executed - # drive() function stores actions in env._episode_action_list - if "env" in kwargs: - env = kwargs["env"] - if hasattr(env, "_episode_action_list") and env._episode_action_list: - print( - f"Collected {len(env._episode_action_list)} actions from module-level drive() calls." - ) - return env._episode_action_list - - print("Code executed successfully, but no actions were collected.") - return [] diff --git a/embodichain/agents/hierarchy/llm.py b/embodichain/agents/hierarchy/llm.py deleted file mode 100644 index 1fc1bbfe..00000000 --- a/embodichain/agents/hierarchy/llm.py +++ /dev/null @@ -1,72 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -import os -from langchain_openai import AzureChatOpenAI - -# ------------------------------------------------------------------------------ -# Environment configuration -# ------------------------------------------------------------------------------ - -# Clear proxy if not needed (optional, can be set via environment variables) - -os.environ["ALL_PROXY"] = "" -os.environ["all_proxy"] = "" - -# Proxy configuration (optional, uncomment if needed) -# os.environ["HTTP_PROXY"] = "http://127.0.0.1:7890" -# os.environ["HTTPS_PROXY"] = "http://127.0.0.1:7890" - -# API version (optional, defaults to "2024-10-21" if not set) -# os.environ["OPENAI_API_VERSION"] = "2024-10-21" - -# Note: AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY must be set via environment variables -# Example in bash: -# export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" -# export AZURE_OPENAI_API_KEY="your-api-key" - -# ------------------------------------------------------------------------------ -# LLM factory -# ------------------------------------------------------------------------------ - - -def create_llm(*, temperature=0.0, model="gpt-4o"): - return AzureChatOpenAI( - temperature=temperature, - model=model, - azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), - api_key=os.getenv("AZURE_OPENAI_API_KEY"), - api_version=os.getenv("OPENAI_API_VERSION", "2024-10-21"), - ) - - -# ------------------------------------------------------------------------------ -# LLM instances -# ------------------------------------------------------------------------------ - - -# Initialize LLM instances, but handle errors gracefully for documentation builds -def _create_llm_safe(*, temperature=0.0, model="gpt-4o"): - try: - return create_llm(temperature=temperature, model=model) - except Exception: - return None - - -task_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") -code_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") -validation_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") -view_selection_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") diff --git a/embodichain/agents/hierarchy/task_agent.py b/embodichain/agents/hierarchy/task_agent.py deleted file mode 100644 index 9c4f37cc..00000000 --- a/embodichain/agents/hierarchy/task_agent.py +++ /dev/null @@ -1,157 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -from typing import List, Dict, Tuple -from embodichain.agents.hierarchy.agent_base import AgentBase -from langchain_core.prompts import ChatPromptTemplate -from embodichain.data import database_2d_dir -from embodichain.utils.utility import load_txt -from embodichain.agents.mllm.prompt import TaskPrompt -from embodichain.data import database_agent_prompt_dir -from pathlib import Path -import numpy as np -import time -import re - -USEFUL_INFO = """The error may be caused by: -1. You did not follow the basic background information, especially the world coordinate system with its xyz directions. -2. You did not take into account the NOTE given in the atom actions or in the example functions. -3. You did not follow the steps of the task descriptions.\n -""" - - -def extract_plan_and_validation(text: str) -> Tuple[str, List[str], List[str]]: - def get_section(src: str, name: str, next_name) -> str: - if next_name: - pat = re.compile( - rf"\[{name}\]\s*:\s*(.*?)\s*(?=\[{next_name}\]\s*:|\Z)", - re.DOTALL | re.IGNORECASE, - ) - else: - pat = re.compile( - rf"\[{name}\]\s*:\s*(.*?)\s*\Z", - re.DOTALL | re.IGNORECASE, - ) - m = pat.search(src) - return m.group(1).strip() if m else "" - - step_re = re.compile( - r"Step\s*\d+\s*:.*?(?=Step\s*\d+\s*:|\Z)", - re.DOTALL | re.IGNORECASE, - ) - - # ---- plans ---- - plans_raw = get_section(text, "PLANS", "VALIDATION_CONDITIONS") - plan_steps = [m.group(0).rstrip() for m in step_re.finditer(plans_raw)] - plan_str = "\n".join(plan_steps) - - # normalized plan list (strip "Step k:") - plan_list = [] - for step in plan_steps: - content = re.sub(r"^Step\s*\d+\s*:\s*", "", step, flags=re.IGNORECASE).strip() - if content: - plan_list.append(content) - - # ---- validations ---- - vals_raw = get_section(text, "VALIDATION_CONDITIONS", None) - validation_list = [] - for m in step_re.finditer(vals_raw): - content = re.sub( - r"^Step\s*\d+\s*:\s*", "", m.group(0), flags=re.IGNORECASE - ).strip() - if content: - validation_list.append(content) - - return plan_str, plan_list, validation_list - - -class TaskAgent(AgentBase): - prompt: ChatPromptTemplate - object_list: List[str] - target: np.ndarray - prompt_name: str - prompt_kwargs: Dict[str, Dict] - - def __init__(self, llm, **kwargs) -> None: - super().__init__(**kwargs) - if llm is None: - raise ValueError( - "LLM is None. Please set the following environment variables:\n" - " - AZURE_OPENAI_ENDPOINT\n" - " - AZURE_OPENAI_API_KEY\n" - "Example:\n" - " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" - " export AZURE_OPENAI_API_KEY='your-api-key'" - ) - self.llm = llm - - def generate(self, **kwargs) -> str: - log_dir = kwargs.get( - "log_dir", Path(database_agent_prompt_dir) / self.task_name - ) - file_path = log_dir / "agent_generated_plan.txt" - - # Check if the file already exists - if not kwargs.get("regenerate", False): - if file_path.exists(): - print(f"Plan file already exists at {file_path}, skipping writing.") - return load_txt(file_path) - - # Generate query via LLM - prompts_ = getattr(TaskPrompt, self.prompt_name)(**kwargs) - if isinstance(prompts_, list): - # TODO: support two-stage prompts with feedback - start_time = time.time() - response = self.llm.invoke(prompts_[0]) - query = response.content - print( - f"\033[92m\nSystem tasks output ({np.round(time.time()-start_time, 4)}s):\n{query}\n\033[0m" - ) - for prompt in prompts_[1:]: - temp = prompt["kwargs"] - temp.update({"query": query}) - start_time = time.time() - response = self.llm.invoke(prompt["prompt"].invoke(temp)) - query = response.content - print( - f"\033[92m\nSystem tasks output({np.round(time.time()-start_time, 4)}s):\n{query}\n\033[0m" - ) - else: - # insert feedback if exists - if len(kwargs.get("error_messages", [])) != 0: - # just use the last one - last_plan = kwargs["generated_plans"][-1] - last_code = kwargs["generated_codes"][-1] - last_error = kwargs["error_messages"][-1] - - # Add extra human message with feedback - feedback_msg = self.build_feedback_message( - last_plan, last_code, last_error - ) - prompts_.messages.append(feedback_msg) - - response = self.llm.invoke(prompts_) - print(f"\033[92m\nTask agent output:\n{response.content}\n\033[0m") - - file_path.parent.mkdir(parents=True, exist_ok=True) - with open(file_path, "w") as f: - f.write(response.content) - print(f"Generated task plan saved to {file_path}") - - return response.content - - def act(self, *args, **kwargs): - return super().act(*args, **kwargs) diff --git a/embodichain/agents/hierarchy/validation_agent.py b/embodichain/agents/hierarchy/validation_agent.py deleted file mode 100644 index 759ac7e2..00000000 --- a/embodichain/agents/hierarchy/validation_agent.py +++ /dev/null @@ -1,241 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -import os -from langchain_core.messages import SystemMessage, HumanMessage -from abc import ABCMeta -from embodichain.utils.utility import encode_image_from_path -import glob -from embodichain.agents.hierarchy.llm import view_selection_llm - - -def save_obs_image(obs_image, save_dir, step_id=None): - """ - Save observation image using encode_image() and return its file path. - """ - import base64 - from embodichain.utils.utility import encode_image - - if obs_image is None: - return None - - if isinstance(save_dir, str): - from pathlib import Path - - save_dir = Path(save_dir) - - save_dir.mkdir(parents=True, exist_ok=True) - - name = f"obs_step_{step_id}.png" if step_id is not None else "obs.png" - img_path = save_dir / name - - # Encode to base64 - base64_image = encode_image(obs_image) - - # Decode base64 → bytes - img_bytes = base64.b64decode(base64_image) - - # Write to file - with open(img_path, "wb") as f: - f.write(img_bytes) - - return img_path - - -def get_obj_position_info(env): - import json - - position_info = {} - obj_uids = env.sim.get_rigid_object_uid_list() - for obj_name in obj_uids: - target_obj = env.sim.get_rigid_object(obj_name) - target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0)[:3, 3] - position_info[obj_name] = target_obj_pose.tolist() - return json.dumps(position_info, indent=4) - - -class ValidationAgent(metaclass=ABCMeta): - - def __init__(self, llm, **kwargs) -> None: - super().__init__() - for key, value in kwargs.items(): - setattr(self, key, value) - if llm is None: - raise ValueError( - "LLM is None. Please set the following environment variables:\n" - " - AZURE_OPENAI_ENDPOINT\n" - " - AZURE_OPENAI_API_KEY\n" - "Example:\n" - " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" - " export AZURE_OPENAI_API_KEY='your-api-key'" - ) - self.llm = llm - - def validate(self, step_names, problematic_code, error_message, image_files): - # Construct the prompt - prompt = f""" - Analyze the execution of the following robot task: - - Task name: {self.task_name} - Task description: {self.task_description} - Basic background knowledge: {self.basic_background} - - You will be given images showing each step of the execution. For the step sequence: - {', '.join(step_names)} - - Provide the following analysis: - 1. Decide whether the full task succeeded or failed. - 2. If the task failed, provide a precise and detailed explanation. - - Below is a potentially problematic piece of code and the corresponding execution error: - - ```python - {problematic_code} - # Execution error: - {error_message} - Explain whether (and how) this code contributed to the observed failure. - """ - - # Prepare message content for API call - user_content = [] - - # Add textual prompt - user_content.append({"type": "text", "text": prompt}) - - # Add images and step names - for img_path in image_files: - filename = os.path.basename(img_path) - first_underscore_pos = filename.find("_") - if first_underscore_pos != -1: - step_name = filename[first_underscore_pos + 1 :].rsplit(".", 1)[0] - else: - step_name = filename.rsplit(".", 1)[0] - - # Add step name - user_content.append({"type": "text", "text": f"Step: {step_name}"}) - - # Add image as base64 - base64_image = encode_image_from_path(img_path) - user_content.append( - { - "type": "image_url", - "image_url": {"url": f"data:image/png;base64,{base64_image}"}, - } - ) - - messages = [ - SystemMessage( - content="You are a robot task execution analysis expert. Please analyze the provided image sequence." - ), - HumanMessage(content=user_content), - ] - - response = self.llm.invoke(messages) - return response.content - - def select_best_view_dir( - self, img_dirs: dict, action_description: str, valid_condition: str - ): - """ - img_dirs: { - "cam_1": Path, - "cam_2": Path, - "cam_3": Path - } - """ - - # --- collect final images --- - last_images = {} - for cam_id, cam_dir in img_dirs.items(): - imgs = sorted( - glob.glob(os.path.join(cam_dir, "obs_step_*.png")), - key=lambda p: int(os.path.basename(p).split("_")[-1].split(".")[0]), - ) - if imgs: - last_images[cam_id] = imgs[-1] - - if not last_images: - raise ValueError("No images found in any camera directory.") - - # --- system prompt --- - system_prompt = ( - "You are a robot perception assistant specialized in VIEW SELECTION.\n\n" - "TASK:\n" - "- You are given ONE final observation image from EACH camera view.\n" - "- Your job is NOT to judge success or failure.\n" - "- Your job is ONLY to select the SINGLE camera view that is MOST SUITABLE\n" - " for OBJECT-LEVEL validation of the action result.\n\n" - "ACTION CONTEXT:\n" - "- The robot has just executed ONE atomic action.\n" - "- You are given the action intention and the expected object-level outcome\n" - " ONLY to help you decide which view best reveals that outcome.\n\n" - "SELECTION CRITERIA (PRIORITY ORDER):\n" - "- Prefer views with:\n" - " * the clearest visibility of the relevant object(s)\n" - " * minimal occlusion by the arm or environment\n" - " * the clearest evidence related to the expected object-level result\n" - " (e.g., contact, separation, support, stability)\n\n" - "STRICT CONSTRAINTS:\n" - "- Do NOT judge robot motion quality or execution accuracy.\n" - "- Do NOT reason about numeric values (distance, angle, offset).\n" - "- Do NOT decide whether the action succeeded or failed.\n" - "- If multiple views are acceptable, choose the clearest overall view.\n\n" - "OUTPUT FORMAT (STRICT):\n" - "Output EXACTLY ONE of the following tokens:\n" - "- cam_1\n" - "- cam_2\n" - "- cam_3\n" - ) - - # --- human content --- - human_content = [ - { - "type": "text", - "text": ( - "Select the best camera view for object-level validation.\n\n" - "--------------------------------------------------\n" - "ACTION DESCRIPTION (INTENT ONLY):\n" - f"{action_description}\n\n" - "EXPECTED OBJECT-LEVEL RESULT (REFERENCE ONLY):\n" - f"{valid_condition}\n" - "--------------------------------------------------" - ), - } - ] - - for cam_id, img_path in last_images.items(): - img_b64 = encode_image_from_path(img_path) - human_content.extend( - [ - {"type": "text", "text": f"View candidate: {cam_id}"}, - { - "type": "image_url", - "image_url": {"url": f"data:image/png;base64,{img_b64}"}, - }, - ] - ) - - messages = [ - SystemMessage(content=system_prompt), - HumanMessage(content=human_content), - ] - - response = view_selection_llm.invoke(messages).content.strip() - - if response not in img_dirs: - raise ValueError(f"Invalid camera selection from LLM: {response}") - - return response diff --git a/embodichain/agents/mllm/prompt/__init__.py b/embodichain/agents/mllm/prompt/__init__.py deleted file mode 100644 index 5d5a8d46..00000000 --- a/embodichain/agents/mllm/prompt/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# All rights reserved. -# ---------------------------------------------------------------------------- - -from .task_prompt import TaskPrompt -from .code_prompt import CodePrompt diff --git a/embodichain/agents/mllm/prompt/code_prompt.py b/embodichain/agents/mllm/prompt/code_prompt.py deleted file mode 100644 index 794e554f..00000000 --- a/embodichain/agents/mllm/prompt/code_prompt.py +++ /dev/null @@ -1,149 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -from langchain_core.messages import SystemMessage -from langchain_core.prompts import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, -) -from embodichain.utils.utility import encode_image - - -class CodePrompt: - @staticmethod - def one_stage_prompt(**kwargs) -> ChatPromptTemplate: - prompt = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content="You are an AI assistant that can generate python code to execute robot arms." - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "text", - "text": ( - "Generate a Python code snippet that accomplishes the following task:\n" - "{query}\n\n" - "You must strictly follow the rules and available functions described below:\n" - "{code_prompt}\n\n" - "Here are some reference examples of the expected output code:\n" - "{code_example}\n\n" - ), - } - ] - ), - ] - ) - return prompt.invoke(kwargs) - - @staticmethod - def unified_prompt(observations, **kwargs): - """ - Unified Vision→Code prompt: - - Model observes the image - - Understands the scene and the task goal - - Generates final executable Python code using atomic robot APIs - """ - - # Encode the image - observation = observations["rgb"] - kwargs.update({"observation": encode_image(observation)}) - - prompt = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content=( - "You are a reliable Vision-Language-Code robot assistant. " - "You observe an image, understand the scene and the task goal, " - "and generate correct Python code using ONLY the allowed atomic robot actions. " - "Your final output must be a single Python code block." - ) - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "image_url", - "image_url": { - "url": "data:image/png;base64,{observation}", - }, - }, - { - "type": "text", - "text": ( - "### Task Goal\n" - "{task_prompt}\n\n" - "### Environment Background\n" - "{basic_background}\n\n" - "### Allowed Atomic Actions\n" - "{atom_actions}\n\n" - "### Code Rules\n" - "{code_prompt}\n\n" - "### Reference Code Examples\n" - "{code_example}\n\n" - "### Final Instructions\n" - "Understand the scene from the image and generate final executable Python code " - "that performs the task using ONLY the allowed atomic actions.\n\n" - "Your entire response must be EXACTLY one Python code block:\n" - "```python\n" - "# your solution code here\n" - "```\n" - ), - }, - ] - ), - ] - ) - - return prompt.invoke(kwargs) - - @staticmethod - def one_stage_prompt_according_to_task_plan(**kwargs) -> ChatPromptTemplate: - prompt = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content=( - "You are a reliable robot control code generator.\n" - "Your task is to generate Python code that executes robot arm actions.\n\n" - "CRITICAL RULES:\n" - "- The TASK PLAN defines the available atomic actions, rules, and execution logic.\n" - "- You MUST strictly follow the TASK PLAN.\n" - "- The CONSTRAINTS section contains additional global constraints you must obey.\n" - "- Do NOT invent new actions, functions, parameters, or control flow.\n" - "- You MAY include Python comments (# ...) inside the code.\n" - "- Your ENTIRE response MUST be a single Python code block.\n" - "- The code block MUST be directly executable without modification.\n" - "- Do NOT include any text, explanation, or markdown outside the Python code block.\n" - ) - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "text", - "text": ( - "TASK PLAN (atomic actions, rules, and intended behavior):\n" - "{task_plan}\n\n" - "GLOBAL CONSTRAINTS (must be satisfied):\n" - "{code_prompt}\n\n" - "REFERENCE CODE (style and structure only; do NOT copy logic):\n" - "{code_example}\n\n" - "Generate the corrected Python code now." - ), - } - ] - ), - ] - ) - return prompt.invoke(kwargs) diff --git a/embodichain/agents/mllm/prompt/task_prompt.py b/embodichain/agents/mllm/prompt/task_prompt.py deleted file mode 100644 index 7db5d1bb..00000000 --- a/embodichain/agents/mllm/prompt/task_prompt.py +++ /dev/null @@ -1,144 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -import torch -from langchain_core.messages import SystemMessage -from langchain_core.prompts import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, -) -from embodichain.utils.utility import encode_image - - -class TaskPrompt: - @staticmethod - def one_stage_prompt(observations, **kwargs): - """ - Hybrid one-pass prompt: - Step 1: VLM analyzes the image and extracts object IDs. - Step 2: LLM generates task instructions using only those IDs. - """ - # Encode image - observation = ( - observations["rgb"].cpu().numpy() - if isinstance(observations["rgb"], torch.Tensor) - else observations["rgb"] - ) - kwargs.update({"observation": encode_image(observation)}) - - # Build hybrid prompt - prompt = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content=( - "You are a precise and reliable robotic manipulation planner. " - "Given a camera observation and a task description, you must generate " - "a clear, step-by-step task plan for a robotic arm. " - "All actions must strictly use the provided atomic API functions, " - "and the plan must be executable without ambiguity." - ) - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "image_url", - "image_url": { - "url": "data:image/png;base64,{observation}", - }, - }, - { - "type": "text", - "text": ( - "Here is the latest camera observation.\n" - "First, analyze the scene in the image.\n" - "Then, using the context below, produce an actionable task plan.\n\n" - "**Environment background:** \n{basic_background}\n\n" - '**Task goal:** \n"{task_prompt}"\n\n' - "**Available atomic actions:** \n{atom_actions}\n" - ), - }, - ] - ), - ] - ) - - # Return the prompt template and kwargs to be executed by the caller - return prompt.invoke(kwargs) - - @staticmethod - def two_stage_prompt(observations, **kwargs): - # for VLM generate image descriptions - prompt = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content="You are a helpful assistant to operate a robotic arm with a camera to generate task plans according to descriptions." - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "image_url", - "image_url": { - "url": "data:image/jpg;base64,{observation}", - }, - }, - { - "type": "text", - "text": "What is in the image? Return answer with their potential effects.", - }, - ] - ), - ] - ) - - observation = ( - observations["rgb"].cpu().numpy() - if isinstance(observations["rgb"], torch.Tensor) - else observations["rgb"] - ) - kwargs.update({"observation": encode_image(observation)}) - # for LLM generate task descriptions - prompt_query = ChatPromptTemplate.from_messages( - [ - SystemMessage( - content="You are a helpful assistant to operate a robotic arm with a camera to generate task plans according to descriptions." - ), - HumanMessagePromptTemplate.from_template( - [ - { - "type": "image_url", - "image_url": { - "url": "data:image/jpg;base64,{observation}", - }, - }, - { - "type": "text", - "text": "Here is analysis for this image: {query}.", - }, - { - "type": "text", - "text": ( - "Using the context below, produce an actionable task plan.\n\n" - "**Environment background:** \n{basic_background}\n\n" - '**Task goal:** \n"{task_prompt}"\n\n' - "**Available atomic actions:** \n{atom_actions}\n" - ), - }, - ] - ), - ] - ) - - return [prompt.invoke(kwargs), {"prompt": prompt_query, "kwargs": kwargs}] diff --git a/embodichain/agents/prompts/basic_background.txt b/embodichain/agents/prompts/basic_background.txt deleted file mode 100644 index dc6d1c30..00000000 --- a/embodichain/agents/prompts/basic_background.txt +++ /dev/null @@ -1,42 +0,0 @@ -The environment uses a right-handed world coordinate system, where 1 unit equals 1 meter. -All robot poses are represented as 4×4 homogeneous transformation matrices. - -The robot base coordinate frame is the ONLY authoritative frame for all spatial reasoning, planning, and action generation. - -CAMERA AND IMAGE INTERPRETATION - -The camera is positioned in front of the robot, facing the robot arm and looking toward the robot base. -Because of this viewpoint, the rendered image is horizontally mirrored relative to the robot base frame. -This mirroring affects LEFT–RIGHT only. There is NO vertical or depth inversion. - -Mirror mapping (image → robot base frame): - -* Image left corresponds to robot right -* Image right corresponds to robot left -* Image up corresponds to robot up -* Image down corresponds to robot down - -REQUIRED REASONING PERSPECTIVE (NON-NEGOTIABLE) - -You must ignore the camera and rendered image orientation when reasoning. -All spatial reasoning must be performed as if you are physically located at the robot base, looking outward along the robot’s +x (forward) direction. - -Do NOT reason from the camera viewpoint. -Do NOT trust left/right as shown in the image. -Always remap image left/right before reasoning. - -ROBOT BASE COORDINATE DEFINITIONS - -All directions below are defined strictly in the robot base frame: - -* Moving forward increases x -* Moving backward decreases x -* Moving left increases y (appears as right in the image) -* Moving right decreases y (appears as left in the image) -* Moving up increases z -* Moving down decreases z - -ROBOT INITIALIZATION AND TERMINATION - -Both robot arms start in predefined initial configurations with their end-effectors open. -At task completion, both arms must be returned to their initial poses. \ No newline at end of file diff --git a/embodichain/agents/prompts/code_example.txt b/embodichain/agents/prompts/code_example.txt deleted file mode 100644 index c2952fed..00000000 --- a/embodichain/agents/prompts/code_example.txt +++ /dev/null @@ -1,35 +0,0 @@ -# Python scripts -# Use the right arm to grasp bottle, move to the target location (x=0.2, y=0.1), and then open the gripper to release the object. - -```python -# Step 1 — Reach and grasp the bottle -drive( - right_arm_action=grasp( - robot_name="right_arm", - obj_name="bottle", - ), -) - -# Step 2 — Move to target location -drive( - right_arm_action=move_to_absolute_position( - robot_name="right_arm", - x=0.2, - y=0.1, - ), -) - -# Step 3 — Open gripper to release the object -drive( - right_arm_action=open_gripper( - robot_name="right_arm", - ), -) - -# Step 4 — Return the arm to the initial pose -drive( - right_arm_action=back_to_initial_pose( - robot_name="right_arm", - ), -) -``` \ No newline at end of file diff --git a/embodichain/agents/prompts/code_prompt.txt b/embodichain/agents/prompts/code_prompt.txt deleted file mode 100644 index 3fadf1c9..00000000 --- a/embodichain/agents/prompts/code_prompt.txt +++ /dev/null @@ -1,7 +0,0 @@ -Constraints: -- Every atomic action MUST be executed via a single drive(...) call. -- Each drive(...) call must directly contain the atomic action(s); do NOT define actions separately and then pass them into drive. -- For single-arm execution: specify the active arm’s action and explicitly set the unused arm to None within the same drive(...) call. -- For dual-arm execution: both arms’ actions MUST be specified within the same drive(...) call. -- Use exactly one drive(...) call per step; no exceptions. -- Output MUST be executable Python code only: no explanations, no comments, no markdown, and no extra text. \ No newline at end of file diff --git a/embodichain/data/__init__.py b/embodichain/data/__init__.py index c768425d..efb66bc0 100644 --- a/embodichain/data/__init__.py +++ b/embodichain/data/__init__.py @@ -20,7 +20,7 @@ database_dir = EMBODICHAIN_DEFAULT_DATABASE_ROOT database_2d_dir = os.path.join(database_dir, "2dasset") -database_agent_prompt_dir = os.path.join(database_dir, "agent_prompt") +database_agent_prompt_dir = os.path.join(database_dir, "agent_generated_content") database_demo_dir = os.path.join(database_dir, "demostration") from . import assets diff --git a/embodichain/data/dataset.py b/embodichain/data/dataset.py index 694ca5c4..9422a64f 100644 --- a/embodichain/data/dataset.py +++ b/embodichain/data/dataset.py @@ -158,7 +158,9 @@ def get_data_path(data_path_in_config: str) -> str: 2. If a matching file/directory exists under ``EMBODICHAIN_DEFAULT_DATA_ROOT`` (which can be overridden via the ``EMBODICHAIN_DATA_ROOT`` environment variable), return that path. - 3. Otherwise, resolve via the registered data-class download mechanism. + 3. If a matching file/directory exists under the repository root, return + that path. + 4. Otherwise, resolve via the registered data-class download mechanism. Args: data_path_in_config (str): The dataset path in the format @@ -177,6 +179,11 @@ def get_data_path(data_path_in_config: str) -> str: if os.path.exists(local_path): return local_path + repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + repo_path = os.path.join(repo_root, data_path_in_config) + if os.path.exists(repo_path): + return repo_path + # Fall back to the data-class download mechanism split_str = data_path_in_config.split("/") dataset_name = split_str[0] diff --git a/embodichain/gen_sim/action_agent_pipeline/__init__.py b/embodichain/gen_sim/action_agent_pipeline/__init__.py new file mode 100644 index 00000000..0517d273 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/__init__.py @@ -0,0 +1,21 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +"""Action-agent graph compilation and atomic-action runtime.""" + +__all__: list[str] = [] diff --git a/embodichain/gen_sim/action_agent_pipeline/agent_graph.py b/embodichain/gen_sim/action_agent_pipeline/agent_graph.py new file mode 100644 index 00000000..d96f169f --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agent_graph.py @@ -0,0 +1,196 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, field +from typing import Any + +from embodichain.gen_sim.action_agent_pipeline.atom_actions import drive + +__all__ = [ + "AgentGraphEdge", + "AgentGraphNode", + "AgentRecoveryBranch", + "AgentTaskGraph", + "ExecutedActionList", +] + + +@dataclass +class AgentGraphNode: + """Semantic keyframe in an atomic-action task graph.""" + + id: str + semantic: str = "" + + +@dataclass +class AgentGraphEdge: + """Executable transition between two graph nodes.""" + + id: str + source: str + target: str + left_arm_action: Any = None + right_arm_action: Any = None + monitor_sequences: list[list[Any]] | None = None + monitor_labels: list[str] = field(default_factory=list) + is_recovery: bool = False + + +@dataclass +class AgentRecoveryBranch: + """Mapping from one edge monitor trigger to recovery edge transitions.""" + + edge_id: str + monitor_index: int + recovery_edges: list[str] + + +class ExecutedActionList: + """Action sequence already executed online by the graph runtime.""" + + already_executed = True + + def __init__(self, actions: list[Any]) -> None: + self.actions = actions + + def __len__(self) -> int: + return len(self.actions) + + def __iter__(self): + return iter(self.actions) + + def __getitem__(self, index): + return self.actions[index] + + +class AgentTaskGraph: + """Atomic-action graph with monitor-triggered recovery edge switching.""" + + def __init__(self, start: str, goal: str, max_transitions: int = 1000) -> None: + self.start = start + self.goal = goal + self.max_transitions = max_transitions + self.nodes: dict[str, AgentGraphNode] = {} + self.edges: dict[str, AgentGraphEdge] = {} + self.outgoing: dict[str, list[str]] = defaultdict(list) + self.recovery_branches: dict[tuple[str, int], AgentRecoveryBranch] = {} + + def add_node(self, node_id: str, semantic: str = "") -> "AgentTaskGraph": + self.nodes[node_id] = AgentGraphNode(node_id, semantic) + return self + + def add_edge( + self, + edge_id: str, + source: str, + target: str, + *, + left_arm_action=None, + right_arm_action=None, + monitor_sequences=None, + monitor_labels=None, + is_recovery: bool = False, + ) -> "AgentTaskGraph": + self.edges[edge_id] = AgentGraphEdge( + id=edge_id, + source=source, + target=target, + left_arm_action=left_arm_action, + right_arm_action=right_arm_action, + monitor_sequences=monitor_sequences, + monitor_labels=list(monitor_labels or []), + is_recovery=is_recovery, + ) + self.outgoing[source].append(edge_id) + return self + + def add_recovery( + self, + edge_id: str, + monitor_index: int, + recovery_edges: list[str], + ) -> "AgentTaskGraph": + self.recovery_branches[(edge_id, monitor_index)] = AgentRecoveryBranch( + edge_id=edge_id, + monitor_index=monitor_index, + recovery_edges=list(recovery_edges), + ) + return self + + def run(self, env=None, **kwargs) -> ExecutedActionList: + current = self.start + pending_edges: list[str] = [] + continuation_stack: list[list[str]] = [] + executed_actions: list[Any] = [] + transitions = 0 + + while current != self.goal or pending_edges or continuation_stack: + transitions += 1 + if transitions > self.max_transitions: + raise RuntimeError("Agent task graph exceeded max_transitions.") + + if not pending_edges and continuation_stack: + pending_edges = continuation_stack.pop() + + edge_id = ( + pending_edges.pop(0) if pending_edges else self._next_edge(current) + ) + edge = self.edges[edge_id] + result = drive( + left_arm_action=edge.left_arm_action, + right_arm_action=edge.right_arm_action, + monitor_sequences=edge.monitor_sequences, + env=env, + return_result=True, + **kwargs, + ) + executed_actions.extend(result["actions"]) + + monitor_index = result["monitor_index"] + if monitor_index is not None: + branch = self.recovery_branches.get((edge.id, monitor_index)) + if branch is None: + raise RuntimeError( + f"No recovery branch for edge '{edge.id}' monitor {monitor_index}." + ) + branch_final_target = self.edges[branch.recovery_edges[-1]].target + continuation_edges = list(pending_edges) + if branch_final_target == edge.source and edge.source != edge.target: + continuation_edges = [edge.id, *continuation_edges] + elif branch_final_target != edge.target: + raise RuntimeError( + f"Recovery branch for edge '{edge.id}' must merge to its source or target." + ) + + if continuation_edges: + continuation_stack.append(continuation_edges) + pending_edges = list(branch.recovery_edges) + current = edge.source + continue + + current = edge.target + + return ExecutedActionList(executed_actions) + + def _next_edge(self, node_id: str) -> str: + for edge_id in self.outgoing[node_id]: + if not self.edges[edge_id].is_recovery: + return edge_id + raise RuntimeError(f"No nominal outgoing edge from node '{node_id}'.") diff --git a/embodichain/gen_sim/action_agent_pipeline/agents/__init__.py b/embodichain/gen_sim/action_agent_pipeline/agents/__init__.py new file mode 100644 index 00000000..0270853e --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agents/__init__.py @@ -0,0 +1,25 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__ = [ + "agent_base", + "compile_agent", + "llm", + "recovery_agent", + "task_agent", +] diff --git a/embodichain/agents/hierarchy/agent_base.py b/embodichain/gen_sim/action_agent_pipeline/agents/agent_base.py similarity index 88% rename from embodichain/agents/hierarchy/agent_base.py rename to embodichain/gen_sim/action_agent_pipeline/agents/agent_base.py index 8956bb6f..fc967f65 100644 --- a/embodichain/agents/hierarchy/agent_base.py +++ b/embodichain/gen_sim/action_agent_pipeline/agents/agent_base.py @@ -14,15 +14,15 @@ # limitations under the License. # ---------------------------------------------------------------------------- +from __future__ import annotations + from abc import ABCMeta import os -from pathlib import Path + from embodichain.utils.utility import load_txt -import embodichain.agents.mllm.prompt as mllm_prompt -from embodichain.data import database_2d_dir -def _resolve_prompt_path(file_name: str, config_dir: str = None) -> str: +def _resolve_prompt_path(file_name: str, config_dir: str | None = None) -> str: # If absolute path, use directly if os.path.isabs(file_name): if os.path.exists(file_name): @@ -35,7 +35,7 @@ def _resolve_prompt_path(file_name: str, config_dir: str = None) -> str: if os.path.exists(config_path): return config_path - # Try agents/prompts directory (for reusable prompts) + # Try action_agent_pipeline/prompts directory for reusable prompts. agents_prompts_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "prompts" ) @@ -74,7 +74,7 @@ def __init__(self, **kwargs) -> None: for key, val in self.prompt_kwargs.items(): if val["type"] == "text": file_path = _resolve_prompt_path(val["name"], config_dir) - val["content"] = load_txt(file_path) # ← store content here + val["content"] = load_txt(file_path) else: raise ValueError( f"Now only support `text` type but {val['type']} is given." @@ -87,7 +87,7 @@ def act(self, *args, **kwargs): pass def get_composed_observations(self, **kwargs): - ret = {"observations": kwargs.get("env").get_obs_for_agent()} + ret = {} for key, val in self.prompt_kwargs.items(): ret[key] = val["content"] ret.update(kwargs) diff --git a/embodichain/gen_sim/action_agent_pipeline/agents/compile_agent.py b/embodichain/gen_sim/action_agent_pipeline/agents/compile_agent.py new file mode 100644 index 00000000..20a5cea6 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agents/compile_agent.py @@ -0,0 +1,256 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import hashlib +import json +from pathlib import Path +from typing import Any + +from embodichain.gen_sim.action_agent_pipeline.agents.agent_base import AgentBase +from embodichain.data import database_agent_prompt_dir +from embodichain.utils.llm_json import extract_json_object, normalize_json_content + +__all__ = ["CompileAgent"] + +COMPILED_GRAPH_SCHEMA_VERSION = "recovery_bindings_v1" + + +class CompileAgent(AgentBase): + """Compile and execute atomic-action graph specs. + + The compile agent expands the LLM-facing recovery spec into an explicit + monitor/recovery graph artifact, then executes that artifact through the + graph runtime. + """ + + query_prefix = "# " + query_suffix = "." + prompt_kwargs: dict[str, dict[str, Any]] + + def __init__(self, llm, **kwargs) -> None: + for key, value in kwargs.items(): + setattr(self, key, value) + self.prompt_kwargs = kwargs.get("prompt_kwargs", {}) + self.llm = llm + + def generate(self, **kwargs): + log_dir = kwargs.get( + "log_dir", Path(database_agent_prompt_dir) / self.task_name + ) + file_path = Path(log_dir) / "agent_compiled_graph.json" + recovery_enabled = bool(kwargs.get("recovery_enabled", False)) + task_graph = extract_json_object(kwargs["task_graph"]) + raw_recovery_spec = extract_json_object( + kwargs.get("recovery_spec") or _empty_recovery_spec(task_graph) + ) + task_graph_hash = _stable_json_hash(task_graph) + raw_recovery_spec_hash = _stable_json_hash(raw_recovery_spec) + + if not kwargs.get("regenerate", False) and file_path.exists(): + existing_bundle = extract_json_object(file_path.read_text(encoding="utf-8")) + metadata = existing_bundle.get("metadata", {}) + if ( + metadata.get("recovery_enabled") == recovery_enabled + and metadata.get("schema_version") == COMPILED_GRAPH_SCHEMA_VERSION + and metadata.get("task_graph_hash") == task_graph_hash + and metadata.get("raw_recovery_spec_hash") == raw_recovery_spec_hash + ): + print(f"Compiled graph artifact already exists at {file_path}.") + return file_path, kwargs, None + + from embodichain.gen_sim.action_agent_pipeline.graph_spec import ( + expand_recovery_spec, + normalize_recovery_spec, + ) + + recovery_spec, issues = normalize_recovery_spec(task_graph, raw_recovery_spec) + if issues: + recovery_spec = _canonicalize_recovery_spec_with_llm( + self.llm, + task_graph=task_graph, + recovery_spec=raw_recovery_spec, + issues=issues, + ) + recovery_spec, issues = normalize_recovery_spec(task_graph, recovery_spec) + if issues: + issue_text = "; ".join(issues) + raise ValueError( + "CompileAgent could not canonicalize recovery_spec: " + f"{issue_text}" + ) + + recovery_spec_hash = _stable_json_hash(recovery_spec) + if not kwargs.get("regenerate", False) and file_path.exists(): + existing_bundle = extract_json_object(file_path.read_text(encoding="utf-8")) + metadata = existing_bundle.get("metadata", {}) + if ( + metadata.get("recovery_enabled") == recovery_enabled + and metadata.get("schema_version") == COMPILED_GRAPH_SCHEMA_VERSION + and metadata.get("task_graph_hash") == task_graph_hash + and metadata.get("recovery_spec_hash") == recovery_spec_hash + ): + print(f"Compiled graph artifact already exists at {file_path}.") + return file_path, kwargs, None + + recovery_graph = expand_recovery_spec(task_graph, recovery_spec) + content = normalize_json_content( + { + "task_graph": task_graph, + "recovery_spec": recovery_spec, + "recovery_graph": recovery_graph, + "metadata": { + "recovery_enabled": recovery_enabled, + "schema_version": COMPILED_GRAPH_SCHEMA_VERSION, + "task_graph_hash": task_graph_hash, + "raw_recovery_spec_hash": raw_recovery_spec_hash, + "recovery_spec_hash": recovery_spec_hash, + }, + } + ) + + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content, encoding="utf-8") + print(f"Compiled graph artifact saved to {file_path}") + return file_path, kwargs, content + + def act(self, graph_file_path, **kwargs): + graph_file_path = Path(graph_file_path) + if graph_file_path.suffix != ".json": + raise ValueError("CompileAgent executes compiled graph JSON artifacts.") + + from embodichain.gen_sim.action_agent_pipeline.graph_spec import ( + compile_agent_graph_from_file, + ) + + runtime_kwargs = _runtime_kwargs(kwargs, getattr(self, "prompt_kwargs", {})) + graph = compile_agent_graph_from_file( + graph_file_path, + env=runtime_kwargs.get("env"), + ) + result = graph.run(**runtime_kwargs) + print("Compiled agent graph executed successfully.") + return result + + def get_composed_observations(self, **kwargs): + return dict(kwargs) + + +def _canonicalize_recovery_spec_with_llm( + llm, + *, + task_graph: dict[str, Any], + recovery_spec: dict[str, Any], + issues: list[str], +) -> dict[str, Any]: + """Use one LLM call to map ambiguous recovery intent to canonical templates.""" + if llm is None: + raise ValueError( + "Recovery spec is ambiguous and CompileAgent has no LLM for canonicalization." + ) + + from langchain_core.messages import HumanMessage, SystemMessage + + human_content = ( + "Convert the recovery spec into this canonical authoring schema:\n" + "{\n" + ' "task": "",\n' + ' "recovery_bindings": [\n' + " {\n" + ' "edge_id": "",\n' + ' "failure_name": "",\n' + ' "monitors": [\n' + ' {"type": "object_moved", "objects": [""], "threshold": 0.02}\n' + " ],\n" + ' "recovery": [\n' + ' {"type": "regrasp", "robot_name": "", "obj_name": ""}\n' + " ],\n" + ' "merge": "target",\n' + ' "repeat_until_success": true\n' + " }\n" + " ]\n" + "}\n\n" + "Supported monitor templates:\n" + "- object_moved: objects or obj_name, optional threshold\n" + "- hold_lost: robot_name, obj_name, optional threshold\n\n" + "Supported recovery step templates:\n" + "- regrasp: robot_name, obj_name, optional pre_grasp_dis\n" + "- regrasp_both: arms mapping robot_name to obj_name, optional pre_grasp_dis\n" + "- retry_failed_edge\n" + "- replay_edge: edge_id\n" + "- action: left_arm_action and/or right_arm_action atomic action specs\n\n" + "Rules:\n" + "- Use only edge ids from the nominal task graph.\n" + "- Prefer canonical templates over direct action specs.\n" + "- For grasp-phase displacement use object_moved, not hold_lost.\n" + "- For an object lost after it is already held use hold_lost.\n" + "- Use merge target when recovery completes the failed edge state.\n" + "- Use merge source when recovery only restores the failed edge precondition.\n" + "- Keep the output compact; do not emit explicit graph topology.\n\n" + "Unresolved compiler issues:\n" + f"{json.dumps(issues, ensure_ascii=False, indent=2)}\n\n" + "Nominal task graph JSON:\n" + f"{json.dumps(task_graph, ensure_ascii=False, indent=2)}\n\n" + "Input recovery spec JSON:\n" + f"{json.dumps(recovery_spec, ensure_ascii=False, indent=2)}\n" + ) + response = llm.invoke( + [ + SystemMessage( + content=( + "You canonicalize robotic recovery specs. Return only JSON. " + "Do not create recovery_nodes, recovery_edges, or " + "recovery_branches." + ) + ), + HumanMessage(content=human_content), + ] + ) + content = getattr(response, "content", response) + return extract_json_object(content) + + +def _empty_recovery_spec(task_graph: dict[str, Any]) -> dict[str, Any]: + return { + "task": task_graph.get("task", ""), + "recovery_bindings": [], + } + + +def _stable_json_hash(content: dict[str, Any]) -> str: + payload = json.dumps( + content, ensure_ascii=False, sort_keys=True, separators=(",", ":") + ) + return hashlib.sha256(payload.encode("utf-8")).hexdigest() + + +def _runtime_kwargs( + kwargs: dict[str, Any], + prompt_kwargs: dict[str, dict[str, Any]], +) -> dict[str, Any]: + prompt_only_keys = set(prompt_kwargs) + prompt_only_keys.update( + { + "task_graph", + "recovery_spec", + "recovery_graph", + "recovery_enabled", + "observations", + "regenerate", + } + ) + return {key: value for key, value in kwargs.items() if key not in prompt_only_keys} diff --git a/embodichain/gen_sim/action_agent_pipeline/agents/llm.py b/embodichain/gen_sim/action_agent_pipeline/agents/llm.py new file mode 100644 index 00000000..ec89a383 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agents/llm.py @@ -0,0 +1,58 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from embodichain.gen_sim.mllm import create_chat_openai + +__all__ = ["create_llm", "task_llm", "recovery_llm", "compile_llm"] + + +# ------------------------------------------------------------------------------ +# LLM factory +# ------------------------------------------------------------------------------ + + +def create_llm(*, temperature=0.0, model=None): + return create_chat_openai(temperature=temperature, model=model) + + +# ------------------------------------------------------------------------------ +# LLM instances +# ------------------------------------------------------------------------------ + + +# Initialize LLM instances, but handle errors gracefully for documentation builds +def _create_llm_safe(*, temperature=0.0, model=None): + try: + return create_llm(temperature=temperature, model=model) + except Exception: + return None + + +task_llm = _create_llm_safe(temperature=0.0) +recovery_llm = _create_llm_safe(temperature=0.0) +compile_llm = _create_llm_safe(temperature=0.0) + +if __name__ == "__main__": + + def call_llm(prompt, temperature=0.0, model=None): + llm = create_llm(temperature=temperature, model=model) + response = llm.invoke(prompt) + return response.content + + response = call_llm(prompt="Which model you are?", temperature=0.0) + print(response) diff --git a/embodichain/gen_sim/action_agent_pipeline/agents/recovery_agent.py b/embodichain/gen_sim/action_agent_pipeline/agents/recovery_agent.py new file mode 100644 index 00000000..e0aac37f --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agents/recovery_agent.py @@ -0,0 +1,66 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from embodichain.gen_sim.action_agent_pipeline.agents.agent_base import AgentBase +from embodichain.gen_sim.action_agent_pipeline.prompt_builders import RecoveryPrompt +from embodichain.data import database_agent_prompt_dir +from embodichain.utils.llm_json import normalize_json_content +from embodichain.utils.utility import load_txt + +__all__ = ["RecoveryAgent"] + + +class RecoveryAgent(AgentBase): + """Generate lightweight recovery bindings for a nominal task graph.""" + + prompt_name: str + prompt_kwargs: dict[str, dict[str, Any]] + + def __init__(self, llm, **kwargs) -> None: + super().__init__(**kwargs) + if llm is None: + raise ValueError( + "LLM is None. Configure the shared MLLM entry point " + "`embodichain.gen_sim.mllm` with OPENAI_API_KEY, optional " + "OPENAI_MODEL/OPENAI_BASE_URL, or the gen-sim LLM config." + ) + self.llm = llm + + def generate(self, **kwargs) -> str: + log_dir = kwargs.get( + "log_dir", Path(database_agent_prompt_dir) / self.task_name + ) + file_path = Path(log_dir) / "agent_recovery_spec.json" + + if not kwargs.get("regenerate", False) and file_path.exists(): + print(f"Recovery spec already exists at {file_path}.") + return load_txt(file_path) + + prompt = getattr(RecoveryPrompt, self.prompt_name)(**kwargs) + response = self.llm.invoke(prompt) + print(f"\033[91m\nRecovery agent output:\n{response.content}\n\033[0m") + + content = normalize_json_content(response.content) + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content, encoding="utf-8") + print(f"Generated recovery spec saved to {file_path}") + + return content diff --git a/embodichain/gen_sim/action_agent_pipeline/agents/task_agent.py b/embodichain/gen_sim/action_agent_pipeline/agents/task_agent.py new file mode 100644 index 00000000..904a3cef --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/agents/task_agent.py @@ -0,0 +1,69 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from embodichain.gen_sim.action_agent_pipeline.agents.agent_base import AgentBase +from embodichain.gen_sim.action_agent_pipeline.prompt_builders import TaskPrompt +from embodichain.data import database_agent_prompt_dir +from embodichain.utils.llm_json import normalize_json_content +from embodichain.utils.utility import load_txt + +__all__ = ["TaskAgent"] + + +class TaskAgent(AgentBase): + """Generate the nominal atomic-action task graph.""" + + prompt_name: str + prompt_kwargs: dict[str, dict[str, Any]] + + def __init__(self, llm, **kwargs) -> None: + super().__init__(**kwargs) + if llm is None: + raise ValueError( + "LLM is None. Configure the shared MLLM entry point " + "`embodichain.gen_sim.mllm` with OPENAI_API_KEY, optional " + "OPENAI_MODEL/OPENAI_BASE_URL, or the gen-sim LLM config." + ) + self.llm = llm + + def generate(self, **kwargs) -> str: + log_dir = kwargs.get( + "log_dir", Path(database_agent_prompt_dir) / self.task_name + ) + file_path = Path(log_dir) / "agent_task_graph.json" + + if not kwargs.get("regenerate", False) and file_path.exists(): + print(f"Task graph already exists at {file_path}.") + return load_txt(file_path) + + prompt = getattr(TaskPrompt, self.prompt_name)(**kwargs) + response = self.llm.invoke(prompt) + print(f"\033[92m\nTask agent output:\n{response.content}\n\033[0m") + + content = normalize_json_content(response.content) + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content, encoding="utf-8") + print(f"Generated task graph saved to {file_path}") + + return content + + def act(self, *args, **kwargs): + return super().act(*args, **kwargs) diff --git a/embodichain/gen_sim/action_agent_pipeline/atom_action_utils.py b/embodichain/gen_sim/action_agent_pipeline/atom_action_utils.py new file mode 100644 index 00000000..6bfabdfe --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/atom_action_utils.py @@ -0,0 +1,170 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import ast +from typing import List + +from embodichain.utils.logger import log_error + + +def _available_arm_sides(env) -> list[str]: + sides = [] + for side in ("left", "right"): + if len(getattr(env, f"{side}_arm_joints", []) or []) > 0: + sides.append(side) + return sides + + +def resolve_arm_side(env, robot_name: str) -> str: + """Resolve robot_name to an available left/right graph slot.""" + name = robot_name or "" + if "right" in name: + side = "right" + elif "left" in name: + side = "left" + else: + sides = _available_arm_sides(env) + side = "right" if sides == ["right"] else "left" + + if side not in _available_arm_sides(env): + log_error( + f"Requested {side}_arm for robot_name='{robot_name}', but available " + f"control parts are {getattr(env.robot, 'control_parts', None)}.", + error_type=ValueError, + ) + return side + + +def get_arm_states(env, robot_name): + """Get the current state of the specified robot arm. + + Args: + env: The simulation environment. + robot_name: Name of the robot arm (should contain "left" or "right"). + + Returns: + Tuple of (is_left, select_arm, current_qpos, current_pose, current_gripper_state): + - is_left: bool, whether this is the left arm + - select_arm: str, arm identifier ("left_arm" or "right_arm") + - current_qpos: Current joint positions + - current_pose: Current end-effector pose (4x4 matrix) + - current_gripper_state: Current gripper state + """ + left_arm_current_qpos, right_arm_current_qpos = env.get_current_qpos_agent() + left_arm_current_pose, right_arm_current_pose = env.get_current_xpos_agent() + left_arm_current_gripper_state, right_arm_current_gripper_state = ( + env.get_current_gripper_state_agent() + ) + + side = resolve_arm_side(env, robot_name) + is_left = True if side == "left" else False + if hasattr(env, "get_agent_arm_control_part"): + select_arm = env.get_agent_arm_control_part(is_left) + else: + select_arm = "left_arm" if is_left else "right_arm" + + arms = { + "left": ( + left_arm_current_qpos, + left_arm_current_pose, + left_arm_current_gripper_state, + ), + "right": ( + right_arm_current_qpos, + right_arm_current_pose, + right_arm_current_gripper_state, + ), + } + ( + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = arms[side] + + return ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) + + +def extract_drive_calls(code_str: str) -> List[str]: + """Extract all drive() function calls from a code string. + + Args: + code_str: Python code string to parse. + + Returns: + List of code blocks containing drive() calls. + """ + tree = ast.parse(code_str) + lines = code_str.splitlines() + + drive_blocks = [] + + for node in tree.body: + # Match: drive(...) + if ( + isinstance(node, ast.Expr) + and isinstance(node.value, ast.Call) + and isinstance(node.value.func, ast.Name) + and node.value.func.id == "drive" + ): + # AST line numbers are 1-based + start = node.lineno - 1 + end = node.end_lineno + block = "\n".join(lines[start:end]) + drive_blocks.append(block) + + return drive_blocks + + +def apply_offset_to_pose(pose, offset: list): + pose[0, 3] += offset[0] + pose[1, 3] += offset[1] + pose[2, 3] += offset[2] + return pose + + +def resolve_action(action, env, kwargs): + if callable(action): + return action(env=env, **kwargs) + return action + + +def sync_agent_state_from_robot(env) -> None: + """Synchronize cached agent arm states from the physical robot state.""" + action = env.robot.get_qpos().squeeze(0) + for side in ("left", "right"): + is_left = side == "left" + arm_joints = getattr(env, f"{side}_arm_joints", []) + eef_joints = getattr(env, f"{side}_eef_joints", []) + if arm_joints: + arm_qpos = action[arm_joints] + env.set_current_qpos_agent(arm_qpos, is_left=is_left) + env.set_current_xpos_agent( + env.get_arm_fk(qpos=arm_qpos, is_left=is_left), + is_left=is_left, + ) + if eef_joints: + env.set_current_gripper_state_agent( + action[eef_joints][0].unsqueeze(0), + is_left=is_left, + ) diff --git a/embodichain/gen_sim/action_agent_pipeline/atom_actions.py b/embodichain/gen_sim/action_agent_pipeline/atom_actions.py new file mode 100644 index 00000000..b3b0c358 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/atom_actions.py @@ -0,0 +1,1317 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import hashlib +import os +import numpy as np +from embodichain.utils.logger import log_info, log_warning, log_error +from copy import deepcopy +from embodichain.lab.gym.utils.misc import get_rotation_replaced_pose +from embodichain.utils.math import get_offset_pose +import torch +from tqdm import tqdm +from scipy.spatial.transform import Rotation as R +from embodichain.lab.sim.atomic_actions import ( + AtomicActionEngine, + AntipodalAffordance, + MoveActionCfg, + ObjectSemantics, + PickUpActionCfg, + PlaceActionCfg, +) +from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg +from embodichain.toolkits.graspkit.pg_grasp import ( + AntipodalSamplerCfg, + GraspGeneratorCfg, + GripperCollisionCfg, +) +from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import ( + GRASP_ANNOTATOR_CACHE_DIR, +) + +# Import utility functions for atom actions +from embodichain.gen_sim.action_agent_pipeline.atom_action_utils import ( + get_arm_states, + resolve_action, + resolve_arm_side, + sync_agent_state_from_robot, +) +from embodichain.gen_sim.action_agent_pipeline.error_functions import ( + inject_interactive_error, + interactive_error_requested, + restore_interactive_error_input, + setup_interactive_error_input, +) +from embodichain.gen_sim.action_agent_pipeline.monitor_functions import * + +""" +--------------------------------------------Atom action functions---------------------------------------------------- +--------------------------------------------Atom action functions---------------------------------------------------- +--------------------------------------------Atom action functions---------------------------------------------------- +""" + + +def _select_arm_parts(env, robot_name): + is_left = resolve_arm_side(env, robot_name) == "left" + if hasattr(env, "get_agent_arm_control_part"): + arm_part = env.get_agent_arm_control_part(is_left) + hand_part = env.get_agent_eef_control_part(is_left) + else: + arm_part = "left_arm" if is_left else "right_arm" + hand_part = "left_eef" if is_left else "right_eef" + arm_joints = env.left_arm_joints if is_left else env.right_arm_joints + eef_joints = env.left_eef_joints if is_left else env.right_eef_joints + return is_left, arm_part, hand_part, list(arm_joints), list(eef_joints) + + +def _get_arm_aim_yaw_offset(env, robot_name): + offset = getattr(env, "arm_aim_yaw_offset", 0.0) + if isinstance(offset, dict): + side = resolve_arm_side(env, robot_name) + offset = offset.get(f"{side}_arm", offset.get(side, 0.0)) + return float(offset) + + +def _make_motion_generator(env): + return MotionGenerator( + cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=env.robot.uid)) + ) + + +def _make_atomic_engine(env, cfg): + return AtomicActionEngine( + motion_generator=_make_motion_generator(env), + actions_cfg_list=[cfg], + ) + + +def _log_public_atomic_backend( + *, + wrapper_name: str, + action, + cfg, + target_kind: str, + control_part: str, + steps: int, +): + log_info( + "Using public AtomicAction backend: " + f"wrapper={wrapper_name}, " + f"action={action.__class__.__name__}, " + f"cfg={cfg.__class__.__name__}, " + f"cfg_name={cfg.name}, " + f"target={target_kind}, " + f"control_part={control_part}, " + f"steps={steps}.", + color="green", + ) + + +def _state_to_hand_qpos(state, hand_dof, device): + if hand_dof <= 0: + return torch.empty(0, dtype=torch.float32, device=device) + + state = torch.as_tensor(state, dtype=torch.float32, device=device).flatten() + if state.numel() == 0: + return torch.zeros(hand_dof, dtype=torch.float32, device=device) + if state.numel() == hand_dof: + return state + if state.numel() == 1: + return state.repeat(hand_dof) + if state.numel() > hand_dof: + return state[:hand_dof] + + repeat_num = int(np.ceil(hand_dof / state.numel())) + return state.repeat(repeat_num)[:hand_dof] + + +def _is_current_gripper_open(env, current_state, threshold): + current_state = torch.as_tensor( + current_state, + dtype=env.open_state.dtype, + device=env.open_state.device, + ).flatten() + if current_state.numel() == 0: + return True + + open_state = _state_to_hand_qpos( + env.open_state, + max(current_state.numel(), int(env.open_state.numel())), + env.open_state.device, + )[: current_state.numel()] + return torch.all(torch.abs(current_state - open_state) <= threshold).item() + + +def _as_2d_action(action, action_name): + if action is None: + return None + if isinstance(action, torch.Tensor): + action = action.detach().cpu().numpy() + action = np.asarray(action, dtype=np.float32) + if action.ndim == 1: + action = action[None, :] + if action.ndim != 2 or len(action) == 0: + log_error( + f"{action_name} must have shape (T, D) with T > 0, got {action.shape}." + ) + return action + + +def _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids): + _, _, _, arm_joints, eef_joints = _select_arm_parts(env, robot_name) + ( + _, + _, + select_arm_current_qpos, + _, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + if isinstance(trajectory, torch.Tensor): + trajectory = trajectory.detach() + else: + trajectory = torch.as_tensor(trajectory) + + if trajectory.dim() == 3: + trajectory = trajectory[0] + if trajectory.dim() != 2 or trajectory.shape[0] == 0: + raise ValueError( + "Public atomic trajectory must have shape (T, D) or (N, T, D), " + f"got {trajectory.shape}." + ) + + joint_ids = [int(joint_id) for joint_id in joint_ids] + if len(joint_ids) != trajectory.shape[-1]: + raise ValueError( + f"Public atomic joint_ids length {len(joint_ids)} does not match " + f"trajectory width {trajectory.shape[-1]}." + ) + + device = trajectory.device + current_arm_qpos = torch.as_tensor( + select_arm_current_qpos, dtype=torch.float32, device=device + ).flatten() + current_hand_qpos = _state_to_hand_qpos( + select_arm_current_gripper_state, + len(eef_joints), + device, + ) + agent_action = torch.cat([current_arm_qpos, current_hand_qpos], dim=0) + agent_action = agent_action.unsqueeze(0).repeat(trajectory.shape[0], 1) + + joint_id_to_col = {joint_id: col for col, joint_id in enumerate(joint_ids)} + for out_col, joint_id in enumerate(arm_joints + eef_joints): + if joint_id in joint_id_to_col: + agent_action[:, out_col] = trajectory[:, joint_id_to_col[joint_id]] + + return agent_action.detach().cpu().numpy().astype(np.float32) + + +def _sync_agent_state_from_public_action(env, robot_name, action_np): + if action_np is None or len(action_np) == 0: + raise ValueError("Public atomic action is empty; cannot sync agent state.") + + is_left, _, _, arm_joints, eef_joints = _select_arm_parts(env, robot_name) + ( + _, + _, + _, + _, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + final_action = np.asarray(action_np[-1], dtype=np.float32) + arm_dof = len(arm_joints) + arm_qpos = torch.as_tensor( + final_action[:arm_dof], + dtype=torch.float32, + device=env.robot.device, + ) + env.set_current_qpos_agent(arm_qpos, is_left=is_left) + env.set_current_xpos_agent( + env.get_arm_fk(qpos=arm_qpos, is_left=is_left), + is_left=is_left, + ) + + if len(eef_joints) == 0: + return + + eef_qpos = final_action[arm_dof : arm_dof + len(eef_joints)] + state_dof = max(int(torch.as_tensor(select_arm_current_gripper_state).numel()), 1) + if len(eef_qpos) >= state_dof: + gripper_qpos = eef_qpos[:state_dof] + else: + gripper_qpos = np.resize(eef_qpos, state_dof) + + current_gripper_state = torch.as_tensor(select_arm_current_gripper_state) + env.set_current_gripper_state_agent( + torch.as_tensor( + gripper_qpos, + dtype=current_gripper_state.dtype, + device=current_gripper_state.device, + ), + is_left=is_left, + ) + + +def _sync_agent_state_from_public_qpos_action( + env, + robot_name, + action_np, + control_part, +): + if action_np is None or len(action_np) == 0: + raise ValueError("Public atomic action is empty; cannot sync agent state.") + + is_left, arm_part, hand_part, arm_joints, eef_joints = _select_arm_parts( + env, robot_name + ) + final_action = np.asarray(action_np[-1], dtype=np.float32) + arm_dof = len(arm_joints) + + if control_part == arm_part: + arm_qpos = torch.as_tensor( + final_action[:arm_dof], + dtype=torch.float32, + device=env.robot.device, + ) + env.set_current_qpos_agent(arm_qpos, is_left=is_left) + env.set_current_xpos_agent( + env.get_arm_fk(qpos=arm_qpos, is_left=is_left), + is_left=is_left, + ) + return + + if control_part == hand_part: + if len(eef_joints) == 0: + return + ( + _, + _, + _, + _, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + eef_qpos = final_action[arm_dof : arm_dof + len(eef_joints)] + state_dof = max( + int(torch.as_tensor(select_arm_current_gripper_state).numel()), 1 + ) + if len(eef_qpos) >= state_dof: + gripper_qpos = eef_qpos[:state_dof] + else: + gripper_qpos = np.resize(eef_qpos, state_dof) + + current_gripper_state = torch.as_tensor(select_arm_current_gripper_state) + env.set_current_gripper_state_agent( + torch.as_tensor( + gripper_qpos, + dtype=current_gripper_state.dtype, + device=current_gripper_state.device, + ), + is_left=is_left, + ) + return + + raise ValueError(f"Unsupported public qpos control_part: {control_part}.") + + +def _current_agent_action(env, robot_name): + _, _, arm_qpos, _, gripper_state = get_arm_states(env, robot_name) + _, _, _, arm_joints, eef_joints = _select_arm_parts(env, robot_name) + device = env.robot.device + action = torch.cat( + [ + torch.as_tensor(arm_qpos, dtype=torch.float32, device=device).flatten(), + _state_to_hand_qpos(gripper_state, len(eef_joints), device), + ], + dim=0, + ) + expected_width = len(arm_joints) + len(eef_joints) + if action.numel() != expected_width: + raise ValueError( + f"Current agent action width {action.numel()} does not match " + f"configured arm+eef joints ({expected_width})." + ) + return action.unsqueeze(0).detach().cpu().numpy().astype(np.float32) + + +def _public_qpos_move_action( + *, + env, + robot_name: str, + control_part: str, + target_qpos, + start_qpos, + sample_num: int, + kwargs: dict, + log_name: str, +): + if env is None: + raise ValueError("Public qpos MoveAction requires env.") + + device = env.robot.device + cfg = MoveActionCfg( + name="move", + control_part=control_part, + sample_interval=int(sample_num), + ) + engine = _make_atomic_engine(env, cfg) + action = engine._actions["move"] + is_success, trajectory, joint_ids = action.execute( + target=torch.as_tensor(target_qpos, dtype=torch.float32, device=device), + start_qpos=torch.as_tensor(start_qpos, dtype=torch.float32, device=device), + ) + if not is_success: + raise RuntimeError(f"Public qpos MoveAction failed for {log_name}.") + + action_np = _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids) + _log_public_atomic_backend( + wrapper_name=log_name, + action=action, + cfg=cfg, + target_kind="qpos", + control_part=control_part, + steps=len(action_np), + ) + _sync_agent_state_from_public_qpos_action(env, robot_name, action_np, control_part) + return action_np + + +def _semantic_public_grasp_enabled(kwargs): + return ( + kwargs.get("use_public_grasp_semantics", True) is True + or kwargs.get("public_grasp_strategy", None) == "semantic" + ) + + +def _cfg_supported_kwargs(cfg_cls, values): + supported = set() + for cls in reversed(cfg_cls.__mro__): + supported.update(getattr(cls, "__annotations__", {}).keys()) + return {key: value for key, value in values.items() if key in supported} + + +def _public_grasp_cache_path(mesh_vertices, mesh_triangles): + vert_bytes = mesh_vertices.to("cpu").numpy().tobytes() + face_bytes = mesh_triangles.to("cpu").numpy().tobytes() + md5_hash = hashlib.md5(vert_bytes + face_bytes).hexdigest() + return os.path.join(GRASP_ANNOTATOR_CACHE_DIR, f"antipodal_cache_{md5_hash}.npy") + + +def _build_public_grasp_semantics(env, obj_name: str, kwargs: dict): + """Build ObjectSemantics for tutorial-style AntipodalAffordance grasp.""" + target_obj = env.sim.get_rigid_object(obj_name) + if target_obj is None: + raise ValueError(f"No rigid object found for {obj_name}.") + + mesh_vertices = target_obj.get_vertices(env_ids=[0], scale=True)[0] + mesh_triangles = target_obj.get_triangles(env_ids=[0])[0] + mesh_vertices = torch.as_tensor(mesh_vertices, dtype=torch.float32) + mesh_triangles = torch.as_tensor(mesh_triangles, dtype=torch.int64) + if ( + mesh_vertices.numel() == 0 + or mesh_triangles.numel() == 0 + or mesh_vertices.shape[-1] != 3 + or mesh_triangles.shape[-1] != 3 + ): + raise ValueError(f"Object {obj_name} has empty or invalid mesh geometry.") + + allow_annotation = bool(kwargs.get("allow_public_grasp_annotation", True)) + force_reannotate = bool(kwargs.get("force_public_grasp_reannotate", False)) + if force_reannotate and not allow_annotation: + raise RuntimeError( + "Public semantic grasp requested force re-annotation without " + "allow_public_grasp_annotation=True." + ) + + cache_path = _public_grasp_cache_path(mesh_vertices, mesh_triangles) + if not os.path.exists(cache_path) and not allow_annotation: + raise RuntimeError( + "Public semantic grasp cache is missing and annotation is disabled; " + "set allow_public_grasp_annotation=True or use public grasp_pose_obj mode." + ) + + antipodal_sampler_cfg = AntipodalSamplerCfg( + **_cfg_supported_kwargs( + AntipodalSamplerCfg, + { + "n_sample": int(kwargs.get("grasp_antipodal_n_sample", 20000)), + "max_angle": kwargs.get("grasp_antipodal_max_angle", np.pi / 12), + "max_length": kwargs.get("grasp_max_open_length", 0.088), + "min_length": kwargs.get("grasp_min_open_length", 0.003), + }, + ) + ) + generator_cfg = GraspGeneratorCfg( + **_cfg_supported_kwargs( + GraspGeneratorCfg, + { + "viser_port": int(kwargs.get("public_grasp_viser_port", 11801)), + "antipodal_sampler_cfg": antipodal_sampler_cfg, + "max_deviation_angle": kwargs.get( + "grasp_max_deviation_angle", np.pi / 6 + ), + }, + ) + ) + gripper_collision_cfg = GripperCollisionCfg( + **_cfg_supported_kwargs( + GripperCollisionCfg, + { + "max_open_length": kwargs.get("grasp_max_open_length", 0.088), + "finger_length": kwargs.get("grasp_finger_length", 0.078), + "point_sample_dense": kwargs.get("grasp_point_sample_dense", 0.012), + }, + ) + ) + + affordance = AntipodalAffordance( + object_label=obj_name, + force_reannotate=force_reannotate, + custom_config={ + "gripper_collision_cfg": gripper_collision_cfg, + "generator_cfg": generator_cfg, + }, + ) + return ObjectSemantics( + label=obj_name, + geometry={ + "mesh_vertices": mesh_vertices, + "mesh_triangles": mesh_triangles, + }, + affordance=affordance, + entity=target_obj, + ) + + +def _public_semantic_grasp_action( + *, + env, + robot_name: str, + obj_name: str, + pre_grasp_dis: float, + kwargs: dict, +): + if env is None: + raise ValueError("Public semantic grasp requires env.") + if not _semantic_public_grasp_enabled(kwargs): + raise RuntimeError("Public semantic grasp is disabled.") + + target = _build_public_grasp_semantics(env, obj_name, kwargs) + is_left, arm_part, hand_part, arm_joints, _ = _select_arm_parts(env, robot_name) + device = env.robot.device + hand_dof = len(env.left_eef_joints if is_left else env.right_eef_joints) + approach_direction = torch.as_tensor( + kwargs.get("public_grasp_approach_direction", [0, 0, -1]), + dtype=torch.float32, + device=device, + ) + cfg = PickUpActionCfg( + control_part=arm_part, + hand_control_part=hand_part, + hand_open_qpos=_state_to_hand_qpos(env.open_state, hand_dof, device), + hand_close_qpos=_state_to_hand_qpos(env.close_state, hand_dof, device), + pre_grasp_distance=pre_grasp_dis, + lift_height=kwargs.get("lift_height", 0.1), + approach_direction=approach_direction, + sample_interval=int( + kwargs.get("sample_interval", kwargs.get("sample_num", 80)) + ), + hand_interp_steps=int(kwargs.get("hand_interp_steps", 5)), + ) + engine = _make_atomic_engine(env, cfg) + action = engine._actions[cfg.name] + start_qpos = env.left_arm_current_qpos if is_left else env.right_arm_current_qpos + start_qpos = torch.as_tensor( + start_qpos, dtype=torch.float32, device=device + ).reshape(1, len(arm_joints)) + + is_success, trajectory, joint_ids = action.execute( + target=target, + start_qpos=start_qpos, + ) + if not is_success: + raise RuntimeError("Public semantic grasp action failed.") + + action_np = _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids) + _log_public_atomic_backend( + wrapper_name="grasp", + action=action, + cfg=cfg, + target_kind="ObjectSemantics(AntipodalAffordance)", + control_part=arm_part, + steps=len(action_np), + ) + _sync_agent_state_from_public_action(env, robot_name, action_np) + return action_np + + +def _public_move_action( + env, robot_name, target_pose, public_sample_num, action_name, **kwargs +): + if env is None: + raise ValueError("Public MoveAction requires env.") + + _, arm_part, _, _, _ = _select_arm_parts(env, robot_name) + ( + _, + _, + select_arm_current_qpos, + _, + _, + ) = get_arm_states(env, robot_name) + target_pose = torch.as_tensor( + target_pose, dtype=torch.float32, device=env.robot.device + ) + start_qpos = torch.as_tensor( + select_arm_current_qpos, + dtype=torch.float32, + device=env.robot.device, + ) + + cfg = MoveActionCfg( + control_part=arm_part, + sample_interval=int(public_sample_num), + ) + engine = _make_atomic_engine(env, cfg) + action = engine._actions[cfg.name] + is_success, trajectory, joint_ids = action.execute( + target=target_pose, + start_qpos=start_qpos, + ) + if not is_success: + raise RuntimeError(f"Public atomic action failed for {action_name}.") + + action_np = _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids) + _log_public_atomic_backend( + wrapper_name=action_name, + action=action, + cfg=cfg, + target_kind="pose", + control_part=arm_part, + steps=len(action_np), + ) + _sync_agent_state_from_public_action(env, robot_name, action_np) + return action_np + + +def _public_pickup_action( + env, + robot_name, + obj_name, + target_obj_pose, + pre_grasp_dis, + **kwargs, +): + if kwargs.get("use_public_grasp_action", False) is not True: + raise RuntimeError("Public PickUpAction with grasp_pose_obj is disabled.") + + is_left, arm_part, hand_part, _, eef_joints = _select_arm_parts(env, robot_name) + ( + _, + _, + select_arm_current_qpos, + _, + _, + ) = get_arm_states(env, robot_name) + + grasp_pose_object = env.obj_info.get(obj_name, {}).get("grasp_pose_obj") + if grasp_pose_object is None: + raise ValueError(f"No grasp_pose_obj found for object {obj_name}.") + + device = env.robot.device + target_obj_pose = torch.as_tensor( + target_obj_pose, dtype=torch.float32, device=device + ) + grasp_pose_object = torch.as_tensor( + grasp_pose_object, dtype=torch.float32, device=device + ) + + select_arm_base_pose = ( + env.left_arm_base_pose if is_left else env.right_arm_base_pose + ) + select_arm_base_pose = torch.as_tensor( + select_arm_base_pose, dtype=torch.float32, device=device + ) + delta_xy = target_obj_pose[:2, 3] - select_arm_base_pose[:2, 3] + aim_horizontal_angle = float( + torch.atan2(delta_xy[1], delta_xy[0]).detach().cpu() + ) + _get_arm_aim_yaw_offset(env, robot_name) + if bool((grasp_pose_object[0, 2] > 0.5).item()): + target_obj_pose = torch.as_tensor( + get_rotation_replaced_pose( + target_obj_pose.detach().cpu().numpy(), + aim_horizontal_angle, + "z", + "intrinsic", + ), + dtype=torch.float32, + device=device, + ) + grasp_pose = target_obj_pose @ grasp_pose_object + + cfg = PickUpActionCfg( + control_part=arm_part, + hand_control_part=hand_part, + hand_open_qpos=_state_to_hand_qpos(env.open_state, len(eef_joints), device), + hand_close_qpos=_state_to_hand_qpos(env.close_state, len(eef_joints), device), + pre_grasp_distance=pre_grasp_dis, + sample_interval=int(kwargs.get("sample_num", 80)), + ) + engine = _make_atomic_engine(env, cfg) + action = engine._actions[cfg.name] + is_success, trajectory, joint_ids = action.execute( + target=grasp_pose, + start_qpos=torch.as_tensor( + select_arm_current_qpos, dtype=torch.float32, device=device + ), + ) + if not is_success: + raise RuntimeError("Public atomic action failed for grasp.") + + action_np = _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids) + _log_public_atomic_backend( + wrapper_name="grasp", + action=action, + cfg=cfg, + target_kind="pose(grasp_pose_obj)", + control_part=arm_part, + steps=len(action_np), + ) + _sync_agent_state_from_public_action(env, robot_name, action_np) + return action_np + + +def _public_place_action( + env, + robot_name, + target_pose, + pre_place_dis, + **kwargs, +): + if kwargs.get("use_public_place_action", True) is not True: + raise RuntimeError("Public PlaceAction is disabled.") + + _, arm_part, hand_part, _, eef_joints = _select_arm_parts(env, robot_name) + ( + _, + _, + select_arm_current_qpos, + _, + _, + ) = get_arm_states(env, robot_name) + device = env.robot.device + + cfg = PlaceActionCfg( + control_part=arm_part, + hand_control_part=hand_part, + hand_open_qpos=_state_to_hand_qpos(env.open_state, len(eef_joints), device), + hand_close_qpos=_state_to_hand_qpos(env.close_state, len(eef_joints), device), + lift_height=pre_place_dis, + sample_interval=int(kwargs.get("sample_num", 80)), + ) + engine = _make_atomic_engine(env, cfg) + action = engine._actions[cfg.name] + is_success, trajectory, joint_ids = action.execute( + target=torch.as_tensor(target_pose, dtype=torch.float32, device=device), + start_qpos=torch.as_tensor( + select_arm_current_qpos, dtype=torch.float32, device=device + ), + ) + if not is_success: + raise RuntimeError("Public atomic action failed for place on table.") + + action_np = _trajectory_to_agent_action(env, robot_name, trajectory, joint_ids) + _log_public_atomic_backend( + wrapper_name="place_on_table", + action=action, + cfg=cfg, + target_kind="pose", + control_part=arm_part, + steps=len(action_np), + ) + _sync_agent_state_from_public_action(env, robot_name, action_np) + return action_np + + +def move_to_target_pose( + robot_name: str, + target_pose=None, + sample_num: int = 20, + env=None, + **kwargs, +): + actions = _public_move_action( + env, + robot_name, + target_pose, + sample_num, + "move to target", + **kwargs, + ) + log_info( + f"Total generated trajectory number for move to target: {len(actions)}.", + color="green", + ) + + return actions + + +def grasp( + robot_name: str, + obj_name: str, + pre_grasp_dis: float = 0.05, + env=None, + **kwargs, +): + obj_uids = env.sim.get_rigid_object_uid_list() + if obj_name in obj_uids: + target_obj = env.sim.get_rigid_object(obj_name) + else: + log_error(f"No matched object {obj_uids}.") + target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) + + if _semantic_public_grasp_enabled(kwargs): + actions = _public_semantic_grasp_action( + env=env, + robot_name=robot_name, + obj_name=obj_name, + pre_grasp_dis=pre_grasp_dis, + kwargs=kwargs, + ) + log_info( + "Total generated trajectory number for public semantic grasp: " + f"{len(actions)}.", + color="green", + ) + return actions + + actions = _public_pickup_action( + env, + robot_name, + obj_name, + target_obj_pose, + pre_grasp_dis, + **kwargs, + ) + + log_info( + f"Total generated trajectory number for grasp: {len(actions)}.", color="green" + ) + + return actions + + +def place_on_table( + robot_name: str, + obj_name: str, + x: float = None, + y: float = None, + pre_place_dis: float = 0.08, + env=None, + **kwargs, +): + + init_obj_height = env.obj_info.get(obj_name).get("height") + height = init_obj_height + kwargs.get("eps", 0.03) + + ( + _, + _, + _, + select_arm_current_pose, + _, + ) = get_arm_states(env, robot_name) + place_pose = deepcopy(select_arm_current_pose) + if x is not None: + place_pose[0, 3] = x + if y is not None: + place_pose[1, 3] = y + place_pose[2, 3] = height + + actions = _public_place_action( + env, + robot_name, + place_pose, + pre_place_dis, + **kwargs, + ) + + log_info( + f"Total generated trajectory number for place on table: {len(actions)}.", + color="green", + ) + + return actions + + +def move_relative_to_object( + robot_name: str, + obj_name: str, + x_offset: float = 0, + y_offset: float = 0, + z_offset: float = 0, + env=None, + **kwargs, +): + + ( + _, + _, + _, + select_arm_current_pose, + _, + ) = get_arm_states(env, robot_name) + + obj_uids = env.sim.get_rigid_object_uid_list() + if obj_name in obj_uids: + target_obj = env.sim.get_rigid_object(obj_name) + else: + log_error("No matched object.") + + target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) + move_target_pose = deepcopy(select_arm_current_pose) + move_target_pose[:3, 3] = target_obj_pose[:3, 3] + move_target_pose[0, 3] += x_offset + move_target_pose[1, 3] += y_offset + move_target_pose[2, 3] += z_offset + + actions = _public_move_action( + env, + robot_name, + move_target_pose, + kwargs.get("sample_num", 30), + "move relative to object", + **kwargs, + ) + + log_info( + f"Total generated trajectory number for move relative to object: {len(actions)}.", + color="green", + ) + + return actions + + +def move_to_absolute_position( + robot_name: str, + x: float = None, + y: float = None, + z: float = None, + env=None, + **kwargs, +): + + ( + _, + _, + _, + select_arm_current_pose, + _, + ) = get_arm_states(env, robot_name) + + move_pose = deepcopy(select_arm_current_pose) + + current_xyz = move_pose[:3, 3].clone() + + target_xyz = current_xyz.clone() + if x is not None: + target_xyz[0] = x + if y is not None: + target_xyz[1] = y + if z is not None: + target_xyz[2] = z + + move_pose[:3, 3] = target_xyz + + actions = _public_move_action( + env, + robot_name, + move_pose, + kwargs.get("sample_num", 30), + "move to absolute position", + **kwargs, + ) + + log_info( + f"Total generated trajectory number for move to absolute position: {len(actions)}.", + color="green", + ) + + return actions + + +def move_by_relative_offset( + robot_name: str, + dx: float = 0.0, + dy: float = 0.0, + dz: float = 0.0, + mode: str = "extrinsic", + env=None, + **kwargs, +): + + ( + _, + _, + _, + select_arm_current_pose, + _, + ) = get_arm_states(env, robot_name) + + move_pose = deepcopy(select_arm_current_pose) + + move_pose = get_offset_pose(move_pose, dx, "x", mode) + move_pose = get_offset_pose(move_pose, dy, "y", mode) + move_pose = get_offset_pose(move_pose, dz, "z", mode) + + actions = _public_move_action( + env, + robot_name, + move_pose, + kwargs.get("sample_num", 20), + "move by relative offset", + **kwargs, + ) + + log_info( + f"Total generated trajectory number for move by relative offset: {len(actions)}.", + color="green", + ) + + return actions + + +def back_to_initial_pose(robot_name: str, env=None, **kwargs): + + ( + is_left, + select_arm, + select_arm_current_qpos, + _, + _, + ) = get_arm_states(env, robot_name) + + target_qpos = env.left_arm_init_qpos if is_left else env.right_arm_init_qpos + target_qpos = torch.as_tensor(target_qpos, dtype=torch.float32) + + sample_num = kwargs.get("sample_num", 30) + actions = _public_qpos_move_action( + env=env, + robot_name=robot_name, + control_part=select_arm, + target_qpos=target_qpos.to(env.robot.device), + start_qpos=torch.as_tensor( + select_arm_current_qpos, + dtype=torch.float32, + device=env.robot.device, + ).reshape(1, -1), + sample_num=sample_num, + kwargs=kwargs, + log_name="back to initial pose", + ) + + log_info( + f"Total generated trajectory number for back to initial pose: {len(actions)}.", + color="green", + ) + + return actions + + +def rotate_eef(robot_name: str, degree: float = 0, env=None, **kwargs): + + ( + _, + select_arm, + select_arm_current_qpos, + _, + _, + ) = get_arm_states(env, robot_name) + + rotated_qpos = deepcopy(select_arm_current_qpos) + rotated_qpos[5] += np.deg2rad(degree) + sample_num = kwargs.get("sample_num", 20) + actions = _public_qpos_move_action( + env=env, + robot_name=robot_name, + control_part=select_arm, + target_qpos=rotated_qpos, + start_qpos=torch.as_tensor( + select_arm_current_qpos, + dtype=torch.float32, + device=env.robot.device, + ).reshape(1, -1), + sample_num=sample_num, + kwargs=kwargs, + log_name="rotate eef", + ) + + log_info( + f"Total generated trajectory number for rotate eef: {len(actions)}.", + color="green", + ) + + return actions + + +def orient_eef( + robot_name: str, + direction: str = "front", # 'front' or 'down' + env=None, + **kwargs, +): + # Get arm state + ( + _, + _, + _, + select_arm_current_pose, + _, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Generate replacement rotation matrix + replaced_rotation_matrix = np.eye(4) + if direction == "front": + rotation_matrix = R.from_euler("xyz", [180, -90, 0], degrees=True).as_matrix() + replaced_rotation_matrix[:3, :3] = ( + rotation_matrix @ replaced_rotation_matrix[:3, :3] + ) + elif direction == "down": + rotation_matrix = R.from_euler("x", 180, degrees=True).as_matrix() + replaced_rotation_matrix[:3, :3] = ( + rotation_matrix @ replaced_rotation_matrix[:3, :3] + ) + else: + log_error("Rotation direction must be 'front' or 'down'.") + + rotation_replaced_pose = deepcopy(select_arm_current_pose) + rot_torch = torch.as_tensor( + replaced_rotation_matrix[:3, :3], + dtype=rotation_replaced_pose.dtype, + device=rotation_replaced_pose.device, + ) + rotation_replaced_pose[:3, :3] = rot_torch + + sample_num = kwargs.get("sample_num", 20) + actions = _public_move_action( + env, + robot_name, + rotation_replaced_pose, + sample_num, + "orient eef", + **kwargs, + ) + + log_info( + f"Total generated trajectory number for orient eef: {len(actions)}.", + color="green", + ) + + return actions + + +def close_gripper(robot_name: str, env=None, **kwargs): + + ( + _, + _, + _, + _, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + sample_num = kwargs.get("sample_num", 15) + _, _, hand_part, _, eef_joints = _select_arm_parts(env, robot_name) + hand_dof = len(eef_joints) + actions = _public_qpos_move_action( + env=env, + robot_name=robot_name, + control_part=hand_part, + target_qpos=_state_to_hand_qpos(env.close_state, hand_dof, env.robot.device), + start_qpos=_state_to_hand_qpos( + select_arm_current_gripper_state, hand_dof, env.robot.device + ).reshape(1, hand_dof), + sample_num=sample_num, + kwargs=kwargs, + log_name="close gripper", + ) + + log_info( + f"Total generated trajectory number for close gripper: {len(actions)}.", + color="green", + ) + + return actions + + +def open_gripper(robot_name: str, env=None, **kwargs): + + ( + _, + _, + _, + _, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + if _is_current_gripper_open( + env, + select_arm_current_gripper_state, + kwargs.get("open_threshold", 0.01), + ): + actions = _current_agent_action(env, robot_name) + log_info( + "Skip open gripper because current gripper state already satisfies the skip condition.", + color="green", + ) + return actions + + sample_num = kwargs.get("sample_num", 15) + _, _, hand_part, _, eef_joints = _select_arm_parts(env, robot_name) + hand_dof = len(eef_joints) + actions = _public_qpos_move_action( + env=env, + robot_name=robot_name, + control_part=hand_part, + target_qpos=_state_to_hand_qpos(env.open_state, hand_dof, env.robot.device), + start_qpos=_state_to_hand_qpos( + select_arm_current_gripper_state, hand_dof, env.robot.device + ).reshape(1, hand_dof), + sample_num=sample_num, + kwargs=kwargs, + log_name="open gripper", + ) + + log_info( + f"Total generated trajectory number for open gripper: {len(actions)}.", + color="green", + ) + + return actions + + +def drive( + left_arm_action=None, + right_arm_action=None, + monitor_sequences=None, + env=None, + return_result=False, + interactive_error_injection=False, + **kwargs, +): + left_arm_action = resolve_action(left_arm_action, env, kwargs) + right_arm_action = resolve_action(right_arm_action, env, kwargs) + + left_arm_action = _as_2d_action(left_arm_action, "left_arm_action") + right_arm_action = _as_2d_action(right_arm_action, "right_arm_action") + arm_actions = {"left": left_arm_action, "right": right_arm_action} + + if all(action is None for action in arm_actions.values()): + log_error("At least one arm action should be provided.") + + action_len = max( + len(action) for action in arm_actions.values() if action is not None + ) + for side, action in arm_actions.items(): + if action is not None and len(action) < action_len: + diff = action_len - len(action) + padding = np.repeat(action[-1:], diff, axis=0) + arm_actions[side] = np.concatenate([action, padding], axis=0) + + current_qpos = ( + env.robot.get_qpos().squeeze(0).detach().cpu().numpy().astype(np.float32) + ) + actions = np.repeat(current_qpos[None, :], action_len, axis=0) + + for side, action in arm_actions.items(): + if action is None: + continue + + arm_index = list(getattr(env, f"{side}_arm_joints", [])) + list( + getattr(env, f"{side}_eef_joints", []) + ) + if not arm_index: + log_error( + f"{side}_arm_action was provided, but {side}_arm is not configured " + f"on robot control parts {getattr(env.robot, 'control_parts', None)}." + ) + if action.shape[-1] != len(arm_index): + log_error( + f"{side}_arm_action width {action.shape[-1]} does not match " + f"{side}_arm joints plus eef joints ({len(arm_index)})." + ) + actions[:, arm_index] = action + + actions = torch.from_numpy(actions).to(dtype=torch.float32).unsqueeze(1) + actions = list(actions.unbind(dim=0)) + + interactive_input = setup_interactive_error_input(interactive_error_injection) + try: + for i in tqdm(range(len(actions))): + action = actions[i] + + env.step(action) + + if interactive_error_requested(interactive_input): + restore_interactive_error_input(interactive_input) + interactive_input = None + inject_interactive_error(env) + interactive_input = setup_interactive_error_input( + interactive_error_injection + ) + + if monitor_sequences is not None: + for monitor_idx, monitor_sequence in enumerate(monitor_sequences): + for function in monitor_sequence: + result = function() + if result == True: + env.update_obj_info() + function_name = getattr( + function.func, "__name__", function.__class__.__name__ + ) + log_warning( + f"Monitor function {function_name} triggered at step {i}." + ) + + if return_result: + sync_agent_state_from_robot(env) + return { + "actions": actions[: i + 1], + "monitor_index": monitor_idx, + "monitor_name": function_name, + "step_index": i, + } + + return actions + + env.update_obj_info() + finally: + restore_interactive_error_input(interactive_input) + + if monitor_sequences is not None: + log_info("No monitor sequences triggered during execution.") + if return_result: + return { + "actions": actions, + "monitor_index": None, + "monitor_name": None, + "step_index": None, + } + return actions diff --git a/embodichain/agents/hierarchy/__init__.py b/embodichain/gen_sim/action_agent_pipeline/cli/__init__.py similarity index 88% rename from embodichain/agents/hierarchy/__init__.py rename to embodichain/gen_sim/action_agent_pipeline/cli/__init__.py index 02d7359c..015c4151 100644 --- a/embodichain/agents/hierarchy/__init__.py +++ b/embodichain/gen_sim/action_agent_pipeline/cli/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. # ---------------------------------------------------------------------------- -from langchain_openai import AzureChatOpenAI -from langchain_openai import ChatOpenAI -import os +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/lab/scripts/run_agent.py b/embodichain/gen_sim/action_agent_pipeline/cli/run_agent.py similarity index 78% rename from embodichain/lab/scripts/run_agent.py rename to embodichain/gen_sim/action_agent_pipeline/cli/run_agent.py index cbceb928..15a2fed2 100644 --- a/embodichain/lab/scripts/run_agent.py +++ b/embodichain/gen_sim/action_agent_pipeline/cli/run_agent.py @@ -14,20 +14,26 @@ # limitations under the License. # ---------------------------------------------------------------------------- +from __future__ import annotations + +import argparse + import gymnasium import numpy as np -import argparse import torch -from embodichain.utils.utility import load_config from embodichain.lab.gym.utils.gym_utils import ( add_env_launcher_args_to_parser, build_env_cfg_from_args, ) +from embodichain.lab.scripts.run_env import main from embodichain.utils.logger import log_error -from .run_env import main +from embodichain.utils.utility import load_config -if __name__ == "__main__": +__all__ = ["cli"] + + +def cli() -> None: np.set_printoptions(5, suppress=True) torch.set_printoptions(precision=5, sci_mode=False) @@ -51,19 +57,28 @@ help="Whether to regenerate code if already existed.", default=False, ) + parser.add_argument( + "--recovery", + action="store_true", + help="Whether to generate recovery actions.", + default=False, + ) + parser.add_argument( + "--interactive_error_injection", + action="store_true", + help="Whether to enable terminal-triggered interactive error injection during drive execution.", + default=False, + ) args = parser.parse_args() - # Validate arguments if args.num_envs != 1: log_error(f"Currently only support num_envs=1, but got {args.num_envs}.") - exit(1) + raise SystemExit(1) - # Load configurations - env_cfg, gym_config, action_config = build_env_cfg_from_args(args) + env_cfg, gym_config, _ = build_env_cfg_from_args(args) agent_config = load_config(args.agent_config) - # Create environment env = gymnasium.make( id=gym_config["id"], cfg=env_cfg, @@ -72,8 +87,11 @@ task_name=args.task_name, ) - # Run main function main(args, env, gym_config) if args.headless: env.reset(options={"final": True}) + + +if __name__ == "__main__": + cli() diff --git a/configs/gym/agent/rearrangement_agent/agent_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/agent_config.json similarity index 51% rename from configs/gym/agent/rearrangement_agent/agent_config.json rename to embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/agent_config.json index 6dbe2f5e..f9786aa4 100644 --- a/configs/gym/agent/rearrangement_agent/agent_config.json +++ b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/agent_config.json @@ -1,8 +1,12 @@ -{ "TaskAgent": { - "prompt_name": "one_stage_prompt" +{ + "TaskAgent": { + "prompt_name": "generate_task_graph" }, - "CodeAgent": { - "prompt_name": "one_stage_prompt_according_to_task_plan" + "RecoveryAgent": { + "prompt_name": "augment_task_graph" + }, + "CompileAgent": { + "prompt_name": "compile_agent_graph" }, "Agent": { "prompt_kwargs": { @@ -18,13 +22,17 @@ "type": "text", "name": "atom_actions.txt" }, - "code_prompt": { + "error_functions": { + "type": "text", + "name": "error_functions.txt" + }, + "monitor_functions": { "type": "text", - "name": "code_prompt.txt" + "name": "monitor_functions.txt" }, - "code_example": { + "recovery_rules": { "type": "text", - "name": "code_example.txt" + "name": "recovery_rules.txt" } } } diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/atom_actions.txt b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/atom_actions.txt new file mode 100644 index 00000000..2d15619b --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/atom_actions.txt @@ -0,0 +1,95 @@ +### Atom Functions for UR10 Single-Arm Apple-to-Mug Placement + +Each atomic function returns a list of joint-space trajectories (`list[np.ndarray]`). +Only use `robot_name="right_arm"`. Never use `robot_name="left_arm"` or +`robot_name="arm"`. + +All atom functions are public-only wrappers backed by +`embodichain.lab.sim.atomic_actions.AtomicActionEngine`. Public action failures +raise immediately. Use only the parameters listed below. + +When generating graph edges, always set `left_arm_action` to null and put the +actual action in `right_arm_action`. + +Use the following functions exactly as defined. Do not invent new APIs or +parameters. + +"grasp": + def grasp(robot_name: str, obj_name: str, pre_grasp_dis: float, **kwargs) -> list[np.ndarray] + + Moves the right arm to the target object's grasp pose and closes the + gripper. With public semantic grasp enabled, this action includes + approach, gripper closing, and a small post-grasp lift. For this task, use + `obj_name="apple"` and `pre_grasp_dis=0.08`. + + Example: + grasp(robot_name="right_arm", obj_name="apple", pre_grasp_dis=0.08, sample_num=80) + +"move_by_relative_offset": + def move_by_relative_offset(robot_name: str, + dx=0.0, dy=0.0, dz=0.0, mode="extrinsic", + **kwargs) -> list[np.ndarray] + + Moves the right end-effector by a relative translation. The offset is applied + along world axes when `mode="extrinsic"`. For the apple-to-mug task, use + this only for a short optional retreat after releasing the apple. Do not use + it to split the carry motion into staged horizontal moves. + + Example: + move_by_relative_offset(robot_name="right_arm", dx=0.0, dy=0.0, dz=0.10, + mode="extrinsic", sample_num=40) + +"move_relative_to_object": + def move_relative_to_object(robot_name: str, + obj_name: str, + x_offset=0.0, y_offset=0.0, z_offset=0.0, + **kwargs) -> list[np.ndarray] + + Moves the right end-effector to a pose centered on a target object plus a + world-frame xyz offset while preserving the current end-effector + orientation. For this task, use `obj_name="mug"` to move the held apple + directly to an elevated release pose above the mug center. This follows the + current mug object pose from the environment config; do not hard-code mug or + book coordinates from previous configs. + + Examples: + move_relative_to_object(robot_name="right_arm", obj_name="mug", + x_offset=0.0, y_offset=0.0, z_offset=0.14, + sample_num=80) + +"move_to_absolute_position": + def move_to_absolute_position(robot_name: str, + x=None, y=None, z=None, + **kwargs) -> list[np.ndarray] + + Moves the right end-effector to an absolute world position while preserving + its current orientation. Prefer `move_relative_to_object` for mug placement + so the graph follows the mug pose. + + Do not use this for nominal mug placement. + +"back_to_initial_pose": + def back_to_initial_pose(robot_name: str, **kwargs) -> list[np.ndarray] + + Returns the right arm to its predefined initial joint configuration. Do not + use this for the nominal apple-to-mug graph, because the task goal is simply + the apple placed in the mug. + + Example: + back_to_initial_pose(robot_name="right_arm") + +"open_gripper": + def open_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + + Opens the right gripper. + + Example: + open_gripper(robot_name="right_arm") + +"close_gripper": + def close_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + + Closes the right gripper. + + Example: + close_gripper(robot_name="right_arm") diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/basic_background.txt b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/basic_background.txt new file mode 100644 index 00000000..68433dab --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/basic_background.txt @@ -0,0 +1,15 @@ +The robot is a single UR10 manipulator with a parallel gripper. +Only the right_arm graph slot is available in this task. + +The scene comes from the exported 1779962412_gym_project mesh environment. The +interactive objects are: +- apple: the target object to grasp and place. +- mug: the target container for the apple. Plan to the current `mug` object pose + from the environment config. +- book: a distractor object on the table. Do not infer the mug location from the + book. + +Use the `grasp` atomic function for the apple, then move the held apple directly +to an elevated mug-centered release pose with `move_relative_to_object`, and +open the gripper. A short upward `move_by_relative_offset` retreat after release +is optional. Keep left_arm_action null for every graph edge. diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/fast_gym_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/fast_gym_config.json new file mode 100644 index 00000000..80f799e0 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/fast_gym_config.json @@ -0,0 +1,361 @@ +{ + "id": "AtomicActionsAgent-v3", + "max_episodes": 1, + "max_episode_steps": 600, + "env": { + "extensions": { + "single_arm_name": "right_arm", + "single_eef_name": "right_eef", + "agent_arm_slots": { + "right": { + "arm": "right_arm", + "eef": "right_eef" + } + }, + "arm_aim_yaw_offset": 3.141592653589793, + "gripper_open_state": [0.0], + "gripper_close_state": [0.04], + "agent_success_object": "apple", + "agent_success_container": "mug", + "agent_success_position": null, + "agent_success_tolerance": 0.05, + "container_success_radius": 0.12, + "container_success_min_z_offset": -0.03, + "container_success_max_z_offset": 0.25, + "agent_success": { + "type": "object_in_container", + "object": "apple", + "container": "mug", + "radius": 0.12, + "min_z_offset": -0.03, + "max_z_offset": 0.25 + }, + "success_xy_tolerance": 0.18, + "success_min_lift_height": 0.15, + "enable_apple_grasp_assist": false, + "grasp_assist_attach_position_tolerance": 0.1, + "grasp_assist_attach_orientation_tolerance": 0.6, + "ignore_terminations_during_agent": true, + "force_apple_top_down_grasp_pose": true, + "apple_top_down_grasp_height_offset": 0.035 + }, + "events": { + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "cam1", + "resolution": [320, 240], + "eye": [-1.25, 1.05, 1.55], + "target": [0.12, 1.84, 0.78] + } + }, + "validation_cameras": { + "func": "validation_cameras", + "mode": "trigger", + "params": { + "cameras": [ + { + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [-1.25, 1.05, 1.55], + "target": [0.12, 1.84, 0.78] + } + }, + { + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.1, 2.65, 1.35], + "target": [0.12, 1.84, 0.78] + } + } + ] + } + }, + "prepare_extra_attr": { + "func": "prepare_extra_attr", + "mode": "reset", + "params": { + "attrs": [ + { + "name": "object_lengths", + "mode": "callable", + "entity_uids": "all_objects", + "func_name": "compute_object_length", + "func_kwargs": { + "is_svd_frame": true, + "sample_points": 5000 + } + }, + { + "name": "grasp_pose_object", + "mode": "static", + "entity_cfg": { + "uid": "apple" + }, + "value": [[ + [0.0, 0.0, 1.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.035], + [0.0, 0.0, 0.0, 1.0] + ]] + } + ] + } + }, + "register_info_to_env": { + "func": "register_info_to_env", + "mode": "reset", + "params": { + "registry": [ + { + "entity_cfg": { + "uid": "apple" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "mug" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + } + ], + "registration": "affordance_datas", + "sim_update": true + } + } + }, + "observations": { + "norm_robot_eef_joint": { + "func": "normalize_robot_joint_data", + "mode": "modify", + "name": "robot/qpos", + "params": { + "joint_ids": [6, 7] + } + } + }, + "dataset": { + "lerobot": { + "func": "LeRobotRecorder", + "mode": "save", + "params": { + "robot_meta": { + "robot_type": "UR10", + "control_freq": 25 + }, + "instruction": { + "lang": "Pick up the apple from the table with the UR10 arm and place it into the mug." + }, + "extra": { + "scene_type": "1779962412_gym_project", + "task_description": "UR10 apple-to-mug placement", + "data_type": "sim" + }, + "use_videos": true + } + } + } + }, + "robot": { + "uid": "UR10", + "urdf_cfg": { + "components": [ + { + "component_type": "arm", + "urdf_path": "UniversalRobots/UR10/UR10.urdf" + }, + { + "component_type": "hand", + "urdf_path": "DH_PGI_140_80/DH_PGI_140_80.urdf" + } + ] + }, + "drive_pros": { + "stiffness": { + "JOINT[1-6]": 10000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 100.0 + }, + "damping": { + "JOINT[1-6]": 1000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 10.0 + }, + "max_effort": { + "JOINT[1-6]": 100000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 1000.0 + } + }, + "control_parts": { + "right_arm": ["JOINT[1-6]"], + "right_eef": ["GRIPPER_FINGER[1-2]_JOINT_1"] + }, + "solver_cfg": { + "right_arm": { + "class_type": "PytorchSolver", + "end_link_name": "ee_link", + "root_link_name": "base_link", + "tcp": [ + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.16], + [0.0, 0.0, 0.0, 1.0] + ] + } + }, + "init_qpos": [0.0, -1.5707963268, -1.5707963268, 1.5707963268, -1.5707963268, 0.0, 0.0, 0.0], + "init_pos": [0.12261117696922513, 2.5676632450195546, 0.4], + "init_rot": [0.0, 0.0, -90.0] + }, + "sensor": [ + { + "sensor_type": "Camera", + "uid": "cam_high", + "width": 960, + "height": 540, + "intrinsics": [488.1665344238281, 488.1665344238281, 480, 270], + "extrinsics": { + "eye": [-1.25, 1.05, 1.55], + "target": [0.12, 1.84, 0.78], + "up": [0.0, 0.0, 1.0] + } + } + ], + "light": { + "direct": [ + { + "uid": "main_light", + "light_type": "point", + "color": [1.0, 1.0, 1.0], + "intensity": 80.0, + "init_pos": [0.15, 1.95, 2.2], + "radius": 10.0 + } + ] + }, + "background": [ + { + "uid": "table", + "shape": { + "shape_type": "Mesh", + "fpath": "1779962412_gym_project/mesh_assets/table/table_1.glb", + "compute_uv": false + }, + "attrs": { + "mass": 10.0, + "static_friction": 0.95, + "dynamic_friction": 0.9, + "linear_damping": 0.5, + "angular_damping": 0.5, + "contact_offset": 0.002, + "rest_offset": 0.001, + "restitution": 0.01, + "max_depenetration_velocity": 3.0, + "max_linear_velocity": 2.0, + "max_angular_velocity": 2.0 + }, + "body_scale": [1.0, 1.0, 1.0], + "init_pos": [0.03896546771464756, 1.7457992850796398, 0.4], + "init_rot": [131.05959670540122, -0.5408456205882278, -0.9485794437136701], + "body_type": "kinematic", + "max_convex_hull_num": 20 + } + ], + "rigid_object": [ + { + "uid": "apple", + "shape": { + "shape_type": "Mesh", + "fpath": "1779962412_gym_project/mesh_assets/apple/apple_0/apple_0.glb", + "compute_uv": false + }, + "attrs": { + "mass": 0.1, + "static_friction": 0.8, + "dynamic_friction": 0.7, + "linear_damping": 0.5, + "angular_damping": 0.5, + "contact_offset": 0.002, + "rest_offset": 0.001, + "restitution": 0.1, + "max_depenetration_velocity": 3.0, + "max_linear_velocity": 2.0, + "max_angular_velocity": 2.0 + }, + "body_scale": [0.5, 0.5, 0.5], + "init_pos": [-0.04736585442532049, 1.8148307472408205, 0.747], + "init_rot": [125.08492737084423, 1.0374067602823314, -1.2144384437679427], + "body_type": "dynamic", + "max_convex_hull_num": 8 + }, + { + "uid": "book", + "shape": { + "shape_type": "Mesh", + "fpath": "1779962412_gym_project/mesh_assets/book/book_3/book_3.glb", + "compute_uv": false + }, + "attrs": { + "mass": 1.0, + "static_friction": 0.8, + "dynamic_friction": 0.7, + "linear_damping": 0.5, + "angular_damping": 0.5, + "contact_offset": 0.002, + "rest_offset": 0.001, + "restitution": 0.1, + "max_depenetration_velocity": 3.0, + "max_linear_velocity": 2.0, + "max_angular_velocity": 2.0 + }, + "body_scale": [1.0, 1.0, 1.0], + "init_pos": [-0.4185391008388205, 1.8600997137531323, 0.78], + "init_rot": [118.25698889958119, 2.564185886002991, -1.449663917070915], + "body_type": "dynamic", + "max_convex_hull_num": 8 + }, + { + "uid": "mug", + "shape": { + "shape_type": "Mesh", + "fpath": "1779962412_gym_project/mesh_assets/mug/mug_2/mug_2.glb", + "compute_uv": false + }, + "attrs": { + "mass": 1.0, + "static_friction": 0.8, + "dynamic_friction": 0.7, + "linear_damping": 0.5, + "angular_damping": 0.5, + "contact_offset": 0.002, + "rest_offset": 0.001, + "restitution": 0.1, + "max_depenetration_velocity": 3.0, + "max_linear_velocity": 2.0, + "max_angular_velocity": 2.0 + }, + "body_scale": [1.0, 1.0, 1.0], + "init_pos": [0.25, 1.6, 0.752], + "init_rot": [128.7027542588752, -3.0681629006467284, 2.2439660166022253], + "body_type": "dynamic", + "max_convex_hull_num": 8 + } + ] +} diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/task_prompt.txt b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/task_prompt.txt new file mode 100644 index 00000000..ebd1b41a --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/apple_pick_agent/task_prompt.txt @@ -0,0 +1,40 @@ +Task: +Use the UR10 right arm to pick up the apple from the exported +1779962412_gym_project tabletop scene and place it into the mug. + +Only the right_arm is available. Keep every left_arm_action as null/None. +Plan to the current `mug` object pose from the environment config. Do not use +memorized mug or book coordinates from previous configs. + +Generate the shortest deterministic nominal graph that completes the task. +Prefer exactly three semantic edges. Allow a fourth edge only for a short +post-release retreat if needed. Do not add intermediate waypoints, staged +horizontal transport, coarse alignment steps, separate lift steps, or +return-to-initial steps. + +The public semantic grasp action already includes approach, gripper closing, and +a small post-grasp lift. Do not add an extra lift edge after grasp. + +1. Grasp the apple with the right gripper: + - right_arm_action: grasp(robot_name="right_arm", obj_name="apple", + pre_grasp_dis=0.08, sample_num=80) + - left_arm_action: null + +2. Move the held apple directly to an elevated release pose near the mug center: + - right_arm_action: move_relative_to_object(robot_name="right_arm", + obj_name="mug", x_offset=0.0, y_offset=0.0, z_offset=0.14, + sample_num=80) + - left_arm_action: null + +3. Release the apple into the mug: + - right_arm_action: open_gripper(robot_name="right_arm", sample_num=25) + - left_arm_action: null + +Optional 4th edge only if the graph needs a clear final gripper state: + - right_arm_action: move_by_relative_offset(robot_name="right_arm", + dx=0.0, dy=0.0, dz=0.10, mode="extrinsic", sample_num=40) + - left_arm_action: null + +The apple should be released above the mug center, not low on the rim and not +offset to the side. The goal state is the apple resting inside the mug. Do not +split the transport into multiple `move_by_relative_offset` edges. diff --git a/configs/gym/agent/pour_water_agent/agent_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config.json similarity index 51% rename from configs/gym/agent/pour_water_agent/agent_config.json rename to embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config.json index b54f90c1..f9786aa4 100644 --- a/configs/gym/agent/pour_water_agent/agent_config.json +++ b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config.json @@ -1,8 +1,12 @@ -{ "TaskAgent": { - "prompt_name": "one_stage_prompt" +{ + "TaskAgent": { + "prompt_name": "generate_task_graph" }, - "CodeAgent": { - "prompt_name": "one_stage_prompt_according_to_task_plan" + "RecoveryAgent": { + "prompt_name": "augment_task_graph" + }, + "CompileAgent": { + "prompt_name": "compile_agent_graph" }, "Agent": { "prompt_kwargs": { @@ -18,14 +22,18 @@ "type": "text", "name": "atom_actions.txt" }, - "code_prompt": { + "error_functions": { + "type": "text", + "name": "error_functions.txt" + }, + "monitor_functions": { "type": "text", - "name": "code_prompt.txt" + "name": "monitor_functions.txt" }, - "code_example": { + "recovery_rules": { "type": "text", - "name": "code_example.txt" + "name": "recovery_rules.txt" } } } -} \ No newline at end of file +} diff --git a/configs/gym/agent/pour_water_agent/agent_config_dual.json b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config_dual.json similarity index 51% rename from configs/gym/agent/pour_water_agent/agent_config_dual.json rename to embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config_dual.json index b243bb65..5391d2f2 100644 --- a/configs/gym/agent/pour_water_agent/agent_config_dual.json +++ b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/agent_config_dual.json @@ -1,8 +1,12 @@ -{ "TaskAgent": { - "prompt_name": "one_stage_prompt" +{ + "TaskAgent": { + "prompt_name": "generate_task_graph" }, - "CodeAgent": { - "prompt_name": "one_stage_prompt_according_to_task_plan" + "RecoveryAgent": { + "prompt_name": "augment_task_graph" + }, + "CompileAgent": { + "prompt_name": "compile_agent_graph" }, "Agent": { "prompt_kwargs": { @@ -18,14 +22,18 @@ "type": "text", "name": "atom_actions.txt" }, - "code_prompt": { + "error_functions": { + "type": "text", + "name": "error_functions.txt" + }, + "monitor_functions": { "type": "text", - "name": "code_prompt.txt" + "name": "monitor_functions.txt" }, - "code_example": { + "recovery_rules": { "type": "text", - "name": "code_example.txt" + "name": "recovery_rules.txt" } } } -} \ No newline at end of file +} diff --git a/configs/gym/agent/pour_water_agent/fast_gym_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/fast_gym_config.json similarity index 86% rename from configs/gym/agent/pour_water_agent/fast_gym_config.json rename to embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/fast_gym_config.json index cdf0a685..16188047 100644 --- a/configs/gym/agent/pour_water_agent/fast_gym_config.json +++ b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/fast_gym_config.json @@ -1,15 +1,59 @@ { "id": "PourWaterAgent-v3", - "max_episodes": 5, + "max_episodes": 1, + "max_episode_steps": 300, "env": { "events": { - "init_table_pose": { - "func": "randomize_rigid_object_pose", - "mode": "reset", + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, "params": { - "entity_cfg": {"uid": "table"}, - "position_range": [[0.0, 0.0, -0.04], [0.0, 0.0, 0.04]], - "relative_position": true + "name": "cam1", + "resolution": [320, 240], + "eye": [2, 0, 2], + "target": [0.5, 0, 1] + } + }, + "validation_cameras": { + "func": "validation_cameras", + "mode": "trigger", + "params": { + "cameras": [ + { + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.7, 0, 2.3], + "target": [0.6, 0, 0.8] + } + }, + { + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.8], + "target": [0.7, 0, 0.9] + } + }, + { + "uid": "valid_cam_3", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.3], + "target": [0.7, 0, 0.9] + } + } + ] } }, "init_bottle_pose": { @@ -52,8 +96,8 @@ "uid": "bottle" }, "value": [[ - [0.32243, 0.03245, 0.94604, 0.025], - [0.00706, -0.99947, 0.03188, -0.0 ], + [0.32243, 0.03245, 0.94604, 0.025 ], + [0.00706, -0.99947, 0.03188, 0.0 ], [0.94657, -0.0036 , -0.32249, 0.0 ], [0.0 , 0.0 , 0.0 , 1.0 ] ]] @@ -191,57 +235,15 @@ "entity_cfg": {"uid": "CobotMagic", "control_parts": ["left_arm", "right_arm"]}, "position_range": [[-0.01, -0.01, -0.01], [0.01, 0.01, 0]] } - }, - "record_camera": { - "func": "record_camera_data", - "mode": "interval", - "interval_step": 1, - "params": { - "name": "cam1", - "resolution": [320, 240], - "eye": [2, 0, 2], - "target": [0.5, 0, 1] - } - }, - "validation_cameras": { - "func": "validation_cameras", - "mode": "reset", + } + }, + "observations": { + "norm_robot_eef_joint": { + "func": "normalize_robot_joint_data", + "mode": "modify", + "name": "robot/qpos", "params": { - "cameras": [ - { - "uid": "valid_cam_1", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [1.7, 0, 2.3], - "target": [0.6, 0, 0.8] - } - }, - { - "uid": "valid_cam_2", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [2.0, 0, 1.8], - "target": [0.7, 0, 0.9] - } - }, - { - "uid": "valid_cam_3", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [2.0, 0, 1.3], - "target": [0.7, 0, 0.9] - } - } - ] + "joint_ids": [12, 13, 14, 15] } } }, @@ -251,17 +253,20 @@ "mode": "save", "params": { "robot_meta": { + "robot_type": "CobotMagic", "control_freq": 25 }, "instruction": { - "lang": "Pour water from the bottle into the mug." - } + "lang": "Pour water from bottle to cup" + }, + "extra": { + "scene_type": "Commercial", + "task_description": "Pour water", + "data_type": "sim" + }, + "use_videos": true } } - }, - "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"], - "success_params": { - "strict": false } }, "robot": { @@ -272,46 +277,16 @@ }, "sensor": [ { - "sensor_type": "StereoCamera", + "sensor_type": "Camera", "uid": "cam_high", "width": 960, "height": 540, - "enable_mask": false, - "enable_depth": false, - "left_to_right_pos": [0.059684025824163614, 0, 0], - "intrinsics": [453.851402686215, 453.8347628855552, 469.827725021235, 258.6656181845155], - "intrinsics_right": [453.4536601653505, 453.3306024582175, 499.13697412367776, 297.7176248477935], + "intrinsics": [488.1665344238281, 488.1665344238281, 480, 270], "extrinsics": { "eye": [0.35368482807598, 0.014695524383058989, 1.4517046071614774], - "target": [0.7186357573287919, -0.054534732904795505, 0.5232553674540066], + "target": [0.8586357573287919, 0, 0.5232553674540066], "up": [0.9306678549330372, -0.0005600064212467153, 0.3658647703553347] } - }, - { - "sensor_type": "Camera", - "uid": "cam_right_wrist", - "width": 640, - "height": 480, - "enable_mask": false, - "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], - "extrinsics": { - "parent": "right_link6", - "pos": [-0.08, 0.0, 0.04], - "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] - } - }, - { - "sensor_type": "Camera", - "uid": "cam_left_wrist", - "width": 640, - "height": 480, - "enable_mask": false, - "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], - "extrinsics": { - "parent": "left_link6", - "pos": [-0.08, 0.0, 0.04], - "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] - } } ], "light": { @@ -320,9 +295,9 @@ "uid": "light_1", "light_type": "point", "color": [1.0, 1.0, 1.0], - "intensity": 100.0, + "intensity": 50.0, "init_pos": [2, 0, 2], - "radius": 20.0 + "radius": 10.0 } ] }, diff --git a/configs/gym/agent/pour_water_agent/task_prompt.txt b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/task_prompt.txt similarity index 100% rename from configs/gym/agent/pour_water_agent/task_prompt.txt rename to embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/task_prompt.txt diff --git a/configs/gym/agent/pour_water_agent/task_prompt_dual.txt b/embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/task_prompt_dual.txt similarity index 100% rename from configs/gym/agent/pour_water_agent/task_prompt_dual.txt rename to embodichain/gen_sim/action_agent_pipeline/configs/pour_water_agent/task_prompt_dual.txt diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/agent_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/agent_config.json new file mode 100644 index 00000000..f9786aa4 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/agent_config.json @@ -0,0 +1,39 @@ +{ + "TaskAgent": { + "prompt_name": "generate_task_graph" + }, + "RecoveryAgent": { + "prompt_name": "augment_task_graph" + }, + "CompileAgent": { + "prompt_name": "compile_agent_graph" + }, + "Agent": { + "prompt_kwargs": { + "task_prompt": { + "type": "text", + "name": "task_prompt.txt" + }, + "basic_background": { + "type": "text", + "name": "basic_background.txt" + }, + "atom_actions": { + "type": "text", + "name": "atom_actions.txt" + }, + "error_functions": { + "type": "text", + "name": "error_functions.txt" + }, + "monitor_functions": { + "type": "text", + "name": "monitor_functions.txt" + }, + "recovery_rules": { + "type": "text", + "name": "recovery_rules.txt" + } + } + } +} diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/atom_actions.txt b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/atom_actions.txt new file mode 100644 index 00000000..b39393cd --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/atom_actions.txt @@ -0,0 +1,48 @@ +### Atom Functions for Dual-Arm Rearrangement + +Each atomic function returns a list of joint-space trajectories (`list[np.ndarray]`). +Use `robot_name="left_arm"` for the fork and `robot_name="right_arm"` for the +spoon. The graph runtime can execute `left_arm_action` and `right_arm_action` +on the same edge simultaneously. + +All atom functions are public-only wrappers backed by +`embodichain.lab.sim.atomic_actions.AtomicActionEngine`. Public action failures +raise immediately. Use only the parameters listed below. + +Use the following functions exactly as defined. Do not invent new APIs or +parameters. + +"grasp": + def grasp(robot_name: str, obj_name: str, pre_grasp_dis: float, **kwargs) -> list[np.ndarray] + + Moves the selected arm to the target object's grasp pose and closes the + gripper. Use `obj_name="fork"` for the left arm and `obj_name="spoon"` for + the right arm. + +"orient_eef": + def orient_eef(robot_name: str, direction: str, **kwargs) -> list[np.ndarray] + + Reorients the selected end-effector while preserving its current position. + For this task, use `direction="down"` before moving to the plate targets. + +"move_relative_to_object": + def move_relative_to_object(robot_name: str, + obj_name: str, + x_offset=0.0, + y_offset=0.0, + z_offset=0.0, + **kwargs) -> list[np.ndarray] + + Moves the selected end-effector to a pose relative to `obj_name` while + preserving orientation. Use `obj_name="plate"`, `y_offset=0.16` for the + fork, and `y_offset=-0.16` for the spoon. + +"open_gripper": + def open_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + + Opens the selected gripper to release the held object. + +"back_to_initial_pose": + def back_to_initial_pose(robot_name: str, **kwargs) -> list[np.ndarray] + + Returns the selected arm to its predefined initial joint configuration. diff --git a/configs/gym/agent/rearrangement_agent/fast_gym_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/fast_gym_config.json similarity index 86% rename from configs/gym/agent/rearrangement_agent/fast_gym_config.json rename to embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/fast_gym_config.json index 2fc603d0..d8355d2b 100644 --- a/configs/gym/agent/rearrangement_agent/fast_gym_config.json +++ b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/fast_gym_config.json @@ -1,15 +1,82 @@ { - "id": "RearrangementAgent-v3", - "max_episodes": 10, + "id": "AtomicActionsAgent-v3", + "max_episodes": 1, + "max_episode_steps": 300, "env": { + "extensions": { + "agent_success": { + "op": "all", + "terms": [ + { + "type": "object_axis_offset_near", + "object": "spoon", + "reference": "plate", + "axis": "y", + "offset": -0.16, + "tolerance": 0.02 + }, + { + "type": "object_axis_offset_near", + "object": "fork", + "reference": "plate", + "axis": "y", + "offset": 0.16, + "tolerance": 0.02 + } + ] + } + }, "events": { - "init_table_pose": { - "func": "randomize_rigid_object_pose", - "mode": "reset", + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, "params": { - "entity_cfg": {"uid": "table"}, - "position_range": [[0.0, 0.0, -0.04], [0.0, 0.0, 0.04]], - "relative_position": true + "name": "cam1", + "resolution": [320, 240], + "eye": [2, 0, 2], + "target": [0.5, 0, 1] + } + }, + "validation_cameras": { + "func": "validation_cameras", + "mode": "trigger", + "params": { + "cameras": [ + { + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.7, 0, 2.3], + "target": [0.6, 0, 0.8] + } + }, + { + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.8], + "target": [0.7, 0, 0.9] + } + }, + { + "uid": "valid_cam_3", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.3], + "target": [0.7, 0, 0.9] + } + } + ] } }, "init_fork_pose": { @@ -176,57 +243,15 @@ "entity_cfg": {"uid": "CobotMagic", "control_parts": ["left_arm", "right_arm"]}, "position_range": [[-0.01, -0.01, -0.01], [0.01, 0.01, 0]] } - }, - "record_camera": { - "func": "record_camera_data", - "mode": "interval", - "interval_step": 1, - "params": { - "name": "cam1", - "resolution": [320, 240], - "eye": [2, 0, 2], - "target": [0.5, 0, 1] - } - }, - "validation_cameras": { - "func": "validation_cameras", - "mode": "trigger", + } + }, + "observations": { + "norm_robot_eef_joint": { + "func": "normalize_robot_joint_data", + "mode": "modify", + "name": "robot/qpos", "params": { - "cameras": [ - { - "uid": "valid_cam_1", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [1.7, 0, 2.3], - "target": [0.6, 0, 0.8] - } - }, - { - "uid": "valid_cam_2", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [2.0, 0, 1.8], - "target": [0.7, 0, 0.9] - } - }, - { - "uid": "valid_cam_3", - "width": 1280, - "height": 960, - "enable_mask": false, - "intrinsics": [1400, 1400, 640, 480], - "extrinsics": { - "eye": [2.0, 0, 1.3], - "target": [0.7, 0, 0.9] - } - } - ] + "joint_ids": [12, 13, 14, 15] } } }, @@ -236,17 +261,20 @@ "mode": "save", "params": { "robot_meta": { + "robot_type": "CobotMagic", "control_freq": 25 }, "instruction": { "lang": "Place the spoon and fork neatly into the plate on the table." - } + }, + "extra": { + "scene_type": "Commercial", + "task_description": "Rearrangement", + "data_type": "sim" + }, + "use_videos": true } } - }, - "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"], - "success_params": { - "strict": false } }, "robot": { @@ -257,46 +285,16 @@ }, "sensor": [ { - "sensor_type": "StereoCamera", + "sensor_type": "Camera", "uid": "cam_high", "width": 960, "height": 540, - "enable_mask": false, - "enable_depth": false, - "left_to_right_pos": [0.059684025824163614, 0, 0], - "intrinsics": [453.851402686215, 453.8347628855552, 469.827725021235, 258.6656181845155], - "intrinsics_right": [453.4536601653505, 453.3306024582175, 499.13697412367776, 297.7176248477935], + "intrinsics": [488.1665344238281, 488.1665344238281, 480, 270], "extrinsics": { "eye": [0.35368482807598, 0.014695524383058989, 1.4517046071614774], - "target": [0.7186357573287919, -0.054534732904795505, 0.5232553674540066], + "target": [0.8586357573287919, 0, 0.5232553674540066], "up": [0.9306678549330372, -0.0005600064212467153, 0.3658647703553347] } - }, - { - "sensor_type": "Camera", - "uid": "cam_right_wrist", - "width": 640, - "height": 480, - "enable_mask": false, - "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], - "extrinsics": { - "parent": "right_link6", - "pos": [-0.08, 0.0, 0.04], - "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] - } - }, - { - "sensor_type": "Camera", - "uid": "cam_left_wrist", - "width": 640, - "height": 480, - "enable_mask": false, - "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], - "extrinsics": { - "parent": "left_link6", - "pos": [-0.08, 0.0, 0.04], - "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] - } } ], "light": { @@ -305,9 +303,9 @@ "uid": "light_1", "light_type": "point", "color": [1.0, 1.0, 1.0], - "intensity": 100.0, + "intensity": 50.0, "init_pos": [2, 0, 2], - "radius": 20.0 + "radius": 10.0 } ] }, diff --git a/configs/gym/agent/rearrangement_agent/task_prompt.txt b/embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/task_prompt.txt similarity index 100% rename from configs/gym/agent/rearrangement_agent/task_prompt.txt rename to embodichain/gen_sim/action_agent_pipeline/configs/rearrangement_agent/task_prompt.txt diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/agent_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/agent_config.json new file mode 100644 index 00000000..f9786aa4 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/agent_config.json @@ -0,0 +1,39 @@ +{ + "TaskAgent": { + "prompt_name": "generate_task_graph" + }, + "RecoveryAgent": { + "prompt_name": "augment_task_graph" + }, + "CompileAgent": { + "prompt_name": "compile_agent_graph" + }, + "Agent": { + "prompt_kwargs": { + "task_prompt": { + "type": "text", + "name": "task_prompt.txt" + }, + "basic_background": { + "type": "text", + "name": "basic_background.txt" + }, + "atom_actions": { + "type": "text", + "name": "atom_actions.txt" + }, + "error_functions": { + "type": "text", + "name": "error_functions.txt" + }, + "monitor_functions": { + "type": "text", + "name": "monitor_functions.txt" + }, + "recovery_rules": { + "type": "text", + "name": "recovery_rules.txt" + } + } + } +} diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/atom_actions.txt b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/atom_actions.txt new file mode 100644 index 00000000..cea04d7a --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/atom_actions.txt @@ -0,0 +1,93 @@ +### Atom Functions for UR10 Single-Arm Cup Transfer +Each atomic function returns a list of joint-space trajectories (list[np.ndarray]). +Only use robot_name='right_arm'. Never use robot_name='left_arm'. +When generating graph edges, always set left_arm_action to null and put the actual action in right_arm_action. + +All atom functions are public-only wrappers backed by +`embodichain.lab.sim.atomic_actions.AtomicActionEngine`. +If a public atomic action cannot resolve the target, solve IK, or plan a +trajectory, it raises an error immediately. Use only the parameters listed below. + +For this task, grasp defaults to public semantic grasp. It uses public +`PickUpAction` with `ObjectSemantics(AntipodalAffordance)`; the caller does not +need to pass any public backend flags. + +Use the following functions exactly as defined. Do not invent new APIs or parameters. + +"grasp": + def grasp(robot_name: str, obj_name: str, pre_grasp_dis: float, **kwargs) -> list[np.ndarray] + + Moves the right arm to the target object's public grasp target and closes the gripper. + + Example: + grasp(robot_name='right_arm', obj_name='cup', pre_grasp_dis=0.08) + +"place_on_table": + def place_on_table(robot_name: str, obj_name: str, x: float, y: float, pre_place_dis: float, **kwargs) -> list[np.ndarray] + + Moves the right arm with the held object to the desired [x, y] table location and opens the gripper to place it using public PlaceAction. + The z-coordinate is automatically adjusted based on the object height. + For this task, place the cup at x=0.75 and y=-0.18. + + Example: + place_on_table(robot_name='right_arm', obj_name='cup', x=0.75, y=-0.18, pre_place_dis=0.08) + +"move_relative_to_object": + def move_relative_to_object(robot_name: str, obj_name: str, + x_offset=0, y_offset=0, z_offset=0, + **kwargs) -> list[np.ndarray] + + Moves the end-effector to a pose defined relative to the target object: + target = object_position + [x_offset, y_offset, z_offset] + Orientation is preserved. + + Example: + move_relative_to_object(robot_name='right_arm', obj_name='cup', + x_offset=0.0, y_offset=0.0, z_offset=0.15) + +"move_to_absolute_position": + def move_to_absolute_position(robot_name: str, + x=None, y=None, z=None, + **kwargs) -> list[np.ndarray] + + Moves the end-effector to an absolute (x, y, z) position in world coordinates. + Any coordinate set to None remains unchanged. Orientation is preserved. + + Example: + move_to_absolute_position(robot_name='right_arm', x=0.75, y=-0.18, z=None) + +"move_by_relative_offset": + def move_by_relative_offset(robot_name: str, + dx=0.0, dy=0.0, dz=0.0, mode='extrinsic', + **kwargs) -> list[np.ndarray] + + Moves the end-effector by a relative translation: + new_position = current_position + [dx, dy, dz] + The offset is applied along world axes when mode='extrinsic' and along end-effector axes when mode='intrinsic'. + + Example: + move_by_relative_offset(robot_name='right_arm', dx=0.0, dy=0.0, dz=0.10, mode='extrinsic') + +"back_to_initial_pose": + def back_to_initial_pose(robot_name: str, **kwargs) -> list[np.ndarray] + + Returns the right arm to its predefined initial joint configuration. + + Example: + back_to_initial_pose(robot_name='right_arm') + +"close_gripper": + def close_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + + Closes the right gripper using a short gripper-only trajectory. + + Example: + close_gripper(robot_name='right_arm') + +"open_gripper": + def open_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + + Opens the right gripper using a short gripper-only trajectory. + + Example: + open_gripper(robot_name='right_arm') diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/basic_background.txt b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/basic_background.txt new file mode 100644 index 00000000..36762856 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/basic_background.txt @@ -0,0 +1,26 @@ +The environment uses a right-handed world coordinate system, where 1 unit equals 1 meter. +All robot poses are represented as 4x4 homogeneous transformation matrices. + +The robot base coordinate frame is the authoritative frame for all spatial reasoning, planning, and action generation. + +ROBOT BASE COORDINATE DEFINITIONS + +All directions below are defined strictly in the robot base frame: + +* Moving forward increases x +* Moving backward decreases x +* Moving left increases y +* Moving right decreases y +* Moving up increases z +* Moving down decreases z + +ROBOT INITIALIZATION AND TERMINATION + +The robot is a single UR10 manipulator with a parallel gripper. +The only available manipulator is named right_arm. +The only available gripper is named right_eef. +There is no left arm in this task. Do not generate any left_arm action. + +The cup starts near [0.75, 0.18] on the positive-y side of the table. +The target placement location is [0.75, -0.18] on the negative-y side of the table. +At task completion, the cup should be placed upright at the target location and the right_arm should be returned to its initial pose. diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/fast_gym_config.json b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/fast_gym_config.json new file mode 100644 index 00000000..65930a92 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/fast_gym_config.json @@ -0,0 +1,325 @@ +{ + "id": "AtomicActionsAgent-v3", + "max_episodes": 1, + "max_episode_steps": 300, + "env": { + "events": { + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "cam1", + "resolution": [320, 240], + "eye": [1.7, 0.0, 2.1], + "target": [0.7, 0.0, 0.9] + } + }, + "validation_cameras": { + "func": "validation_cameras", + "mode": "trigger", + "params": { + "cameras": [ + { + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.7, 0.0, 2.3], + "target": [0.7, 0.0, 0.85] + } + }, + { + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, -0.45, 1.8], + "target": [0.75, 0.0, 0.9] + } + }, + { + "uid": "valid_cam_3", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0.45, 1.6], + "target": [0.75, 0.0, 0.9] + } + } + ] + } + }, + "init_cup_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "cup"}, + "position_range": [[-0.0, -0.1, 0.0], [-0.0, -0.1, 0.0]], + "rotation_range": [[0, 0, 0], [0, 0, 0]], + "relative_position": true, + "relative_rotation": true + } + }, + "prepare_extra_attr": { + "func": "prepare_extra_attr", + "mode": "reset", + "params": { + "attrs": [ + { + "name": "object_lengths", + "mode": "callable", + "entity_uids": "all_objects", + "func_name": "compute_object_length", + "func_kwargs": { + "is_svd_frame": true, + "sample_points": 5000 + } + }, + { + "name": "grasp_pose_object", + "mode": "static", + "entity_cfg": { + "uid": "cup" + }, + "value": [[ + [0.0, 0.0, 1.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.04], + [0.0, 0.0, 0.0, 1.0] + ]] + } + ] + } + }, + "register_info_to_env": { + "func": "register_info_to_env", + "mode": "reset", + "params": { + "registry": [ + { + "entity_cfg": { + "uid": "cup" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + } + ], + "registration": "affordance_datas", + "sim_update": true + } + }, + "random_table_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "table"}, + "random_texture_prob": 1.0, + "texture_path": "CocoBackground/coco" + } + }, + "random_cup_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "cup"}, + "random_texture_prob": 1.0, + "texture_path": "CocoBackground/coco" + } + } + }, + "observations": { + "norm_robot_eef_joint": { + "func": "normalize_robot_joint_data", + "mode": "modify", + "name": "robot/qpos", + "params": { + "joint_ids": [6, 7] + } + } + }, + "dataset": { + "lerobot": { + "func": "LeRobotRecorder", + "mode": "save", + "params": { + "robot_meta": { + "robot_type": "UR10", + "control_freq": 25 + }, + "instruction": { + "lang": "Pick up the cup and move it across the table." + }, + "extra": { + "scene_type": "Commercial", + "task_description": "UR10 cup transfer", + "data_type": "sim" + }, + "use_videos": true + } + } + }, + "extensions": { + "single_arm_name": "right_arm", + "single_eef_name": "right_eef", + "agent_arm_slots": { + "right": { + "arm": "right_arm", + "eef": "right_eef" + } + }, + "arm_aim_yaw_offset": 3.141592653589793, + "gripper_open_state": [0.0], + "gripper_close_state": [0.04], + "target_xy": [0.75, -0.18], + "success_tolerance": 0.04, + "ignore_terminations_during_agent": true, + "agent_success": { + "op": "all", + "terms": [ + { + "type": "object_xy_near", + "object": "cup", + "target_xy": [0.75, -0.18], + "tolerance": 0.04 + }, + { + "type": "object_not_fallen", + "object": "cup", + "max_tilt": 0.7853981633974483 + } + ] + } + } + }, + "robot": { + "uid": "UR10", + "urdf_cfg": { + "components": [ + { + "component_type": "arm", + "urdf_path": "UniversalRobots/UR10/UR10.urdf" + }, + { + "component_type": "hand", + "urdf_path": "DH_PGI_140_80/DH_PGI_140_80.urdf" + } + ] + }, + "init_pos": [0.0, 0.0, 0.7775], + "init_rot": [0.0, 0.0, 0.0], + "init_qpos": [3.14159, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0], + "drive_pros": { + "stiffness": { + "JOINT[1-6]": 10000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 100.0 + }, + "damping": { + "JOINT[1-6]": 1000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 10.0 + }, + "max_effort": { + "JOINT[1-6]": 100000.0, + "GRIPPER_FINGER[1-2]_JOINT_1": 1000.0 + } + }, + "solver_cfg": { + "right_arm": { + "class_type": "PytorchSolver", + "end_link_name": "ee_link", + "root_link_name": "base_link", + "tcp": [ + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.16], + [0.0, 0.0, 0.0, 1.0] + ] + } + }, + "control_parts": { + "right_arm": ["JOINT[1-6]"], + "right_eef": ["GRIPPER_FINGER[1-2]_JOINT_1"] + } + }, + "sensor": [ + { + "sensor_type": "Camera", + "uid": "cam_high", + "width": 960, + "height": 540, + "intrinsics": [488.1665344238281, 488.1665344238281, 480, 270], + "extrinsics": { + "eye": [0.35368482807598, 0.014695524383058989, 1.4517046071614774], + "target": [0.8586357573287919, 0.0, 0.5232553674540066], + "up": [0.9306678549330372, -0.0005600064212467153, 0.3658647703553347] + } + } + ], + "light": { + "direct": [ + { + "uid": "light_1", + "light_type": "point", + "color": [1.0, 1.0, 1.0], + "intensity": 50.0, + "init_pos": [2, 0, 2], + "radius": 10.0 + } + ] + }, + "background": [ + { + "uid": "table", + "shape": { + "shape_type": "Mesh", + "fpath": "CircleTableSimple/circle_table_simple.ply", + "compute_uv": true + }, + "attrs": { + "mass": 10.0, + "static_friction": 0.95, + "dynamic_friction": 0.9, + "restitution": 0.01 + }, + "body_scale": [1, 1, 1], + "body_type": "kinematic", + "init_pos": [0.725, 0.0, 0.825], + "init_rot": [0, 90, 0] + } + ], + "rigid_object": [ + { + "uid": "cup", + "shape": { + "shape_type": "Mesh", + "fpath": "PaperCup/paper_cup.ply", + "compute_uv": true + }, + "attrs": { + "mass": 0.01, + "static_friction": 2.0, + "dynamic_friction": 1.5, + "contact_offset": 0.003, + "rest_offset": 0.001, + "restitution": 0.01, + "max_depenetration_velocity": 10.0, + "min_position_iters": 32, + "min_velocity_iters": 8 + }, + "init_pos": [0.75, 0.18, 0.9], + "body_scale": [0.75, 0.75, 1.0], + "max_convex_hull_num": 8 + } + ] +} diff --git a/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/task_prompt.txt b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/task_prompt.txt new file mode 100644 index 00000000..a7aa1f5c --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/configs/ur10_cup_transfer_agent/task_prompt.txt @@ -0,0 +1,4 @@ +Task: +Use the UR10 right arm to pick up the cup from the positive-y side of the table and place it at [0.75, -0.18] on the negative-y side of the table. +Only the right_arm is available. Keep every left_arm_action as null/None. +After placing the cup at the target location, return the right_arm to its initial pose. diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/__init__.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/__init__.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/__init__.py new file mode 100644 index 00000000..015c4151 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__: list[str] = [] diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/atomic_actions.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/atomic_actions.py new file mode 100644 index 00000000..06ef404d --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/atomic_actions.py @@ -0,0 +1,159 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from typing import Any + +import torch + +from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.base_agent_env import ( + BaseAgentEnv, +) +from embodichain.lab.gym.envs.tasks.tableware.configurable_success import ( + evaluate_configured_success, +) +from embodichain.lab.gym.utils.registration import register_env + +__all__ = ["AtomicActionsAgentEnv"] + + +@register_env("AtomicActionsAgent-v3", max_episode_steps=600) +class AtomicActionsAgentEnv(BaseAgentEnv, EmbodiedEnv): + """Config-driven agent environment for atomic-action tasks.""" + + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + if bool(getattr(self, "ignore_terminations_during_agent", False)): + self.cfg.ignore_terminations = True + super()._init_agents(**kwargs) + + def reset(self, seed: int | None = None, options: dict | None = None): + obs, info = super().reset(seed=seed, options=options) + super().get_states() + return obs, info + + def update_obj_info(self) -> None: + super().update_obj_info() + self._apply_grasp_pose_overrides() + + def is_task_success(self, **kwargs) -> torch.Tensor: + return evaluate_configured_success(self) + + def compute_task_state(self, **kwargs) -> tuple[torch.Tensor, torch.Tensor, dict]: + success = self.is_task_success() + fail = torch.zeros_like(success) + return success, fail, {} + + def _apply_grasp_pose_overrides(self) -> None: + overrides = list(getattr(self, "agent_grasp_pose_overrides", []) or []) + if bool(getattr(self, "force_apple_top_down_grasp_pose", False)): + overrides.append( + { + "type": "top_down", + "object": getattr(self, "agent_success_object", "apple"), + "height_offset": getattr( + self, "apple_top_down_grasp_height_offset", 0.035 + ), + } + ) + + for override in overrides: + if str(override.get("type", "")).lower() != "top_down": + raise ValueError(f"Unsupported grasp pose override: {override!r}.") + self._apply_top_down_grasp_pose_override(override) + + def _apply_top_down_grasp_pose_override(self, override: dict[str, Any]) -> None: + object_name = str(override.get("object", override.get("object_uid"))) + if not hasattr(self, "obj_info") or object_name not in self.obj_info: + return + + obj = self.sim.get_rigid_object(object_name) + object_pose = obj.get_local_pose(to_matrix=True) + if object_pose.ndim == 3: + object_pose = object_pose.squeeze(0) + object_pose = object_pose.to( + dtype=self.init_qpos.dtype, device=self.robot.device + ) + + grasp_pose_world = self._make_top_down_grasp_pose_world( + object_pose=object_pose, + height_offset=float(override.get("height_offset", 0.035)), + side=str(override.get("side", "right")), + ) + grasp_pose_object = torch.linalg.inv(object_pose) @ grasp_pose_world + self.obj_info[object_name]["grasp_pose_obj"] = grasp_pose_object + self.affordance_datas[f"{object_name}_grasp_pose_object"] = ( + grasp_pose_object.unsqueeze(0) + ) + + def _make_top_down_grasp_pose_world( + self, + object_pose: torch.Tensor, + height_offset: float, + side: str, + ) -> torch.Tensor: + object_position = object_pose[:3, 3] + z_axis = torch.tensor( + [0.0, 0.0, -1.0], dtype=object_pose.dtype, device=object_pose.device + ) + x_axis = self._top_down_grasp_x_axis(object_position, side=side) + y_axis = torch.cross(z_axis, x_axis, dim=0) + y_axis = y_axis / torch.linalg.norm(y_axis).clamp_min(1e-6) + + grasp_pose = torch.eye(4, dtype=object_pose.dtype, device=object_pose.device) + grasp_pose[:3, 0] = x_axis + grasp_pose[:3, 1] = y_axis + grasp_pose[:3, 2] = z_axis + grasp_pose[:3, 3] = object_position + torch.tensor( + [0.0, 0.0, height_offset], + dtype=object_pose.dtype, + device=object_pose.device, + ) + return grasp_pose + + def _top_down_grasp_x_axis( + self, object_position: torch.Tensor, side: str + ) -> torch.Tensor: + base_pose = getattr(self, f"{side}_arm_base_pose", None) + if base_pose is None: + base_pose = getattr(self, "right_arm_base_pose", None) + if base_pose is None: + base_pose = getattr(self, "left_arm_base_pose", None) + + if base_pose is not None: + base_pose = torch.as_tensor( + base_pose, dtype=object_position.dtype, device=object_position.device + ) + base_position = base_pose[:3, 3] if base_pose.ndim == 2 else base_pose[:3] + x_axis = object_position - base_position + x_axis = x_axis.clone() + x_axis[2] = 0.0 + else: + x_axis = torch.tensor( + [1.0, 0.0, 0.0], + dtype=object_position.dtype, + device=object_position.device, + ) + + if torch.linalg.norm(x_axis) < 1e-6: + x_axis = torch.tensor( + [1.0, 0.0, 0.0], + dtype=object_position.dtype, + device=object_position.device, + ) + return x_axis / torch.linalg.norm(x_axis).clamp_min(1e-6) diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/base_agent_env.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/base_agent_env.py new file mode 100644 index 00000000..f94992e9 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/base_agent_env.py @@ -0,0 +1,356 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import torch +from embodichain.utils import logger + + +class BaseAgentEnv: + + def _init_agents(self, agent_config, task_name, agent_config_path=None): + from embodichain.gen_sim.action_agent_pipeline.agents.task_agent import ( + TaskAgent, + ) + from embodichain.gen_sim.action_agent_pipeline.agents.compile_agent import ( + CompileAgent, + ) + from embodichain.gen_sim.action_agent_pipeline.agents.recovery_agent import ( + RecoveryAgent, + ) + from embodichain.gen_sim.action_agent_pipeline.agents.llm import ( + task_llm, + compile_llm, + recovery_llm, + ) + + self.task_agent = TaskAgent( + task_llm, + **agent_config["Agent"], + **agent_config["TaskAgent"], + task_name=task_name, + config_dir=agent_config_path, + ) + self.recovery_agent = RecoveryAgent( + recovery_llm, + **agent_config["Agent"], + **agent_config["RecoveryAgent"], + task_name=task_name, + config_dir=agent_config_path, + ) + self.compile_agent = CompileAgent( + compile_llm, + **agent_config["Agent"], + **agent_config["CompileAgent"], + task_name=task_name, + config_dir=agent_config_path, + ) + + def get_states(self): + # TODO: only support num_env = 1 for now + # store robot states in each env.reset + self.init_qpos = self.robot.get_qpos().squeeze(0) + + self._agent_arm_slots = self._resolve_agent_arm_slots() + for side in ("left", "right"): + self._initialize_agent_arm_slot(side, self._agent_arm_slots.get(side)) + + self.open_state = torch.as_tensor( + getattr( + self, + "agent_open_state", + getattr(self, "gripper_open_state", [0.05]), + ), + dtype=self.init_qpos.dtype, + device=self.init_qpos.device, + ).flatten() + self.close_state = torch.as_tensor( + getattr( + self, + "agent_close_state", + getattr(self, "gripper_close_state", [0.0]), + ), + dtype=self.init_qpos.dtype, + device=self.init_qpos.device, + ).flatten() + self.left_arm_current_gripper_state = self._initial_gripper_state("left") + self.right_arm_current_gripper_state = self._initial_gripper_state("right") + + self.update_obj_info() + + def _resolve_agent_arm_slots(self) -> dict[str, dict[str, str | None] | None]: + configured_slots = getattr(self, "agent_arm_slots", None) + if configured_slots is not None: + return self._normalize_agent_arm_slots(configured_slots) + + if hasattr(self, "single_arm_name") or hasattr(self, "single_eef_name"): + slot = getattr(self, "agent_single_arm_slot", "right") + return self._normalize_agent_arm_slots( + { + slot: { + "arm": getattr(self, "single_arm_name", "right_arm"), + "eef": getattr(self, "single_eef_name", "right_eef"), + } + } + ) + + control_parts = getattr(self.robot, "control_parts", {}) or {} + if "arm" in control_parts and "hand" in control_parts: + slot = getattr(self, "agent_single_arm_slot", "left") + return self._normalize_agent_arm_slots( + {slot: {"arm": "arm", "eef": "hand"}} + ) + + return self._normalize_agent_arm_slots( + { + "left": {"arm": "left_arm", "eef": "left_eef"}, + "right": {"arm": "right_arm", "eef": "right_eef"}, + } + ) + + def _normalize_agent_arm_slots( + self, slots + ) -> dict[str, dict[str, str | None] | None]: + normalized = {"left": None, "right": None} + for side in normalized: + slot_cfg = slots.get(side) if isinstance(slots, dict) else None + if slot_cfg is None: + continue + if isinstance(slot_cfg, str): + normalized[side] = {"arm": slot_cfg, "eef": None} + continue + normalized[side] = { + "arm": slot_cfg.get("arm", slot_cfg.get("arm_control_part")), + "eef": slot_cfg.get( + "eef", + slot_cfg.get("hand", slot_cfg.get("eef_control_part")), + ), + } + return normalized + + def _initialize_agent_arm_slot( + self, side: str, slot_cfg: dict[str, str | None] | None + ) -> None: + arm_name = slot_cfg.get("arm") if slot_cfg else None + eef_name = slot_cfg.get("eef") if slot_cfg else None + arm_joints = self._get_control_part_joint_ids(arm_name) + eef_joints = self._get_control_part_joint_ids(eef_name) + + setattr(self, f"{side}_arm_joints", arm_joints) + setattr(self, f"{side}_eef_joints", eef_joints) + + if arm_name is None or not arm_joints: + setattr(self, f"{side}_arm_init_qpos", self.init_qpos.new_empty(0)) + setattr(self, f"{side}_arm_init_xpos", None) + setattr(self, f"{side}_arm_base_pose", None) + setattr(self, f"{side}_arm_current_qpos", self.init_qpos.new_empty(0)) + setattr(self, f"{side}_arm_current_xpos", None) + return + + init_qpos = self.init_qpos[arm_joints] + init_xpos = self.robot.compute_fk( + init_qpos, name=arm_name, to_matrix=True + ).squeeze(0) + base_pose = self.robot.get_control_part_base_pose( + arm_name, to_matrix=True + ).squeeze(0) + + setattr(self, f"{side}_arm_init_qpos", init_qpos) + setattr(self, f"{side}_arm_init_xpos", init_xpos) + setattr(self, f"{side}_arm_base_pose", base_pose) + setattr(self, f"{side}_arm_current_qpos", init_qpos) + setattr(self, f"{side}_arm_current_xpos", init_xpos) + + def _get_control_part_joint_ids(self, control_part: str | None) -> list[int]: + if control_part is None: + return [] + if control_part not in (getattr(self.robot, "control_parts", {}) or {}): + return [] + return list(self.robot.get_joint_ids(name=control_part)) + + def _initial_gripper_state(self, side: str) -> torch.Tensor: + if len(getattr(self, f"{side}_eef_joints", []) or []) == 0: + return self.open_state.new_empty(0) + return self.open_state + + def update_obj_info(self): + # store some useful obj information + obj_info = getattr(self, "obj_info", {}) + obj_uids = self.sim.get_rigid_object_uid_list() + for obj_name in obj_uids: + obj = self.sim.get_rigid_object(obj_name) + obj_pose = obj.get_local_pose(to_matrix=True).squeeze(0) + + if obj_name not in obj_info: + obj_height = obj_pose[2, 3] # Extract the height (z-coordinate) + obj_grasp_pose = self.affordance_datas.get( + f"{obj_name}_grasp_pose_object", None + ) + obj_info[obj_name] = { + "pose": obj_pose, # Store the full pose (4x4 matrix) + "height": obj_height, # Store the initial height (z-coordinate) + "grasp_pose_obj": ( + obj_grasp_pose.squeeze(0) + if obj_grasp_pose is not None + else None + ), # Store the grasp pose if available + } + else: + obj_info[obj_name]["pose"] = obj_pose + + self.obj_info = obj_info + + # -------------------- Common getters / setters -------------------- + + def get_obs_for_agent(self): + obs = self.get_obs() + rgb = obs["sensor"]["cam_high"]["color"].squeeze(0) + + # Get validation camera data + camera_data = self.event_manager.get_functor("validation_cameras")(self, None) + result = {"rgb": rgb} + result.update({k: v.squeeze(0) for k, v in camera_data.items()}) + return result + + def get_current_qpos_agent(self): + return self.left_arm_current_qpos, self.right_arm_current_qpos + + def set_current_qpos_agent(self, arm_qpos, is_left): + if is_left: + self.left_arm_current_qpos = arm_qpos + else: + self.right_arm_current_qpos = arm_qpos + + def get_current_xpos_agent(self): + return self.left_arm_current_xpos, self.right_arm_current_xpos + + def set_current_xpos_agent(self, arm_xpos, is_left): + if is_left: + self.left_arm_current_xpos = arm_xpos + else: + self.right_arm_current_xpos = arm_xpos + + def get_current_gripper_state_agent(self): + return self.left_arm_current_gripper_state, self.right_arm_current_gripper_state + + def set_current_gripper_state_agent(self, arm_gripper_state, is_left): + if is_left: + self.left_arm_current_gripper_state = arm_gripper_state + else: + self.right_arm_current_gripper_state = arm_gripper_state + + # -------------------- IK / FK -------------------- + def get_arm_ik(self, target_xpos, is_left, qpos_seed=None): + control_part = self.get_agent_arm_control_part(is_left) + ret, qpos = self.robot.compute_ik( + name=control_part, pose=target_xpos, joint_seed=qpos_seed + ) + return ret.all().item(), qpos.squeeze(0) + + def get_arm_fk(self, qpos, is_left): + control_part = self.get_agent_arm_control_part(is_left) + xpos = self.robot.compute_fk( + name=control_part, qpos=torch.as_tensor(qpos), to_matrix=True + ) + return xpos.squeeze(0) + + def get_agent_arm_control_part(self, is_left: bool) -> str: + return self._get_agent_control_part(is_left=is_left, key="arm") + + def get_agent_eef_control_part(self, is_left: bool) -> str | None: + return self._get_agent_control_part(is_left=is_left, key="eef", required=False) + + def _get_agent_control_part( + self, is_left: bool, key: str, required: bool = True + ) -> str | None: + if not hasattr(self, "_agent_arm_slots"): + self._agent_arm_slots = self._resolve_agent_arm_slots() + side = "left" if is_left else "right" + slot_cfg = getattr(self, "_agent_arm_slots", {}).get(side) + control_part = slot_cfg.get(key) if slot_cfg else None + if control_part is None and required: + logger.log_error( + f"{side}_{key} is not configured for agent control.", + error_type=ValueError, + ) + return control_part + + # -------------------- get compiled graph for action list -------------------- + def generate_graph_for_actions(self, regenerate=False, recovery=False, **kwargs): + logger.log_info( + f"Generate graph for creating {'recovery' if recovery else ''} action list for {self.compile_agent.task_name}.", + color="yellow" if recovery else "green", + ) + + print(f"\033[92m\nStart task graph generation.\n\033[0m") + task_agent_input = self.task_agent.get_composed_observations( + env=self, + regenerate=regenerate, + observations=self.get_obs_for_agent(), + **kwargs, + ) + task_graph = self.task_agent.generate(**task_agent_input) + + recovery_spec = None + if recovery: + print(f"\033[91m\nStart recovery spec generation.\n\033[0m") + recovery_agent_input = self.recovery_agent.get_composed_observations( + env=self, + regenerate=regenerate, + task_graph=task_graph, + **kwargs, + ) + recovery_spec = self.recovery_agent.generate(**recovery_agent_input) + + print(f"\033[94m\nStart graph compilation.\n\033[0m") + compile_agent_input = self.compile_agent.get_composed_observations( + env=self, + regenerate=regenerate, + task_graph=task_graph, + recovery_spec=recovery_spec, + recovery_enabled=recovery, + **kwargs, + ) + graph_file_path, kwargs, graph_content = self.compile_agent.generate( + **compile_agent_input + ) + + return graph_file_path, kwargs, graph_content + + # -------------------- get action list -------------------- + def create_demo_action_list( + self, regenerate=False, recovery=False, *args, **kwargs + ): + graph_file_path, compile_kwargs, _ = self.generate_graph_for_actions( + regenerate=regenerate, recovery=recovery + ) + public_atomic_kwargs = { + "use_public_grasp_semantics": True, + "use_public_grasp_action": False, + "use_public_place_action": True, + "allow_public_grasp_annotation": True, + "force_public_grasp_reannotate": False, + } + for key in public_atomic_kwargs: + if key in kwargs: + public_atomic_kwargs[key] = kwargs[key] + compile_kwargs.update(public_atomic_kwargs) + compile_kwargs["interactive_error_injection"] = kwargs.get( + "interactive_error_injection", False + ) + action_list = self.compile_agent.act(graph_file_path, **compile_kwargs) + return action_list diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/pour_water_agent_env.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/pour_water_agent_env.py new file mode 100644 index 00000000..d10d13e9 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/pour_water_agent_env.py @@ -0,0 +1,40 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from typing import Dict, Optional + +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.base_agent_env import ( + BaseAgentEnv, +) +from embodichain.lab.gym.envs import EmbodiedEnvCfg +from embodichain.lab.gym.envs.tasks.tableware.pour_water.pour_water import PourWaterEnv +from embodichain.lab.gym.utils.registration import register_env + +__all__ = ["PourWaterAgentEnv"] + + +@register_env("PourWaterAgent-v3", max_episode_steps=600) +class PourWaterAgentEnv(BaseAgentEnv, PourWaterEnv): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + super()._init_agents(**kwargs) + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): + obs, info = super().reset(seed=seed, options=options) + super().get_states() + return obs, info diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/rearrangement_agent_env.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/rearrangement_agent_env.py new file mode 100644 index 00000000..e2b201cd --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/rearrangement_agent_env.py @@ -0,0 +1,62 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from typing import Dict, Optional + +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.base_agent_env import ( + BaseAgentEnv, +) +from embodichain.lab.gym.envs import EmbodiedEnvCfg +from embodichain.lab.gym.envs.tasks.tableware.rearrangement import RearrangementEnv +from embodichain.lab.gym.utils.registration import register_env + +__all__ = ["RearrangementAgentEnv"] + + +@register_env("RearrangementAgent-v3", max_episode_steps=600) +class RearrangementAgentEnv(BaseAgentEnv, RearrangementEnv): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + super()._init_agents(**kwargs) + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): + obs, info = super().reset(seed=seed, options=options) + super().get_states() + return obs, info + + def is_task_success(self): + fork = self.sim.get_rigid_object("fork") + spoon = self.sim.get_rigid_object("spoon") + plate = self.sim.get_rigid_object("plate") + + plate_pose = plate.get_local_pose(to_matrix=True) + spoon_place_target_y = plate_pose[0, 1, 3] - 0.16 + fork_place_target_y = plate_pose[0, 1, 3] + 0.16 + + spoon_pose = spoon.get_local_pose(to_matrix=True) + spoon_y = spoon_pose[0, 1, 3] + + fork_pose = fork.get_local_pose(to_matrix=True) + fork_y = fork_pose[0, 1, 3] + + tolerance = self.metadata.get("success_params", {}).get("tolerance", 0.02) + + return ( + abs(spoon_y - spoon_place_target_y) <= tolerance + and abs(fork_y - fork_place_target_y) <= tolerance + ) diff --git a/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/single_arm_agent_env.py b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/single_arm_agent_env.py new file mode 100644 index 00000000..e47d63bf --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/env_adapters/tableware/single_arm_agent_env.py @@ -0,0 +1,119 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import torch + +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.base_agent_env import ( + BaseAgentEnv, +) + +__all__ = ["SingleArmAgentEnv"] + + +class SingleArmAgentEnv(BaseAgentEnv): + """Agent mixin for single-arm robots exposed through the right-arm graph slot.""" + + def get_states(self): + # TODO: only support num_env = 1 for now + self.init_qpos = self.robot.get_qpos().squeeze(0) + + self.single_arm_name = getattr(self, "single_arm_name", "right_arm") + self.single_eef_name = getattr(self, "single_eef_name", "right_eef") + + self.left_arm_joints = [] + self.left_eef_joints = [] + self.right_arm_joints = list( + self.robot.get_joint_ids(name=self.single_arm_name) + ) + self.right_eef_joints = list( + self.robot.get_joint_ids(name=self.single_eef_name) + ) + + self.left_arm_init_qpos = self.init_qpos.new_empty(0) + self.left_arm_init_xpos = None + self.left_arm_base_pose = None + + self.right_arm_init_qpos = self.init_qpos[self.right_arm_joints] + self.right_arm_init_xpos = self.robot.compute_fk( + self.right_arm_init_qpos, name=self.single_arm_name, to_matrix=True + ).squeeze(0) + self.right_arm_base_pose = self.robot.get_control_part_base_pose( + self.single_arm_name, to_matrix=True + ).squeeze(0) + + self.left_arm_current_qpos = self.left_arm_init_qpos + self.right_arm_current_qpos = self.right_arm_init_qpos + self.left_arm_current_xpos = self.left_arm_init_xpos + self.right_arm_current_xpos = self.right_arm_init_xpos + + self.open_state = torch.as_tensor( + getattr(self, "gripper_open_state", [0.05]), + dtype=self.init_qpos.dtype, + device=self.init_qpos.device, + ).flatten() + self.close_state = torch.as_tensor( + getattr(self, "gripper_close_state", [0.0]), + dtype=self.init_qpos.dtype, + device=self.init_qpos.device, + ).flatten() + self.left_arm_current_gripper_state = self.open_state.new_empty(0) + self.right_arm_current_gripper_state = self.open_state + + self.update_obj_info() + + def get_arm_ik(self, target_xpos, is_left, qpos_seed=None): + if is_left: + raise ValueError( + "SingleArmAgentEnv only supports the right-arm graph slot." + ) + ret, qpos = self.robot.compute_ik( + name=self.single_arm_name, pose=target_xpos, joint_seed=qpos_seed + ) + return ret.all().item(), qpos.squeeze(0) + + def get_arm_fk(self, qpos, is_left): + if is_left: + raise ValueError( + "SingleArmAgentEnv only supports the right-arm graph slot." + ) + xpos = self.robot.compute_fk( + name=self.single_arm_name, qpos=torch.as_tensor(qpos), to_matrix=True + ) + return xpos.squeeze(0) + + def create_demo_action_list( + self, regenerate=False, recovery=False, *args, **kwargs + ): + graph_file_path, compile_kwargs, _ = self.generate_graph_for_actions( + regenerate=regenerate, recovery=recovery + ) + public_atomic_kwargs = { + "use_public_grasp_semantics": True, + "use_public_grasp_action": False, + "use_public_place_action": True, + "allow_public_grasp_annotation": True, + "force_public_grasp_reannotate": False, + } + for key in public_atomic_kwargs: + if key in kwargs: + public_atomic_kwargs[key] = kwargs[key] + compile_kwargs.update(public_atomic_kwargs) + compile_kwargs["interactive_error_injection"] = kwargs.get( + "interactive_error_injection", False + ) + return self.compile_agent.act(graph_file_path, **compile_kwargs) diff --git a/embodichain/gen_sim/action_agent_pipeline/error_functions.py b/embodichain/gen_sim/action_agent_pipeline/error_functions.py new file mode 100644 index 00000000..859cab59 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/error_functions.py @@ -0,0 +1,319 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import random +import re + +import numpy as np +import torch + +from embodichain.lab.gym.utils.misc import apply_rotation +from embodichain.lab.sim.atomic_actions import AtomicActionEngine, MoveActionCfg +from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg +from embodichain.gen_sim.action_agent_pipeline.atom_action_utils import ( + apply_offset_to_pose, + resolve_arm_side, +) +from embodichain.utils.logger import log_warning + +__all__ = [ + "judge_active_arms", + "object_error_types", + "action_error_types", + "update_scene", + "misplaced_object", + "fallen_object", + "setup_interactive_error_input", + "restore_interactive_error_input", + "interactive_error_requested", + "inject_interactive_error", + "wrong_affordance", +] + + +def judge_active_arms(action_str: str) -> list[str]: + right_active = re.search(r"\bright_arm_action\s*=", action_str) is not None + left_active = re.search(r"\bleft_arm_action\s*=", action_str) is not None + right_none = re.search(r"\bright_arm_action\s*=\s*None\b", action_str) is not None + left_none = re.search(r"\bleft_arm_action\s*=\s*None\b", action_str) is not None + + active_arms = [] + if left_active and not left_none: + active_arms.append("left_arm") + if right_active and not right_none: + active_arms.append("right_arm") + + if len(active_arms) == 0: + raise ValueError( + "No active arm detected: both right_arm_action and left_arm_action are None." + ) + + return active_arms + + +object_error_types = [ + "misplaced_object", + "fallen_object", +] +action_error_types = [ + "wrong_affordance", +] + + +def update_scene(env) -> None: + env.sim.update(step=100) + + +def misplaced_object(env, error_obj, error_pose, relative_error_xyz) -> None: + if error_pose is not None: + obj_pose = torch.as_tensor(error_pose).clone() + if obj_pose.ndim == 3: + obj_pose = obj_pose.squeeze(0) + else: + obj_pose = ( + env.sim.get_rigid_object(error_obj) + .get_local_pose(to_matrix=True) + .squeeze(0) + .clone() + ) + obj_pose[0, 3] += relative_error_xyz[0] + obj_pose[1, 3] += relative_error_xyz[1] + obj_pose[2, 3] += relative_error_xyz[2] + + env.sim.get_rigid_object(error_obj).set_local_pose(obj_pose.unsqueeze(0)) + update_scene(env) + + +def fallen_object(env, error_obj, error_pose, relative_error_xyz) -> None: + if error_pose is not None: + obj_pose = torch.as_tensor(error_pose).clone() + if obj_pose.ndim == 3: + obj_pose = obj_pose.squeeze(0) + else: + obj_pose = ( + env.sim.get_rigid_object(error_obj) + .get_local_pose(to_matrix=True) + .squeeze(0) + .clone() + ) + obj_pose[0, 3] += relative_error_xyz[0] + obj_pose[1, 3] += relative_error_xyz[1] + obj_pose[2, 3] += relative_error_xyz[2] + obj_pose = apply_rotation(obj_pose, "x", 90) + obj_pose = apply_rotation(obj_pose, "y", 90) + + env.sim.get_rigid_object(error_obj).set_local_pose(obj_pose.unsqueeze(0)) + update_scene(env) + + +def setup_interactive_error_input(enabled: bool = False): + if not enabled: + return None + + import sys + import termios + import tty + + stdin = sys.stdin + if not hasattr(stdin, "fileno") or not stdin.isatty(): + return None + + fd = stdin.fileno() + try: + old_settings = termios.tcgetattr(fd) + except termios.error: + return None + + tty.setcbreak(fd) + return stdin, old_settings + + +def restore_interactive_error_input(interactive_input) -> None: + if interactive_input is None: + return + + import termios + + stdin, old_settings = interactive_input + termios.tcsetattr(stdin.fileno(), termios.TCSADRAIN, old_settings) + + +def interactive_error_requested(interactive_input) -> bool: + if interactive_input is None: + return False + + import select + + stdin, _ = interactive_input + readable, _, _ = select.select([stdin], [], [], 0) + if not readable: + return False + + key = stdin.read(1) + return key.lower() == "f" + + +def _parse_relative_error_xyz(value: str) -> list[float]: + value = value.strip().translate( + str.maketrans({"[": " ", "]": " ", "(": " ", ")": " "}) + ) + parts = [part for part in re.split(r"[,\s]+", value) if part] + if len(parts) != 3: + raise ValueError( + "relative_error_xyz must contain exactly three numbers, " + "for example: 0.02, 0.02, 0 or [0.02, 0.02, 0]" + ) + try: + return [float(part) for part in parts] + except ValueError as exc: + raise ValueError( + "relative_error_xyz must contain exactly three numbers, " + "for example: 0.02, 0.02, 0 or [0.02, 0.02, 0]" + ) from exc + + +def _get_interactive_error_objects(env) -> list[str]: + if hasattr(env, "sim") and hasattr(env.sim, "get_rigid_object_uid_list"): + return [obj for obj in env.sim.get_rigid_object_uid_list() if obj != "table"] + return [obj for obj in getattr(env, "obj_info", {}).keys() if obj != "table"] + + +def _select_interactive_error_object(env) -> str: + object_names = _get_interactive_error_objects(env) + if len(object_names) == 0: + raise ValueError("No object is available for interactive error injection.") + + print("Objects:") + for index, obj_name in enumerate(object_names, start=1): + print(f"{index}: {obj_name}") + + selection = "" + while selection == "": + selection = input("Select error object: ").strip() + if not selection.isdigit(): + raise ValueError(f"Unsupported object selection: {selection}.") + + index = int(selection) + if index < 1 or index > len(object_names): + raise ValueError(f"Unsupported object selection: {selection}.") + + return object_names[index - 1] + + +def inject_interactive_error(env) -> None: + print("\nInteractive error injection requested.") + print("1: misplaced_object") + print("2: fallen_object") + print("E: exit without injecting failure") + error_choice = "" + while error_choice == "": + error_choice = input("Select error type: ").strip() + if error_choice.lower() == "e": + log_warning("Interactive error injection canceled.") + return + if error_choice not in {"1", "2"}: + raise ValueError(f"Unsupported interactive error selection: {error_choice}.") + + error_obj = _select_interactive_error_object(env) + print("relative_error_xyz example: 0.02, 0.02, 0 or [0.02, 0.02, 0]") + relative_error_xyz = _parse_relative_error_xyz( + input("relative_error_xyz (x, y, z): ") + ) + + if error_choice == "1": + misplaced_object( + env, + error_obj=error_obj, + error_pose=None, + relative_error_xyz=relative_error_xyz, + ) + elif error_choice == "2": + fallen_object( + env, + error_obj=error_obj, + error_pose=None, + relative_error_xyz=relative_error_xyz, + ) + + log_warning( + f"Injected interactive error {error_choice} on {error_obj} with relative_error_xyz={relative_error_xyz}." + ) + + +def wrong_affordance(env, action, error_arm, error_pose, relative_error_xyz): + if action is None: + raise ValueError("wrong_affordance requires a compiled arm action ndarray.") + + action = np.array(action, copy=True) + if action.ndim != 2 or action.shape[1] < 3: + raise ValueError( + f"wrong_affordance expects action with shape [T, arm_dof+2], got {action.shape}." + ) + + side = resolve_arm_side(env, error_arm) + is_left = side == "left" + if hasattr(env, "get_agent_arm_control_part"): + select_arm = env.get_agent_arm_control_part(is_left) + else: + select_arm = "left_arm" if is_left else "right_arm" + start_qpos = torch.as_tensor(action[0, :-2], dtype=torch.float32) + last_qpos = torch.as_tensor(action[-1, :-2], dtype=torch.float32) + gripper_state_traj = action[:, -1:].copy() + last_pose = env.get_arm_fk(qpos=last_qpos, is_left=is_left).clone() + + if error_pose is not None: + target_pose = torch.as_tensor(error_pose).clone() + if target_pose.ndim == 3: + target_pose = target_pose.squeeze(0) + else: + if relative_error_xyz is None: + raise ValueError( + "wrong_affordance requires either error_pose or relative_error_xyz." + ) + target_pose = apply_offset_to_pose(last_pose, relative_error_xyz) + + motion_generator = MotionGenerator( + cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=env.robot.uid)) + ) + engine = AtomicActionEngine( + motion_generator=motion_generator, + actions_cfg_list=[ + MoveActionCfg( + control_part=select_arm, + sample_interval=len(action), + ) + ], + ) + move_action = engine._actions["move"] + is_success, trajectory, _ = move_action.execute( + target=torch.as_tensor( + target_pose, dtype=torch.float32, device=env.robot.device + ), + start_qpos=start_qpos.to(env.robot.device).reshape(1, -1), + ) + if not is_success: + raise RuntimeError("Public MoveAction failed for wrong_affordance.") + + arm_traj = trajectory[0].detach().cpu().numpy() + if len(arm_traj) != len(gripper_state_traj): + raise ValueError( + "Public MoveAction trajectory length does not match the source action " + f"length: {len(arm_traj)} != {len(gripper_state_traj)}." + ) + + return np.concatenate([arm_traj, gripper_state_traj, gripper_state_traj], axis=-1) diff --git a/embodichain/gen_sim/action_agent_pipeline/graph_spec.py b/embodichain/gen_sim/action_agent_pipeline/graph_spec.py new file mode 100644 index 00000000..3d26890e --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/graph_spec.py @@ -0,0 +1,1380 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import importlib +import re +from collections.abc import Mapping +from copy import deepcopy +from functools import partial +from pathlib import Path +from typing import Any + +from embodichain.utils.llm_json import extract_json_object + +__all__ = [ + "compile_agent_graph_from_file", + "compile_agent_graph_spec", + "expand_recovery_spec", + "load_agent_graph_bundle", + "normalize_recovery_spec", +] + +MONITOR_TYPE_ALIASES = { + "object_moved": "object_moved", + "object_shifted": "object_moved", + "object_displaced": "object_moved", + "object_pose_changed": "object_moved", + "moved_object": "object_moved", + "hold_lost": "hold_lost", + "object_held": "hold_lost", + "grasp_lost": "hold_lost", + "object_dropped": "hold_lost", + "not_held": "hold_lost", + "slip": "hold_lost", + "slipped": "hold_lost", +} + +RECOVERY_STEP_TYPE_ALIASES = { + "retry": "retry_failed_edge", + "retry_edge": "retry_failed_edge", + "retry_failed_edge": "retry_failed_edge", + "replay": "replay_edge", + "repeat_edge": "replay_edge", + "replay_edge": "replay_edge", + "regrasp": "regrasp", + "re_grasp": "regrasp", + "grasp_again": "regrasp", + "regrasp_both": "regrasp_both", + "dual_regrasp": "regrasp_both", + "regrasp_objects": "regrasp_both", + "regrasp_all": "regrasp_both", + "action": "action", + "atomic_action": "action", +} + + +def load_agent_graph_bundle(path: str | Path) -> dict[str, Any]: + """Load a compiled graph JSON bundle from disk.""" + return extract_json_object(Path(path).read_text(encoding="utf-8")) + + +def compile_agent_graph_from_file( + path: str | Path, + *, + env: Any = None, + graph_cls: type | None = None, + action_module: Any = None, + monitor_module: Any = None, +) -> Any: + """Compile a graph JSON bundle from disk into an executable graph.""" + bundle = load_agent_graph_bundle(path) + task_graph = bundle.get("task_graph", bundle) + recovery_graph = bundle.get("recovery_graph") + return compile_agent_graph_spec( + task_graph, + recovery_graph, + env=env, + graph_cls=graph_cls, + action_module=action_module, + monitor_module=monitor_module, + ) + + +def compile_agent_graph_spec( + task_graph: str | Mapping[str, Any], + recovery_graph: str | Mapping[str, Any] | None = None, + *, + env: Any = None, + graph_cls: type | None = None, + action_module: Any = None, + monitor_module: Any = None, +) -> Any: + """Compile nominal and explicit recovery JSON graphs into ``AgentTaskGraph``. + + Args: + task_graph: Nominal graph spec with ``nodes`` and ``edges``. + recovery_graph: Optional compiled recovery graph with ``recovery_nodes``, + ``recovery_edges``, and ``recovery_branches``. + env: Runtime environment bound into monitor partials. + graph_cls: Optional graph class override for tests. + action_module: Optional atomic action namespace override. + monitor_module: Optional monitor namespace override. + + Returns: + Executable ``AgentTaskGraph`` instance. + """ + task_spec = extract_json_object(task_graph) + recovery_spec = extract_json_object( + recovery_graph or _empty_recovery_graph(task_spec) + ) + _reject_legacy_recovery_schema(recovery_spec) + nominal_node_ids, nominal_edge_ids = _validate_task_spec(task_spec) + graph_cls, action_module, monitor_module = _resolve_runtime( + graph_cls=graph_cls, + action_module=action_module, + monitor_module=monitor_module, + ) + + graph = graph_cls( + start=task_spec["start"], + goal=task_spec["goal"], + max_transitions=int(task_spec.get("max_transitions", 1000)), + ) + + for node in task_spec.get("nodes", []): + graph.add_node(node["id"], node.get("semantic", "")) + + for edge in task_spec.get("edges", []): + graph.add_edge( + edge["id"], + edge["source"], + edge["target"], + left_arm_action=_compile_action(edge.get("left_arm_action"), action_module), + right_arm_action=_compile_action( + edge.get("right_arm_action"), action_module + ), + ) + + recovery_node_ids = _add_recovery_nodes(graph, recovery_spec, nominal_node_ids) + recovery_edge_ids = _add_recovery_edges( + graph, + recovery_spec, + action_module=action_module, + defined_node_ids=nominal_node_ids | recovery_node_ids, + ) + _add_recovery_branches( + graph, + recovery_spec, + env=env, + monitor_module=monitor_module, + nominal_edge_ids=nominal_edge_ids, + recovery_edge_ids=recovery_edge_ids, + ) + + return graph + + +def expand_recovery_spec( + task_graph: str | Mapping[str, Any], + recovery_spec: str | Mapping[str, Any] | None = None, +) -> dict[str, Any]: + """Expand a lightweight recovery authoring spec into an explicit graph. + + The LLM-facing schema should stay compact: each binding says which nominal + edge to monitor, which failure monitor to use, and which reusable recovery + policy steps to apply. This compiler expands those bindings into concrete + ``recovery_nodes``, ``recovery_edges``, and ``recovery_branches`` that the + runtime can execute. + """ + task_spec = extract_json_object(task_graph) + spec, issues = normalize_recovery_spec(task_spec, recovery_spec or {}) + if issues: + issue_text = "; ".join(issues) + raise ValueError( + f"Recovery spec needs canonicalization before expansion: {issue_text}" + ) + _reject_compiled_recovery_schema_in_authoring_spec(spec) + + expanded = { + "task": spec.get("task", task_spec.get("task", "")), + "recovery_nodes": [], + "recovery_edges": [], + "recovery_branches": [], + } + + _validate_recovery_authoring_spec(spec) + bindings = list(spec.get("recovery_bindings", [])) + if not bindings: + return expanded + nominal_edges = {edge["id"]: edge for edge in task_spec.get("edges", [])} + used_node_ids = {node["id"] for node in task_spec.get("nodes", [])} + used_edge_ids = set(nominal_edges) + + for binding_index, binding in enumerate(bindings): + _expand_recovery_binding( + expanded, + binding, + binding_index=binding_index, + nominal_edges=nominal_edges, + used_node_ids=used_node_ids, + used_edge_ids=used_edge_ids, + ) + + return expanded + + +def normalize_recovery_spec( + task_graph: str | Mapping[str, Any], + recovery_spec: str | Mapping[str, Any] | None = None, +) -> tuple[dict[str, Any], list[str]]: + """Canonicalize a lightweight recovery spec before graph expansion. + + The compiler keeps common aliases and small omissions deterministic, while + returning unresolved semantic gaps for CompileAgent to canonicalize with one + optional LLM call. + """ + task_spec = extract_json_object(task_graph) + spec = extract_json_object(recovery_spec or {}) + _reject_compiled_recovery_schema_in_authoring_spec(spec) + + issues: list[str] = [] + nominal_edges = {edge["id"]: edge for edge in task_spec.get("edges", [])} + bindings = spec.get("recovery_bindings", spec.get("bindings", [])) + if not isinstance(bindings, list): + return ( + { + "task": spec.get("task", task_spec.get("task", "")), + "recovery_bindings": [], + }, + ["recovery_bindings must be a list"], + ) + + normalized = { + "task": spec.get("task", task_spec.get("task", "")), + "recovery_bindings": [], + } + allowed_top_keys = {"task", "recovery_bindings", "bindings"} + unknown_top_keys = set(spec) - allowed_top_keys + if unknown_top_keys: + issues.append(f"unknown top-level keys: {', '.join(sorted(unknown_top_keys))}") + + for binding_index, binding in enumerate(bindings): + if not isinstance(binding, Mapping): + issues.append(f"binding {binding_index} must be an object") + continue + normalized_binding, binding_issues = _normalize_recovery_binding( + binding, + binding_index=binding_index, + nominal_edges=nominal_edges, + ) + if normalized_binding is not None: + normalized["recovery_bindings"].append(normalized_binding) + issues.extend(binding_issues) + + return normalized, issues + + +def _normalize_recovery_binding( + binding: Mapping[str, Any], + *, + binding_index: int, + nominal_edges: Mapping[str, Mapping[str, Any]], +) -> tuple[dict[str, Any] | None, list[str]]: + issues: list[str] = [] + edge_id = binding.get("edge_id", binding.get("edge")) + if not edge_id: + return None, [f"binding {binding_index} is missing edge_id"] + edge_id = str(edge_id) + monitored_edge = nominal_edges.get(edge_id) + if monitored_edge is None: + issues.append(f"binding {binding_index} references unknown edge_id '{edge_id}'") + + monitors, monitor_issues = _normalize_monitor_list(binding, monitored_edge) + recovery_steps, recovery_issues = _normalize_recovery_steps( + binding, + monitored_edge, + monitors, + ) + issues.extend(monitor_issues) + issues.extend(recovery_issues) + + merge = _normalize_merge(binding.get("merge", "target")) + if merge is None: + issues.append( + f"binding {binding_index} has unsupported merge value " + f"'{binding.get('merge')}'" + ) + merge = "target" + + allowed_binding_keys = { + "edge_id", + "edge", + "failure_name", + "failure", + "monitors", + "monitor", + "monitor_intent", + "recovery", + "recovery_steps", + "recovery_intent", + "policy", + "merge", + "repeat_until_success", + "repeat", + } + unknown_binding_keys = set(binding) - allowed_binding_keys + if unknown_binding_keys: + issues.append( + f"binding {binding_index} has unknown keys: " + f"{', '.join(sorted(unknown_binding_keys))}" + ) + + return ( + { + "edge_id": edge_id, + "failure_name": str( + binding.get( + "failure_name", + binding.get("failure", f"failure_{binding_index}"), + ) + ), + "monitors": monitors, + "recovery": recovery_steps, + "merge": merge, + "repeat_until_success": bool( + binding.get( + "repeat_until_success", + binding.get("repeat", True), + ) + ), + }, + issues, + ) + + +def _normalize_monitor_list( + binding: Mapping[str, Any], + monitored_edge: Mapping[str, Any] | None, +) -> tuple[list[dict[str, Any]], list[str]]: + monitors = binding.get("monitors", binding.get("monitor")) + issues: list[str] = [] + if monitors is None: + if "monitor_intent" in binding: + issues.append( + f"binding for edge '{binding.get('edge_id', binding.get('edge'))}' " + "has monitor_intent but no canonical monitors" + ) + else: + issues.append( + f"binding for edge '{binding.get('edge_id', binding.get('edge'))}' " + "has no monitors" + ) + return [], issues + + normalized_monitors: list[dict[str, Any]] = [] + for monitor_index, monitor in enumerate(_as_list(monitors)): + normalized_monitor, monitor_issues = _normalize_monitor( + monitor, + monitor_index=monitor_index, + monitored_edge=monitored_edge, + ) + if normalized_monitor is not None: + normalized_monitors.append(normalized_monitor) + issues.extend(monitor_issues) + + return normalized_monitors, issues + + +def _normalize_monitor( + monitor: Mapping[str, Any], + *, + monitor_index: int, + monitored_edge: Mapping[str, Any] | None, +) -> tuple[dict[str, Any] | None, list[str]]: + if not isinstance(monitor, Mapping): + monitor = {"type": monitor} + monitor = dict(monitor) + issues: list[str] = [] + if "fn" in monitor: + return { + "fn": monitor["fn"], + "kwargs": dict(monitor.get("kwargs", {})), + }, issues + + monitor_type = _canonical_type( + monitor.get("type", monitor.get("monitor_type", monitor.get("name"))), + MONITOR_TYPE_ALIASES, + ) + if monitor_type is None: + return None, [f"monitor {monitor_index} has no supported type"] + + if monitor_type == "object_moved": + objects = monitor.get("objects", monitor.get("obj_name", monitor.get("object"))) + if objects is None: + objects = _edge_action_objects(monitored_edge) + objects = [str(obj_name) for obj_name in _as_list(objects) if obj_name] + if not objects: + return None, [ + f"monitor {monitor_index} object_moved needs objects or obj_name" + ] + return { + "type": "object_moved", + "objects": objects, + "threshold": monitor.get("threshold", 0.02), + }, issues + + robot_name = monitor.get("robot_name", monitor.get("arm")) + obj_name = monitor.get("obj_name", monitor.get("object")) + if robot_name is None or obj_name is None: + inferred = _infer_single_robot_object(monitored_edge) + if inferred is not None: + robot_name = robot_name or inferred[0] + obj_name = obj_name or inferred[1] + if robot_name is None or obj_name is None: + return None, [ + f"monitor {monitor_index} hold_lost needs robot_name and obj_name" + ] + return { + "type": "hold_lost", + "robot_name": str(robot_name), + "obj_name": str(obj_name), + "threshold": monitor.get("threshold", 0.05), + }, issues + + +def _normalize_recovery_steps( + binding: Mapping[str, Any], + monitored_edge: Mapping[str, Any] | None, + monitors: list[Mapping[str, Any]], +) -> tuple[list[dict[str, Any]], list[str]]: + recovery_steps = ( + binding.get("recovery") + or binding.get("recovery_steps") + or binding.get("policy") + ) + issues: list[str] = [] + if recovery_steps is None: + if "recovery_intent" in binding: + issues.append( + f"binding for edge '{binding.get('edge_id', binding.get('edge'))}' " + "has recovery_intent but no canonical recovery steps" + ) + return [], issues + recovery_steps = [{"type": "retry_failed_edge"}] + + normalized_steps: list[dict[str, Any]] = [] + for step_index, step in enumerate(_as_list(recovery_steps)): + normalized_step, step_issues = _normalize_recovery_step( + step, + step_index=step_index, + monitored_edge=monitored_edge, + monitors=monitors, + ) + if normalized_step is not None: + normalized_steps.append(normalized_step) + issues.extend(step_issues) + + return normalized_steps, issues + + +def _normalize_recovery_step( + step: Mapping[str, Any], + *, + step_index: int, + monitored_edge: Mapping[str, Any] | None, + monitors: list[Mapping[str, Any]], +) -> tuple[dict[str, Any] | None, list[str]]: + if not isinstance(step, Mapping): + step = {"type": step} + step = dict(step) + if "left_arm_action" in step or "right_arm_action" in step: + return { + "type": "action", + "left_arm_action": deepcopy(step.get("left_arm_action")), + "right_arm_action": deepcopy(step.get("right_arm_action")), + **_optional_step_metadata(step), + }, [] + + step_type = _canonical_type( + step.get("type", step.get("name")), RECOVERY_STEP_TYPE_ALIASES + ) + if step_type is None: + return None, [f"recovery step {step_index} has no supported type"] + + base = { + "type": step_type, + **_optional_step_metadata(step), + } + if step_type == "retry_failed_edge": + return base, [] + + if step_type == "action": + return None, [ + f"recovery step {step_index} action needs left_arm_action or right_arm_action" + ] + + if step_type == "replay_edge": + edge_id = step.get("edge_id", step.get("edge")) + if edge_id is None: + return None, [f"recovery step {step_index} replay_edge needs edge_id"] + base["edge_id"] = str(edge_id) + return base, [] + + if step_type == "regrasp_both": + arms = step.get("arms") + if arms is None: + arms = _infer_regrasp_arms(monitored_edge, monitors) + if not arms: + return None, [f"recovery step {step_index} regrasp_both needs arms"] + base.update( + { + "arms": {str(robot): str(obj) for robot, obj in dict(arms).items()}, + "pre_grasp_dis": step.get("pre_grasp_dis", 0.1), + } + ) + return base, [] + + robot_name = step.get("robot_name", step.get("arm")) + obj_name = step.get("obj_name", step.get("object")) + if robot_name is None or obj_name is None: + inferred = _infer_regrasp_target(monitored_edge, monitors) + if inferred is not None: + robot_name = robot_name or inferred[0] + obj_name = obj_name or inferred[1] + if robot_name is None or obj_name is None: + return None, [ + f"recovery step {step_index} regrasp needs robot_name and obj_name" + ] + + base.update( + { + "robot_name": str(robot_name), + "obj_name": str(obj_name), + "pre_grasp_dis": step.get("pre_grasp_dis", 0.1), + } + ) + return base, [] + + +def _optional_step_metadata(step: Mapping[str, Any]) -> dict[str, Any]: + metadata = {} + for key in ("name", "semantic"): + if key in step: + metadata[key] = step[key] + return metadata + + +def _canonical_type(value: Any, aliases: Mapping[str, str]) -> str | None: + if value is None: + return None + key = _sanitize_id(str(value)) + return aliases.get(key) + + +def _normalize_merge(value: Any) -> str | None: + key = _sanitize_id(str(value)) + if key in {"target", "complete", "completed", "finish", "success"}: + return "target" + if key in {"source", "retry", "precondition", "restore_precondition"}: + return "source" + return None + + +def _edge_action_objects(edge: Mapping[str, Any] | None) -> list[str]: + objects: list[str] = [] + if edge is None: + return objects + for action_key in ("left_arm_action", "right_arm_action"): + action = edge.get(action_key) + if not action: + continue + obj_name = dict(action.get("kwargs", {})).get("obj_name") + if obj_name is not None: + objects.append(str(obj_name)) + return objects + + +def _edge_robot_objects(edge: Mapping[str, Any] | None) -> dict[str, str]: + robot_objects: dict[str, str] = {} + if edge is None: + return robot_objects + for action_key in ("left_arm_action", "right_arm_action"): + action = edge.get(action_key) + if not action: + continue + kwargs = dict(action.get("kwargs", {})) + robot_name = kwargs.get("robot_name") + obj_name = kwargs.get("obj_name") + if robot_name is not None and obj_name is not None: + robot_objects[str(robot_name)] = str(obj_name) + return robot_objects + + +def _infer_single_robot_object( + edge: Mapping[str, Any] | None, +) -> tuple[str, str] | None: + robot_objects = _edge_robot_objects(edge) + if len(robot_objects) == 1: + return next(iter(robot_objects.items())) + return None + + +def _infer_regrasp_target( + edge: Mapping[str, Any] | None, + monitors: list[Mapping[str, Any]], +) -> tuple[str, str] | None: + hold_monitors = [ + (str(monitor["robot_name"]), str(monitor["obj_name"])) + for monitor in monitors + if monitor.get("type") == "hold_lost" + ] + if len(hold_monitors) == 1: + return hold_monitors[0] + return _infer_single_robot_object(edge) + + +def _infer_regrasp_arms( + edge: Mapping[str, Any] | None, + monitors: list[Mapping[str, Any]], +) -> dict[str, str]: + arms = _edge_robot_objects(edge) + for monitor in monitors: + if monitor.get("type") == "hold_lost": + arms[str(monitor["robot_name"])] = str(monitor["obj_name"]) + return arms + + +def _expand_recovery_binding( + expanded: dict[str, Any], + binding: Mapping[str, Any], + *, + binding_index: int, + nominal_edges: Mapping[str, Mapping[str, Any]], + used_node_ids: set[str], + used_edge_ids: set[str], +) -> None: + edge_id = binding["edge_id"] + if edge_id not in nominal_edges: + raise ValueError( + f"Recovery binding references unknown nominal edge_id '{edge_id}'." + ) + + monitored_edge = nominal_edges[edge_id] + failure_name = binding.get("failure_name", f"failure_{binding_index}") + monitor_sequence = _expand_monitor_sequence(binding) + if not monitor_sequence: + raise ValueError(f"Recovery binding for edge '{edge_id}' must define monitors.") + + merge = binding.get("merge", "target") + if merge not in {"source", "target"}: + raise ValueError("Recovery binding merge must be 'source' or 'target'.") + + recovery_steps = list( + binding.get("recovery") + or binding.get("recovery_steps") + or [{"type": "retry_failed_edge"}] + ) + if not recovery_steps: + raise ValueError( + f"Recovery binding for edge '{edge_id}' has no recovery steps." + ) + + current_node = monitored_edge["source"] + merge_node = monitored_edge[merge] + branch_edges: list[str] = [] + generated_edges: list[tuple[dict[str, Any], Mapping[str, Any]]] = [] + + for step_index, step in enumerate(recovery_steps): + step = dict(step) + step_label = _recovery_step_label(step, monitored_edge) + is_last_step = step_index == len(recovery_steps) - 1 + target_node = merge_node + if not is_last_step: + target_node = _unique_id( + f"rn_{_sanitize_id(edge_id)}_{step_index + 1}_{step_label}", + used_node_ids, + ) + expanded["recovery_nodes"].append( + { + "id": target_node, + "semantic": step.get("semantic", step_label.replace("_", " ")), + } + ) + + recovery_edge = { + "id": _unique_id( + f"re_{_sanitize_id(edge_id)}_{step_index + 1}_{step_label}", + used_edge_ids, + ), + "source": current_node, + "target": target_node, + **_expand_recovery_step_actions(step, monitored_edge, nominal_edges), + } + expanded["recovery_edges"].append(recovery_edge) + branch_edges.append(recovery_edge["id"]) + generated_edges.append((recovery_edge, step)) + current_node = target_node + + expanded["recovery_branches"].append( + { + "edge_id": edge_id, + "failure_name": failure_name, + "monitor_sequence": monitor_sequence, + "recovery_edges": branch_edges, + } + ) + + if binding.get("repeat_until_success", True): + for recovery_edge, step in generated_edges: + recovery_monitor_sequence = _recovery_step_monitor_sequence( + step, + fallback_monitor_sequence=monitor_sequence, + ) + _add_reusable_recovery_branch( + expanded, + recovery_edge, + step, + binding=binding, + monitor_sequence=recovery_monitor_sequence, + used_edge_ids=used_edge_ids, + ) + + +def _add_reusable_recovery_branch( + expanded: dict[str, Any], + recovery_edge: Mapping[str, Any], + step: Mapping[str, Any], + *, + binding: Mapping[str, Any], + monitor_sequence: list[dict[str, Any]], + used_edge_ids: set[str], +) -> None: + failure_name = binding.get("failure_name", "recovery_failure") + repair_edges = [recovery_edge["id"]] + + hold_monitor = _first_hold_lost_monitor(binding) + if hold_monitor is not None and _step_type(step) not in {"regrasp", "regrasp_both"}: + repair_edge = { + "id": _unique_id( + f"{recovery_edge['id']}_repair_regrasp_{_sanitize_id(hold_monitor['obj_name'])}", + used_edge_ids, + ), + "source": recovery_edge["source"], + "target": recovery_edge["source"], + **_regrasp_actions( + robot_name=hold_monitor["robot_name"], + obj_name=hold_monitor["obj_name"], + pre_grasp_dis=float(step.get("pre_grasp_dis", 0.1)), + ), + } + expanded["recovery_edges"].append(repair_edge) + repair_edges = [repair_edge["id"]] + repair_monitor_sequence = _object_moved_monitors([hold_monitor["obj_name"]]) + expanded["recovery_branches"].append( + { + "edge_id": repair_edge["id"], + "failure_name": f"{failure_name}_during_{repair_edge['id']}", + "monitor_sequence": repair_monitor_sequence, + "recovery_edges": [repair_edge["id"]], + } + ) + + expanded["recovery_branches"].append( + { + "edge_id": recovery_edge["id"], + "failure_name": f"{failure_name}_during_{recovery_edge['id']}", + "monitor_sequence": deepcopy(monitor_sequence), + "recovery_edges": repair_edges, + } + ) + + +def _expand_monitor_sequence(binding: Mapping[str, Any]) -> list[dict[str, Any]]: + monitors = binding.get("monitors", []) + monitor_sequence: list[dict[str, Any]] = [] + for monitor in monitors: + monitor = dict(monitor) + if "fn" in monitor: + monitor_sequence.append( + {"fn": monitor["fn"], "kwargs": dict(monitor.get("kwargs", {}))} + ) + continue + + monitor_type = monitor.get("type") + if monitor_type == "object_moved": + objects = monitor.get("objects") + if objects is None: + objects = [monitor["obj_name"]] + monitor_sequence.extend( + _object_moved_monitors( + _as_list(objects), + threshold=monitor.get("threshold", 0.02), + ) + ) + elif monitor_type in {"hold_lost", "object_held"}: + monitor_sequence.append( + { + "fn": "monitor_object_held", + "kwargs": { + "robot_name": monitor["robot_name"], + "obj_name": monitor["obj_name"], + "threshold": monitor.get("threshold", 0.05), + }, + } + ) + else: + raise ValueError(f"Unsupported recovery monitor type '{monitor_type}'.") + + return monitor_sequence + + +def _recovery_step_monitor_sequence( + step: Mapping[str, Any], + *, + fallback_monitor_sequence: list[dict[str, Any]], +) -> list[dict[str, Any]]: + step_type = _step_type(step) + if step_type in {"regrasp", "regrasp_both"}: + objects = _regrasp_step_objects(step) + if objects: + return _object_moved_monitors(objects) + return deepcopy(fallback_monitor_sequence) + + +def _object_moved_monitors( + objects: list[str], + *, + threshold: float = 0.02, +) -> list[dict[str, Any]]: + return [ + { + "fn": "monitor_object_moved", + "kwargs": {"obj_name": obj_name, "threshold": threshold}, + } + for obj_name in objects + ] + + +def _expand_recovery_step_actions( + step: Mapping[str, Any], + monitored_edge: Mapping[str, Any], + nominal_edges: Mapping[str, Mapping[str, Any]], +) -> dict[str, Any]: + step_type = _step_type(step) + + if step_type in {"retry_failed_edge", "retry_edge"}: + return _copy_edge_actions(monitored_edge) + + if step_type == "replay_edge": + replay_edge_id = step["edge_id"] + if replay_edge_id not in nominal_edges: + raise ValueError(f"Unknown replay_edge id '{replay_edge_id}'.") + return _copy_edge_actions(nominal_edges[replay_edge_id]) + + if step_type in {"regrasp", "regrasp_both"}: + arms = step.get("arms") + if arms: + return _multi_regrasp_actions( + arms, + pre_grasp_dis=float(step.get("pre_grasp_dis", 0.1)), + ) + return _regrasp_actions( + robot_name=step["robot_name"], + obj_name=step["obj_name"], + pre_grasp_dis=float(step.get("pre_grasp_dis", 0.1)), + ) + + if step_type == "action" or "left_arm_action" in step or "right_arm_action" in step: + return { + "left_arm_action": deepcopy(step.get("left_arm_action")), + "right_arm_action": deepcopy(step.get("right_arm_action")), + } + + raise ValueError(f"Unsupported recovery step type '{step_type}'.") + + +def _copy_edge_actions(edge: Mapping[str, Any]) -> dict[str, Any]: + left_action = deepcopy(edge.get("left_arm_action")) + right_action = deepcopy(edge.get("right_arm_action")) + return { + "left_arm_action": left_action, + "right_arm_action": right_action, + } + + +def _regrasp_actions( + *, + robot_name: str, + obj_name: str, + pre_grasp_dis: float, +) -> dict[str, Any]: + action = { + "fn": "grasp", + "kwargs": { + "robot_name": robot_name, + "obj_name": obj_name, + "pre_grasp_dis": pre_grasp_dis, + }, + } + if "left" in robot_name: + return {"left_arm_action": action, "right_arm_action": None} + return {"left_arm_action": None, "right_arm_action": action} + + +def _multi_regrasp_actions( + arms: Mapping[str, str], + *, + pre_grasp_dis: float, +) -> dict[str, Any]: + actions = {"left_arm_action": None, "right_arm_action": None} + for robot_name, obj_name in arms.items(): + action = { + "fn": "grasp", + "kwargs": { + "robot_name": robot_name, + "obj_name": obj_name, + "pre_grasp_dis": pre_grasp_dis, + }, + } + if "left" in robot_name: + actions["left_arm_action"] = action + else: + actions["right_arm_action"] = action + return actions + + +def _regrasp_step_objects(step: Mapping[str, Any]) -> list[str]: + arms = step.get("arms") + if arms: + return [str(obj_name) for obj_name in arms.values()] + obj_name = step.get("obj_name") + return [str(obj_name)] if obj_name is not None else [] + + +def _first_hold_lost_monitor(binding: Mapping[str, Any]) -> dict[str, Any] | None: + for monitor in binding.get("monitors", []): + monitor = dict(monitor) + if monitor.get("type") in {"hold_lost", "object_held"}: + return { + "robot_name": monitor["robot_name"], + "obj_name": monitor["obj_name"], + } + return None + + +def _recovery_step_label( + step: Mapping[str, Any], + monitored_edge: Mapping[str, Any], +) -> str: + step_type = _step_type(step) + if step_type in {"retry_failed_edge", "retry_edge"}: + return f"retry_{_sanitize_id(monitored_edge['id'])}" + if step_type == "replay_edge": + return f"replay_{_sanitize_id(step['edge_id'])}" + if step_type in {"regrasp", "regrasp_both"}: + if step.get("arms"): + return "regrasp_objects" + return f"regrasp_{_sanitize_id(step['obj_name'])}" + return _sanitize_id(step.get("name", step_type)) + + +def _step_type(step: Mapping[str, Any]) -> str: + return step.get("type", "action") + + +def _as_list(value: Any) -> list[Any]: + if isinstance(value, list): + return value + if isinstance(value, tuple): + return list(value) + return [value] + + +def _unique_id(base: str, used_ids: set[str]) -> str: + candidate = _sanitize_id(base) + if candidate not in used_ids: + used_ids.add(candidate) + return candidate + + index = 2 + while f"{candidate}_{index}" in used_ids: + index += 1 + unique_candidate = f"{candidate}_{index}" + used_ids.add(unique_candidate) + return unique_candidate + + +def _sanitize_id(value: str) -> str: + return re.sub(r"[^0-9a-zA-Z_]+", "_", str(value)).strip("_").lower() + + +def _empty_recovery_graph(task_graph: Mapping[str, Any]) -> dict[str, Any]: + return { + "task": task_graph.get("task", ""), + "recovery_nodes": [], + "recovery_edges": [], + "recovery_branches": [], + } + + +def _reject_compiled_recovery_schema_in_authoring_spec( + recovery_spec: Mapping[str, Any], +) -> None: + compiled_keys = {"recovery_nodes", "recovery_edges", "recovery_branches"} + present_keys = compiled_keys & set(recovery_spec) + if present_keys: + present = ", ".join(sorted(present_keys)) + raise ValueError( + "RecoveryAgent must output the lightweight recovery_bindings schema. " + f"Compiled recovery graph keys are not accepted here: {present}." + ) + if "recoveries" in recovery_spec or "error_functions" in recovery_spec: + raise ValueError( + "Recovery specs must not contain legacy recoveries or executable error_functions." + ) + + +def _validate_recovery_authoring_spec(recovery_spec: Mapping[str, Any]) -> None: + allowed_top_keys = {"task", "recovery_bindings"} + unknown_top_keys = set(recovery_spec) - allowed_top_keys + if unknown_top_keys: + unknown = ", ".join(sorted(unknown_top_keys)) + raise ValueError(f"Unknown recovery spec keys: {unknown}.") + + allowed_binding_keys = { + "edge_id", + "failure_name", + "monitors", + "recovery", + "recovery_steps", + "merge", + "repeat_until_success", + } + allowed_monitor_keys = { + "type", + "objects", + "obj_name", + "robot_name", + "threshold", + "fn", + "kwargs", + } + allowed_step_keys = { + "type", + "robot_name", + "obj_name", + "arms", + "pre_grasp_dis", + "edge_id", + "left_arm_action", + "right_arm_action", + "name", + "semantic", + } + for binding in recovery_spec.get("recovery_bindings", []): + unknown_binding_keys = set(binding) - allowed_binding_keys + if unknown_binding_keys: + unknown = ", ".join(sorted(unknown_binding_keys)) + raise ValueError(f"Unknown recovery binding keys: {unknown}.") + for monitor in binding.get("monitors", []): + unknown_monitor_keys = set(monitor) - allowed_monitor_keys + if unknown_monitor_keys: + unknown = ", ".join(sorted(unknown_monitor_keys)) + raise ValueError(f"Unknown recovery monitor keys: {unknown}.") + for step in binding.get("recovery", binding.get("recovery_steps", [])): + unknown_step_keys = set(step) - allowed_step_keys + if unknown_step_keys: + unknown = ", ".join(sorted(unknown_step_keys)) + raise ValueError(f"Unknown recovery step keys: {unknown}.") + + +def _reject_legacy_recovery_schema(recovery_spec: Mapping[str, Any]) -> None: + if "recovery_bindings" in recovery_spec: + raise ValueError( + "compile_agent_graph_spec expects a compiled recovery_graph. " + "Call expand_recovery_spec(task_graph, recovery_spec) first." + ) + if "recoveries" in recovery_spec: + raise ValueError( + "Legacy recovery schema key 'recoveries' is no longer supported. " + "Use recovery_bindings before compilation or recovery_graph after compilation." + ) + if "error_functions" in recovery_spec: + raise ValueError( + "Recovery graphs do not execute error_functions. " + "Use monitor_sequence and recovery_edges only." + ) + + for branch in recovery_spec.get("recovery_branches", []): + if "error_functions" in branch: + raise ValueError( + "Recovery branches do not execute error_functions. " + "Use monitor_sequence and recovery_edges only." + ) + if "recovery_actions" in branch or "merge" in branch: + raise ValueError( + "Legacy recovery branch keys 'recovery_actions' and 'merge' are no " + "longer supported. Define explicit recovery_edges instead." + ) + + +def _resolve_runtime( + *, + graph_cls: type | None, + action_module: Any, + monitor_module: Any, +) -> tuple[type, Any, Any]: + if graph_cls is None: + graph_cls = _resolve_attr( + importlib.import_module( + "embodichain.gen_sim.action_agent_pipeline.agent_graph" + ), + "AgentTaskGraph", + ) + if action_module is None: + action_module = importlib.import_module( + "embodichain.gen_sim.action_agent_pipeline.atom_actions" + ) + if monitor_module is None: + monitor_module = importlib.import_module( + "embodichain.gen_sim.action_agent_pipeline.monitor_functions" + ) + return graph_cls, action_module, monitor_module + + +def _validate_task_spec(task_spec: Mapping[str, Any]) -> tuple[set[str], set[str]]: + node_ids = set() + for node in task_spec.get("nodes", []): + node_id = node["id"] + if node_id in node_ids: + raise ValueError(f"Duplicate graph node id '{node_id}'.") + node_ids.add(node_id) + + for required_node in (task_spec["start"], task_spec["goal"]): + if required_node not in node_ids: + raise ValueError(f"Graph node '{required_node}' is not defined.") + + edge_specs = list(task_spec.get("edges", [])) + edge_ids = set() + for edge in edge_specs: + edge_id = edge["id"] + if edge_id in edge_ids: + raise ValueError(f"Duplicate graph edge id '{edge_id}'.") + edge_ids.add(edge_id) + if edge.get("left_arm_action") is None and edge.get("right_arm_action") is None: + raise ValueError(f"Nominal edge '{edge_id}' must define an arm action.") + + for node_key in ("source", "target"): + node_id = edge[node_key] + if node_id not in node_ids: + raise ValueError( + f"Edge '{edge_id}' references unknown {node_key} node '{node_id}'." + ) + + _validate_nominal_path(task_spec, edge_specs) + return node_ids, edge_ids + + +def _validate_nominal_path( + task_spec: Mapping[str, Any], + edge_specs: list[Mapping[str, Any]], +) -> None: + outgoing_edges: dict[str, Mapping[str, Any]] = {} + for edge in edge_specs: + source = edge["source"] + if source in outgoing_edges: + raise ValueError( + f"Nominal node '{source}' has multiple outgoing edges. " + "The current graph executor expects one deterministic nominal path." + ) + outgoing_edges[source] = edge + + current = task_spec["start"] + goal = task_spec["goal"] + visited_edges = set() + visited_nodes = {current} + + while current != goal: + edge = outgoing_edges.get(current) + if edge is None: + raise ValueError( + f"Nominal graph has no start-to-goal path from node '{current}'." + ) + edge_id = edge["id"] + if edge_id in visited_edges: + raise ValueError("Nominal graph contains a cycle.") + + visited_edges.add(edge_id) + current = edge["target"] + if current in visited_nodes and current != goal: + raise ValueError("Nominal graph contains a cycle.") + visited_nodes.add(current) + + all_edge_ids = {edge["id"] for edge in edge_specs} + unused_edge_ids = all_edge_ids - visited_edges + if unused_edge_ids: + unused = ", ".join(sorted(unused_edge_ids)) + raise ValueError( + f"Nominal graph contains edges outside the start-to-goal path: {unused}." + ) + + +def _add_recovery_nodes( + graph: Any, + recovery_spec: Mapping[str, Any], + nominal_node_ids: set[str], +) -> set[str]: + recovery_node_ids = set() + recovery_nodes = recovery_spec.get("recovery_nodes", []) + if isinstance(recovery_nodes, Mapping): + recovery_nodes = [ + {"id": node_id, **dict(node_spec)} + for node_id, node_spec in recovery_nodes.items() + ] + + for node in recovery_nodes: + node_id = node["id"] + if node_id in nominal_node_ids or node_id in recovery_node_ids: + raise ValueError(f"Duplicate recovery node id '{node_id}'.") + recovery_node_ids.add(node_id) + graph.add_node(node_id, node.get("semantic", "")) + + return recovery_node_ids + + +def _add_recovery_edges( + graph: Any, + recovery_spec: Mapping[str, Any], + *, + action_module: Any, + defined_node_ids: set[str], +) -> set[str]: + recovery_edge_ids = set() + for edge in recovery_spec.get("recovery_edges", []): + edge_id = edge["id"] + if edge_id in graph.edges or edge_id in recovery_edge_ids: + raise ValueError(f"Duplicate recovery edge id '{edge_id}'.") + if edge.get("left_arm_action") is None and edge.get("right_arm_action") is None: + raise ValueError(f"Recovery edge '{edge_id}' must define an arm action.") + + for node_key in ("source", "target"): + node_id = edge[node_key] + if node_id not in defined_node_ids: + raise ValueError( + f"Recovery edge '{edge_id}' references unknown {node_key} node '{node_id}'." + ) + + graph.add_edge( + edge_id, + edge["source"], + edge["target"], + left_arm_action=_compile_action(edge.get("left_arm_action"), action_module), + right_arm_action=_compile_action( + edge.get("right_arm_action"), action_module + ), + is_recovery=True, + ) + recovery_edge_ids.add(edge_id) + + return recovery_edge_ids + + +def _add_recovery_branches( + graph: Any, + recovery_spec: Mapping[str, Any], + *, + env: Any, + monitor_module: Any, + nominal_edge_ids: set[str], + recovery_edge_ids: set[str], +) -> None: + recoverable_edge_ids = nominal_edge_ids | recovery_edge_ids + for branch_index, branch in enumerate(recovery_spec.get("recovery_branches", [])): + edge_id = branch["edge_id"] + if edge_id not in recoverable_edge_ids: + raise ValueError(f"Recovery branch references unknown edge_id '{edge_id}'.") + + edge = graph.edges[edge_id] + monitor_sequence = [ + _compile_monitor(monitor, monitor_module, env=env) + for monitor in branch.get("monitor_sequence", []) + ] + if not monitor_sequence: + raise ValueError( + f"Recovery branch for edge '{edge_id}' must define monitor_sequence." + ) + + branch_recovery_edges = list(branch.get("recovery_edges", [])) + _validate_recovery_branch_path( + graph, + branch=branch, + monitored_edge=edge, + recovery_edge_ids=recovery_edge_ids, + ) + + edge.monitor_sequences = list(edge.monitor_sequences or []) + edge.monitor_labels = list(edge.monitor_labels or []) + monitor_index = len(edge.monitor_sequences) + edge.monitor_sequences.append(monitor_sequence) + edge.monitor_labels.append( + branch.get("failure_name", f"recovery_{branch_index}") + ) + graph.add_recovery( + edge.id, + monitor_index=monitor_index, + recovery_edges=branch_recovery_edges, + ) + + +def _validate_recovery_branch_path( + graph: Any, + *, + branch: Mapping[str, Any], + monitored_edge: Any, + recovery_edge_ids: set[str], +) -> None: + branch_edges = list(branch.get("recovery_edges", [])) + if not branch_edges: + raise ValueError( + f"Recovery branch for edge '{monitored_edge.id}' must define recovery_edges." + ) + + previous_target = None + for index, edge_id in enumerate(branch_edges): + if edge_id not in recovery_edge_ids: + raise ValueError( + f"Recovery branch for edge '{monitored_edge.id}' references unknown recovery edge '{edge_id}'." + ) + + recovery_edge = graph.edges[edge_id] + if index == 0 and recovery_edge.source != monitored_edge.source: + raise ValueError( + f"Recovery branch for edge '{monitored_edge.id}' must start from '{monitored_edge.source}'." + ) + if previous_target is not None and recovery_edge.source != previous_target: + raise ValueError( + f"Recovery branch for edge '{monitored_edge.id}' is not path-contiguous at '{edge_id}'." + ) + previous_target = recovery_edge.target + + if previous_target not in {monitored_edge.source, monitored_edge.target}: + raise ValueError( + f"Recovery branch for edge '{monitored_edge.id}' must merge into the failed edge source or target node." + ) + + +def _compile_action(spec: Any, action_module: Any) -> Any: + if spec is None: + return None + return _compile_call(spec, action_module) + + +def _compile_monitor(spec: Mapping[str, Any], monitor_module: Any, *, env: Any) -> Any: + kwargs = dict(spec.get("kwargs", {})) + kwargs.setdefault("env", env) + return _compile_call({"fn": spec["fn"], "kwargs": kwargs}, monitor_module) + + +def _compile_call(spec: Mapping[str, Any], namespace: Any) -> Any: + return partial(_resolve_attr(namespace, spec["fn"]), **dict(spec.get("kwargs", {}))) + + +def _resolve_attr(namespace: Any, name: str) -> Any: + if isinstance(namespace, Mapping): + return namespace[name] + return getattr(namespace, name) diff --git a/embodichain/gen_sim/action_agent_pipeline/monitor_functions.py b/embodichain/gen_sim/action_agent_pipeline/monitor_functions.py new file mode 100644 index 00000000..2de036c3 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/monitor_functions.py @@ -0,0 +1,112 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from embodichain.utils.logger import log_error +from embodichain.gen_sim.action_agent_pipeline.monitor_utils import ( + _as_pose_matrix, + _get_object_pose, + get_arm_object_distance, + get_gripper_distance, +) +import numpy as np +import torch + + +def monitor_object_moved( + env, + obj_name: str, + last_frame_pose: torch.Tensor | np.ndarray | list | tuple | dict = None, + threshold: float = 0.01, + **kwargs, +) -> bool: + """Trigger when an object moved from the last frame beyond a threshold. + + Args: + env: The current agent environment. + obj_name: Target rigid object name. + last_frame_pose: Previous-frame pose or a state dict returned by + :func:`capture_object_state`. + threshold: Maximum allowed translation change in meters. + + Returns: + ``True`` if the monitored failure occurs, i.e. the object moved more than + the threshold. + """ + if last_frame_pose is None: + last_frame_pose = env.obj_info.get(obj_name).get("pose") + current_pose = _get_object_pose(env, obj_name) + previous_pose = _as_pose_matrix(last_frame_pose, device=current_pose.device) + movement = torch.norm(current_pose[:3, 3] - previous_pose[:3, 3]).item() + return movement > threshold + + +def monitor_object_held( + env, + robot_name: str, + obj_name: str, + threshold: float = 0.05, + **kwargs, +) -> bool: + """Trigger when an object is no longer being held by the corresponding arm. + + The function name is historical. To keep all monitor semantics consistent, + this function returns ``True`` when failure occurs, namely when the object is + too far from the corresponding arm end-effector and is treated as no longer held. + + Args: + env: The current agent environment. + robot_name: Robot-side selector containing ``left`` or ``right``. + obj_name: Target rigid object name. + threshold: Maximum allowed arm-object distance before hold failure is triggered. + + Returns: + ``True`` if the monitored failure occurs, i.e. the object is no longer + close enough to the corresponding arm. + """ + arm_object_distance = get_arm_object_distance(env, robot_name, obj_name) + return arm_object_distance > threshold + + +# TODO: not used currently +def monitor_gripper_distance( + env, + robot_name: str, + threshold: float = 0.01, + comparison: str = "less", + **kwargs, +) -> bool: + """Trigger when gripper distance violates the expected threshold condition. + + Args: + env: The current agent environment. + robot_name: Robot-side selector containing ``left`` or ``right``. + threshold: Threshold for the gripper distance. + comparison: ``"less"`` checks ``distance < threshold`` and ``"greater"`` + checks ``distance > threshold``. + + Returns: + ``True`` if the monitored failure occurs and the comparison condition is unsatisfied. + """ + distance = get_gripper_distance(env, robot_name) + + if comparison == "less": + return distance < threshold + if comparison == "greater": + return distance > threshold + + log_error(f"Unsupported comparison '{comparison}'. Expected 'less' or 'greater'.") diff --git a/embodichain/gen_sim/action_agent_pipeline/monitor_utils.py b/embodichain/gen_sim/action_agent_pipeline/monitor_utils.py new file mode 100644 index 00000000..f51ae55e --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/monitor_utils.py @@ -0,0 +1,122 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import numpy as np +import torch + +from embodichain.gen_sim.action_agent_pipeline.atom_action_utils import get_arm_states +from embodichain.utils.logger import log_error +from embodichain.utils.math import matrix_from_quat + + +def _to_tensor( + value: torch.Tensor | np.ndarray | list | tuple | float, + *, + device: torch.device | None = None, + dtype: torch.dtype = torch.float32, +) -> torch.Tensor: + """Convert input to a tensor.""" + if isinstance(value, torch.Tensor): + return value.to(device=device or value.device, dtype=dtype) + return torch.as_tensor(value, device=device, dtype=dtype) + + +def _as_pose_matrix( + pose: torch.Tensor | np.ndarray | list | tuple | dict, + *, + device: torch.device | None = None, +) -> torch.Tensor: + """Convert a pose-like input into a 4x4 pose matrix.""" + if isinstance(pose, dict): + if "pose" not in pose: + log_error("Pose dict must contain key 'pose'.") + pose = pose["pose"] + + pose_tensor = _to_tensor(pose, device=device) + + if pose_tensor.dim() == 3 and pose_tensor.shape[0] == 1: + pose_tensor = pose_tensor.squeeze(0) + + if pose_tensor.shape == (4, 4): + return pose_tensor + + if pose_tensor.dim() == 1 and pose_tensor.shape[0] == 7: + pose_matrix = torch.eye(4, dtype=torch.float32, device=pose_tensor.device) + pose_matrix[:3, 3] = pose_tensor[:3] + pose_matrix[:3, :3] = matrix_from_quat(pose_tensor[3:].unsqueeze(0)).squeeze(0) + return pose_matrix + + log_error( + f"Unsupported pose format {tuple(pose_tensor.shape)}. Expected (4, 4) or (7,)." + ) + + +def _get_rigid_object(env, obj_name: str): + """Fetch a rigid object by name.""" + obj_uids = env.sim.get_rigid_object_uid_list() + if obj_name not in obj_uids: + log_error( + f"Rigid object '{obj_name}' not found. Available objects: {obj_uids}." + ) + return env.sim.get_rigid_object(obj_name) + + +def _get_object_pose(env, obj_name: str) -> torch.Tensor: + """Get the current 4x4 local pose of a rigid object.""" + return _get_rigid_object(env, obj_name).get_local_pose(to_matrix=True).squeeze(0) + + +def _get_actual_arm_pose(env, robot_name: str) -> torch.Tensor: + """Get the current end-effector pose of the selected arm.""" + is_left, control_part, _, _, _ = get_arm_states(env, robot_name) + arm_joints = env.left_arm_joints if is_left else env.right_arm_joints + arm_qpos = env.robot.get_qpos().squeeze(0)[arm_joints] + arm_pose = env.robot.compute_fk( + arm_qpos, name=control_part, to_matrix=True + ).squeeze(0) + return arm_pose + + +def capture_object_state(env, obj_name: str) -> dict[str, torch.Tensor]: + """Capture the current object pose for frame-to-frame monitoring.""" + pose = _get_object_pose(env, obj_name) + return { + "pose": pose.clone(), + "position": pose[:3, 3].clone(), + } + + +def get_gripper_distance(env, robot_name: str) -> float: + """Estimate the current gripper opening distance.""" + is_left, _, _, _, _ = get_arm_states(env, robot_name) + side = "left" if is_left else "right" + if hasattr(env, "get_agent_eef_control_part"): + eef_control_part = env.get_agent_eef_control_part(is_left) + else: + eef_control_part = f"{side}_eef" + if eef_control_part is None: + return 0.0 + eef_qpos = env.robot.get_qpos(name=eef_control_part).squeeze(0) + return float(torch.mean(torch.abs(eef_qpos)).item()) + + +def get_arm_object_distance(env, robot_name: str, obj_name: str) -> float: + """Compute the distance between the current arm end-effector and object.""" + arm_pose = _get_actual_arm_pose(env, robot_name) + obj_pose = _get_object_pose(env, obj_name) + return float(torch.norm(arm_pose[:3, 3] - obj_pose[:3, 3]).item()) diff --git a/embodichain/gen_sim/action_agent_pipeline/prompt_builders/__init__.py b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/__init__.py new file mode 100644 index 00000000..5d3d23cd --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/__init__.py @@ -0,0 +1,23 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from .task_prompt import TaskPrompt +from .compile_prompt import CompilePrompt +from .recovery_prompt import RecoveryPrompt + +__all__ = ["CompilePrompt", "RecoveryPrompt", "TaskPrompt"] diff --git a/embodichain/gen_sim/action_agent_pipeline/prompt_builders/compile_prompt.py b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/compile_prompt.py new file mode 100644 index 00000000..aaa44d29 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/compile_prompt.py @@ -0,0 +1,27 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +__all__ = ["CompilePrompt"] + + +class CompilePrompt: + """Compatibility namespace for compile-agent prompt imports. + + CompileAgent compiles JSON graph specs directly and does not ask an LLM to + generate Python control code. + """ diff --git a/embodichain/gen_sim/action_agent_pipeline/prompt_builders/recovery_prompt.py b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/recovery_prompt.py new file mode 100644 index 00000000..a6ebecd1 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/recovery_prompt.py @@ -0,0 +1,101 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from langchain_core.messages import SystemMessage +from langchain_core.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, +) + +__all__ = ["RecoveryPrompt"] + + +class RecoveryPrompt: + @staticmethod + def augment_task_graph(**kwargs): + schema = """{ + "task": "", + "recovery_bindings": [ + { + "edge_id": "", + "failure_name": "", + "monitors": [ + {"type": "object_moved", "objects": [""], "threshold": 0.02} + ], + "recovery": [ + { + "type": "regrasp", + "robot_name": "", + "obj_name": "", + "pre_grasp_dis": 0.1 + } + ], + "merge": "target", + "repeat_until_success": true + } + ] +}""" + empty_schema = '{"task": "", "recovery_bindings": []}' + kwargs.update( + { + "empty_recovery_schema": empty_schema, + "recovery_schema": schema, + } + ) + + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content=( + "You are a robotic manipulation recovery policy designer. " + "Given a nominal atomic-action graph, add only compact " + "monitor-to-recovery bindings. Do not write graph nodes, " + "graph edges, graph branches, Python code, or prose. " + "Output only JSON." + ) + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "text", + "text": ( + "Use the context below to generate a lightweight recovery " + "spec for the nominal graph.\n\n" + "**Environment background:**\n{basic_background}\n\n" + '**Task goal:**\n"{task_prompt}"\n\n' + "**Available atomic actions:**\n{atom_actions}\n\n" + "**Possible external failure descriptions " + "(prompt context only, not executable output):**\n" + "{error_functions}\n\n" + "**Available monitor functions:**\n{monitor_functions}\n\n" + "**Nominal task graph JSON:**\n{task_graph}\n\n" + "**Required JSON schema:**\n" + "{recovery_schema}\n\n" + "Output exactly one JSON object following the schema. " + "If no edge needs a recovery branch, output " + "`{empty_recovery_schema}`.\n\n" + "Follow these recovery rules:\n" + "{recovery_rules}" + ), + }, + ] + ), + ] + ) + + return prompt.invoke(kwargs) diff --git a/embodichain/gen_sim/action_agent_pipeline/prompt_builders/task_prompt.py b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/task_prompt.py new file mode 100644 index 00000000..5d7fd3dd --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompt_builders/task_prompt.py @@ -0,0 +1,114 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from typing import Any + +import torch +from langchain_core.messages import SystemMessage +from langchain_core.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, +) +from embodichain.utils.utility import encode_image + +__all__ = ["TaskPrompt"] + + +class TaskPrompt: + @staticmethod + def generate_task_graph(observations: dict[str, Any], **kwargs: Any) -> Any: + """Build a prompt that asks the task agent for a nominal JSON graph.""" + schema = """{ + "task": "", + "start": "v0_start", + "goal": "vN_done", + "nodes": [ + {"id": "v0_start", "semantic": ""}, + {"id": "v1_", "semantic": ""} + ], + "edges": [ + { + "id": "e01_", + "source": "v0_start", + "target": "v1_", + "left_arm_action": {"fn": "", "kwargs": {}}, + "right_arm_action": null + } + ] +}""" + + observation = ( + observations["rgb"].cpu().numpy() + if isinstance(observations["rgb"], torch.Tensor) + else observations["rgb"] + ) + kwargs.update( + { + "graph_schema": schema, + "observation": encode_image(observation), + } + ) + + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content=( + "You are a precise robotic manipulation graph planner. " + "Given a camera observation and task description, produce only " + "the nominal atomic-action graph. Do not add failure monitors, " + "error injection, recovery branches, Python code, or prose. " + "All actions must strictly use the provided atomic API functions." + ) + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,{observation}", + }, + }, + { + "type": "text", + "text": ( + "Use the current camera observation and context below to " + "generate a nominal atomic-action graph for the task.\n\n" + "**Environment background:**\n{basic_background}\n\n" + '**Task goal:**\n"{task_prompt}"\n\n' + "**Available atomic actions:**\n{atom_actions}\n\n" + "**Required JSON schema:**\n" + "{graph_schema}\n\n" + "Rules:\n" + "- Output exactly one JSON object and nothing else.\n" + "- The nominal graph must be one deterministic start-to-goal chain with no branches, cycles, or orphan edges.\n" + "- Each edge is one semantic task step from source node to target node.\n" + "- Every edge must define at least one non-null arm action.\n" + "- Use `null` for an idle arm action.\n" + "- Put only JSON primitives inside kwargs: strings, numbers, booleans, null, arrays, or objects.\n" + "- Do not include `env`, tensors, comments, validation conditions, monitors, errors, or recovery fields.\n" + "- Preserve task order and use both arms on the same edge when they should act simultaneously.\n" + "- Use stable ids such as `v0_start`, `v1_grasped`, `e01_grasp_objects`.\n" + "- Replace `N` with the concrete final step index; do not literally output `vN_done`.\n" + "- The final edge target must equal the `goal` field." + ), + }, + ] + ), + ] + ) + return prompt.invoke(kwargs) diff --git a/embodichain/agents/prompts/atom_actions.txt b/embodichain/gen_sim/action_agent_pipeline/prompts/atom_actions.txt similarity index 86% rename from embodichain/agents/prompts/atom_actions.txt rename to embodichain/gen_sim/action_agent_pipeline/prompts/atom_actions.txt index 257464c5..387d0c01 100644 --- a/embodichain/agents/prompts/atom_actions.txt +++ b/embodichain/gen_sim/action_agent_pipeline/prompts/atom_actions.txt @@ -1,23 +1,22 @@ ### Atom Functions for Robot Arm Control Each atomic function returns a list of joint-space trajectories (list[np.ndarray]). -All functions support an optional argument: +All atom functions are public-only wrappers backed by +`embodichain.lab.sim.atomic_actions.AtomicActionEngine`. +If a public atomic action cannot resolve semantics, solve IK, or plan a +trajectory, it raises an error immediately. Use only the parameters listed below. -force_valid: bool - If True, the system will automatically correct an invalid target pose by - projecting it to the nearest valid pose. Use this option carefully: - enable it only for actions where small spatial deviations are acceptable - and will not compromise task correctness. Default is False. +For `grasp`, semantic grasp is the default public mode. It uses public +`PickUpAction` with `ObjectSemantics(AntipodalAffordance)`; the caller does not +need to pass any public backend flags. Use the following functions exactly as defined. Do not invent new APIs or parameters. "grasp": def grasp(robot_name: str, obj_name: str, pre_grasp_dis: float, **kwargs) -> list[np.ndarray] - Moves the specified arm to the target object’s affordance-based grasp pose and executes a grasp by closing the gripper. + Moves the specified arm to the target object’s public grasp target and executes a grasp by closing the gripper. - The function plans a two-stage trajectory: - (1) from the current pose to a pre-grasp pose offset from the object, and - (2) from the pre-grasp pose to the final grasp pose, followed by gripper closure. + The public PickUpAction plans approach, gripper closing, and post-grasp lift. Upon completion, the gripper is closed and the target object is expected to be stably held by the gripper. @@ -27,7 +26,7 @@ Use the following functions exactly as defined. Do not invent new APIs or parame "place_on_table": def place_on_table(robot_name: str, obj_name: str, x: float, y: float, pre_place_dis: float, **kwargs) -> list[np.ndarray] - Moves the specified robot arm with the target object to the desired [x, y] location on the table and opens the gripper to place the object. + Moves the specified robot arm with the target object to the desired [x, y] location on the table and opens the gripper to place the object using public PlaceAction. The z-coordinate is automatically adjusted based on the table height and the object’s dimensions. This function assumes that the robot is already holding the object and that the task is to place it on the table at the specified coordinates. Remember that when you need to place some objects on the table at specific coordinates, use this function without using other movement atom actions. @@ -124,13 +123,3 @@ Use the following functions exactly as defined. Do not invent new APIs or parame Opens the arm’s gripper using a short (10-step) gripper-only trajectory. Example: open_gripper(robot_name='right_arm') # Opens the right gripper using a 10-step gripper-only trajectory. - -### Drive Function (Trajectory Synchronization) -"drive": - def drive(left_arm_action=None, right_arm_action=None, **kwargs) -> list[torch.Tensor] - Wraps one or two arm trajectories into synchronized full-robot actions. - • If only one arm action is provided, the other arm stays idle. - • If both are provided, they are temporally aligned and executed together. - • The actions are obtained from the output of the above functions. - Example: - drive(left_arm_action=left_actions, right_arm_action=right_actions) \ No newline at end of file diff --git a/embodichain/gen_sim/action_agent_pipeline/prompts/basic_background.txt b/embodichain/gen_sim/action_agent_pipeline/prompts/basic_background.txt new file mode 100644 index 00000000..3a84455e --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompts/basic_background.txt @@ -0,0 +1,20 @@ +The environment uses a right-handed world coordinate system, where 1 unit equals 1 meter. +All robot poses are represented as 4×4 homogeneous transformation matrices. + +The robot base coordinate frame is the ONLY authoritative frame for all spatial reasoning, planning, and action generation. + +ROBOT BASE COORDINATE DEFINITIONS + +All directions below are defined strictly in the robot base frame: + +* Moving forward increases x +* Moving backward decreases x +* Moving left increases y +* Moving right decreases y +* Moving up increases z +* Moving down decreases z + +ROBOT INITIALIZATION AND TERMINATION + +Both robot arms start in predefined initial configurations with their end-effectors open. +At task completion, both arms must be returned to their initial poses. \ No newline at end of file diff --git a/embodichain/gen_sim/action_agent_pipeline/prompts/error_functions.txt b/embodichain/gen_sim/action_agent_pipeline/prompts/error_functions.txt new file mode 100644 index 00000000..254aaa36 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompts/error_functions.txt @@ -0,0 +1,30 @@ +Possible external failure descriptions for recovery spec generation. + +These entries are prompt context only. They describe failures that may happen outside the agent during execution, such as physics randomization, object motion, grasp loss, or an externally disturbed trajectory. Do not output these as executable `error_functions`, do not set probabilities, and do not ask the compiler/runtime to inject them. + +1. `misplaced_object` +Kind: +- External object pose disturbance. + +Purpose: +- A rigid object is shifted away from its expected pose before or during a nominal edge. + +Typical context: +- The target object moved on the table. +- A held or nearby object drifted far enough that the next planned action may miss. + +Useful monitor families: +- Object movement or object pose monitors. + +2. `fallen_object` +Kind: +- External object orientation disturbance. + +Purpose: +- A rigid object is tipped or fallen so its expected grasp, pour, or placement affordance is no longer valid. + +Typical context: +- A cup, bottle, or container falls before grasping or while being manipulated. + +Useful monitor families: +- Object pose, fallen-object, or held-object monitors. diff --git a/embodichain/gen_sim/action_agent_pipeline/prompts/monitor_functions.txt b/embodichain/gen_sim/action_agent_pipeline/prompts/monitor_functions.txt new file mode 100644 index 00000000..d2065203 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompts/monitor_functions.txt @@ -0,0 +1,61 @@ +All monitor functions are designed to be passed into `drive(..., monitor_sequences=[...])`. +Each monitor function returns: +- `True`: the monitor is triggered and the failure condition is detected +- `False`: the monitor is not triggered and the failure condition is not detected + +When wrapping a monitor with `partial`, pass the function object itself, for example: +`partial(monitor_object_moved, obj_name="cup", threshold=0.02)` + +1. `monitor_object_moved` +Purpose: +- Detect whether a rigid object has moved from the last frame by more than a translation threshold. + +Typical failure meanings: +- object was unexpectedly pushed +- object shifted during grasping +- placed object slid away + +Signature: +- `monitor_object_moved(env, obj_name, last_frame_pose=None, threshold=0.01, **kwargs)` + +Key arguments: +- `obj_name`: target rigid object name, such as `"cup"` or `"bottle"` +- `last_frame_pose`: previous-frame pose; if not provided, it falls back to `env.obj_info[obj_name]["pose"]`. Typically, no need to provide +- `threshold`: maximum allowed position change in meters + +Returns `True` when: +- the Euclidean distance between current object position and previous object position is greater than `threshold` + +Recommended use: +- use when the arm attempts to grasp an object, where the object is expected to be stationary +- when operating the arm with the grasped object, do not use this function, as the object movement is expected and does not necessarily indicate a failure; instead, use `monitor_object_held` to detect hold failure + +2. `monitor_object_held` +Purpose: +- Detect whether an object is no longer close enough to the corresponding arm end-effector and should be treated as no longer held. + +Typical failure meanings: +- object slipped out of hand +- grasp was lost +- held object is no longer attached to the arm region + +Signature: +- `monitor_object_held(env, robot_name, obj_name, threshold=0.05, **kwargs)` + +Key arguments: +- `robot_name`: arm identifier containing `"left"` or `"right"` +- `obj_name`: target rigid object name +- `threshold`: maximum allowed distance between object center and the corresponding arm end-effector + +Returns `True` when: +- the object is farther than the threshold distance from the selected arm end-effector +- in other words, grasp loss / hold failure is detected + +Important note: +- the function name is historical, but its return semantics are unified with the other monitor functions +- `True` always means trigger / failure occurred +- for this function specifically, `True` means the object is no longer held successfully + +Recommended use: +- use when the object is already grasped in the arm +- do not use this function when grasping or placing an object, as the object is not yet securely held during the grasping phase \ No newline at end of file diff --git a/embodichain/gen_sim/action_agent_pipeline/prompts/recovery_rules.txt b/embodichain/gen_sim/action_agent_pipeline/prompts/recovery_rules.txt new file mode 100644 index 00000000..2667c709 --- /dev/null +++ b/embodichain/gen_sim/action_agent_pipeline/prompts/recovery_rules.txt @@ -0,0 +1,55 @@ +RecoveryAgent writes a lightweight authoring spec. CompileAgent expands it into +the executable recovery graph. + +Top-level rules: +- Output only `task` and `recovery_bindings`. +- Do not output `recovery_nodes`, `recovery_edges`, `recovery_branches`, Python + code, injected errors, probabilities, comments, or prose. +- The nominal task graph is fixed input. Do not rewrite, reorder, merge, or + delete its nodes or edges. +- Skip an edge when it has no high-value, supported, recoverable failure. + +Binding rules: +- Each binding attaches one recoverable failure policy to one nominal edge. +- `edge_id` must be an existing nominal edge id. +- `failure_name` should be short and stable. +- `monitors` is a list of monitor templates. Supported types are: + - `object_moved`: fields `objects` or `obj_name`, optional `threshold` + - `hold_lost`: fields `robot_name`, `obj_name`, optional `threshold` +- The compiler canonicalizes common aliases and can infer simple fields from + the nominal edge: grasped object names for `object_moved`, both grasp arms for + `regrasp_both`, and single-object grasp targets when unambiguous. +- `recovery` is an ordered list of policy steps. Supported step types are: + - `regrasp`: fields `robot_name`, `obj_name`, optional `pre_grasp_dis` + - `regrasp_both`: field `arms`, mapping robot names to object names, optional + `pre_grasp_dis` + - `retry_failed_edge` + - `replay_edge`: field `edge_id` + - `action`: optional `left_arm_action`, optional `right_arm_action` +- Use `action` only when the named templates are not enough. +- `merge` must be `target` when recovery completes the failed edge state, or + `source` when recovery only restores the failed edge precondition and the + failed edge should be retried. +- Set `repeat_until_success` to true for normal recoverable failures. The + compiler will make generated recovery edges self-monitoring and reusable. + +Monitor guidance: +- All monitor templates use `True` = failure occurred. +- Use `object_moved` when an object is expected to remain still, such as before + or during grasping. +- Use `hold_lost` only when the object should already be held by an arm. +- If different failures need different recovery policies, create separate + bindings instead of mixing them in one binding. + +Recovery guidance: +- Prefer short policies that either complete the failed edge or restore its + precondition. +- Use `regrasp_both` when two arms can re-grasp independently and + simultaneously. +- If one recovery step depends on the physical result of a previous step, put + them as separate ordered policy steps. +- If an object is lost during a step whose source precondition required the + object to be lifted or positioned, recovery must restore that precondition + before completing or retrying the failed step. +- During grasping, do not use `hold_lost`; the object is not guaranteed to be + held until grasp completes. Use `object_moved` for grasp-phase displacement. diff --git a/embodichain/gen_sim/mllm.py b/embodichain/gen_sim/mllm.py new file mode 100644 index 00000000..94382d2e --- /dev/null +++ b/embodichain/gen_sim/mllm.py @@ -0,0 +1,108 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import os +from collections.abc import Mapping +from typing import Any + +from embodichain.gen_sim.simready_pipeline.configs import ( + DEFAULT_LLM_MODEL, + get_openai_compatible_llm_config, +) + +__all__ = [ + "DEFAULT_LLM_MODEL", + "apply_proxy_env", + "create_chat_openai", + "create_openai_client", + "get_openai_compatible_llm_config", +] + + +def apply_proxy_env(proxy_url: str | None) -> None: + """Apply an optional proxy URL for OpenAI-compatible clients.""" + if not proxy_url: + return + os.environ["HTTP_PROXY"] = proxy_url + os.environ["HTTPS_PROXY"] = proxy_url + + +def _resolve_llm_config( + *, + config: Mapping[str, Any] | None, + required: bool, + require_base_url: bool, +) -> dict[str, Any]: + if config is not None: + return dict(config) + return get_openai_compatible_llm_config( + required=required, + require_base_url=require_base_url, + ) + + +def create_openai_client( + *, + config: Mapping[str, Any] | None = None, + required: bool = True, + require_base_url: bool = False, +): + """Create the shared OpenAI-compatible SDK client used by gen-sim MLLM calls.""" + from openai import OpenAI + + cfg = _resolve_llm_config( + config=config, + required=required, + require_base_url=require_base_url, + ) + apply_proxy_env(cfg.get("proxy_url")) + + kwargs: dict[str, Any] = { + "api_key": cfg["api_key"], + "default_query": cfg.get("default_query") or None, + } + if cfg.get("base_url"): + kwargs["base_url"] = cfg["base_url"] + return OpenAI(**kwargs) + + +def create_chat_openai( + *, + temperature: float = 0.0, + model: str | None = None, + config: Mapping[str, Any] | None = None, + required: bool = True, +): + """Create the shared LangChain OpenAI-compatible chat client for agents.""" + from langchain_openai import ChatOpenAI + + cfg = _resolve_llm_config( + config=config, + required=required, + require_base_url=False, + ) + apply_proxy_env(cfg.get("proxy_url")) + + kwargs: dict[str, Any] = { + "temperature": temperature, + "model": model or cfg.get("model") or DEFAULT_LLM_MODEL, + "api_key": cfg["api_key"], + } + if cfg.get("base_url"): + kwargs["base_url"] = cfg["base_url"] + return ChatOpenAI(**kwargs) diff --git a/embodichain/gen_sim/simready_pipeline/configs/__init__.py b/embodichain/gen_sim/simready_pipeline/configs/__init__.py index 015c4151..517a4660 100644 --- a/embodichain/gen_sim/simready_pipeline/configs/__init__.py +++ b/embodichain/gen_sim/simready_pipeline/configs/__init__.py @@ -16,4 +16,18 @@ from __future__ import annotations -__all__: list[str] = [] +from embodichain.gen_sim.simready_pipeline.configs.llm_config import ( + DEFAULT_LLM_MODEL, + GEN_CONFIG_PATH, + LLM_ENV_PATH, + LEGACY_LLM_ENV_PATH, + get_openai_compatible_llm_config, +) + +__all__ = [ + "DEFAULT_LLM_MODEL", + "GEN_CONFIG_PATH", + "LLM_ENV_PATH", + "LEGACY_LLM_ENV_PATH", + "get_openai_compatible_llm_config", +] diff --git a/embodichain/gen_sim/simready_pipeline/configs/llm_config.py b/embodichain/gen_sim/simready_pipeline/configs/llm_config.py new file mode 100644 index 00000000..573ebc88 --- /dev/null +++ b/embodichain/gen_sim/simready_pipeline/configs/llm_config.py @@ -0,0 +1,146 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + +__all__ = [ + "DEFAULT_LLM_MODEL", + "GEN_CONFIG_PATH", + "LLM_ENV_PATH", + "LEGACY_LLM_ENV_PATH", + "get_openai_compatible_llm_config", +] + +DEFAULT_LLM_MODEL = "gpt-4o" +CONFIG_DIR = Path(__file__).resolve().parent +GEN_CONFIG_PATH = CONFIG_DIR / "gen_config.json" +PROJECT_ROOT = next( + ( + parent + for parent in CONFIG_DIR.parents + if (parent / "setup.py").exists() and (parent / "embodichain").exists() + ), + CONFIG_DIR.parents[3], +) +LLM_ENV_PATH = PROJECT_ROOT / ".env" +LEGACY_LLM_ENV_PATH = CONFIG_DIR / ".env" + + +def _load_env_file(path: Path | None = None) -> dict[str, str]: + """Read local KEY=VALUE credentials without overriding shell variables.""" + path = path or LLM_ENV_PATH + if not path.exists(): + return {} + + env_values: dict[str, str] = {} + for raw_line in path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + key = key.strip() + value = value.strip().strip("\"'") + if key: + env_values[key] = value + return env_values + + +def _load_env_files(paths: tuple[Path, ...] | None = None) -> dict[str, str]: + """Read local env files, with later paths taking precedence.""" + env_values: dict[str, str] = {} + for path in paths or (LEGACY_LLM_ENV_PATH, LLM_ENV_PATH): + env_values.update(_load_env_file(path)) + return env_values + + +def _get_first_value( + local_env: dict[str, str], + *names: str, + default: str | None = None, +) -> str | None: + for name in names: + value = os.getenv(name) + if value: + return value + value = local_env.get(name) + if value: + return value + return default + + +def _load_gen_config(path: Path | None = None) -> dict[str, Any]: + path = path or GEN_CONFIG_PATH + if not path.exists(): + raise FileNotFoundError(f"gen_config.json not found: {path}") + + with path.open("r", encoding="utf-8") as f: + raw_cfg = json.load(f) + return dict(raw_cfg.get("llm", {}).get("openai_compatible", {})) + + +def get_openai_compatible_llm_config( + *, + required: bool = False, + require_base_url: bool = False, + default_model: str = DEFAULT_LLM_MODEL, +) -> dict[str, Any]: + """Return shared OpenAI-compatible LLM config for agents and gen-sim.""" + local_env = _load_env_files() + json_cfg = _load_gen_config() + + cfg = { + "api_key": _get_first_value(local_env, "OPENAI_API_KEY") + or json_cfg.get("api_key", ""), + "model": _get_first_value(local_env, "OPENAI_MODEL", "LLM_MODEL") + or json_cfg.get("model") + or default_model, + "base_url": _get_first_value( + local_env, + "OPENAI_BASE_URL", + "OPENAI_API_BASE", + "LLM_URL", + ) + or json_cfg.get("base_url", ""), + "default_query": json_cfg.get("default_query", {}) or {}, + "proxy_url": _get_first_value( + local_env, + "EMBODICHAIN_LLM_PROXY", + "LLM_PROXY_URL", + ) + or json_cfg.get("proxy_url", ""), + } + + if cfg["base_url"]: + cfg["base_url"] = cfg["base_url"].rstrip("/") + + if required: + required_keys = ["api_key", "model"] + if require_base_url: + required_keys.append("base_url") + missing = [key for key in required_keys if not cfg.get(key)] + if missing: + raise ValueError( + f"Missing required LLM config keys: {missing}. " + f"Set them in shell environment variables, {LLM_ENV_PATH}, " + f"{LEGACY_LLM_ENV_PATH}, or {GEN_CONFIG_PATH}." + ) + + return cfg diff --git a/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py index 73db0874..c5ee0ac3 100644 --- a/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py +++ b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py @@ -17,52 +17,26 @@ import argparse import base64 import json -import os import re from pathlib import Path import numpy as np import trimesh import pyrender from PIL import Image -from openai import OpenAI import itertools from scipy.spatial import ConvexHull from typing import Dict, Any, List +from embodichain.gen_sim.mllm import ( + create_openai_client, + get_openai_compatible_llm_config, +) -def _load_gen_config() -> Dict[str, Any]: - config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json" - if not config_path.exists(): - raise FileNotFoundError(f"gen_config.json not found: {config_path}") - - with config_path.open("r", encoding="utf-8") as f: - raw_cfg = json.load(f) - - cfg = raw_cfg.get("llm", {}).get("openai_compatible", {}) - cfg["api_key"] = os.getenv("OPENAI_API_KEY") or cfg.get("api_key", "") - cfg["model"] = os.getenv("OPENAI_MODEL") or cfg.get("model", "") - cfg["base_url"] = os.getenv("OPENAI_BASE_URL") or cfg.get("base_url", "") - cfg["default_query"] = cfg.get("default_query", {}) - if cfg["base_url"]: - cfg["base_url"] = cfg["base_url"].rstrip("/") - - required = ["api_key", "model", "base_url"] - missing = [k for k in required if k not in cfg or not cfg[k]] - if missing: - raise ValueError(f"Missing required config keys: {missing}") - - return cfg - - -_GEN_CONFIG = _load_gen_config() +_GEN_CONFIG = get_openai_compatible_llm_config(required=True) DEPLOYMENT = _GEN_CONFIG["model"] -client = OpenAI( - api_key=_GEN_CONFIG["api_key"], - base_url=_GEN_CONFIG["base_url"], - default_query=_GEN_CONFIG.get("default_query") or None, -) +client = create_openai_client(config=_GEN_CONFIG) STRATEGY = None diff --git a/embodichain/lab/gym/envs/tasks/__init__.py b/embodichain/lab/gym/envs/tasks/__init__.py index 56da5a8c..61c28169 100644 --- a/embodichain/lab/gym/envs/tasks/__init__.py +++ b/embodichain/lab/gym/envs/tasks/__init__.py @@ -19,7 +19,6 @@ # Tableware task environments from embodichain.lab.gym.envs.tasks.tableware.pour_water.pour_water import ( PourWaterEnv, - PourWaterAgentEnv, ) from embodichain.lab.gym.envs.tasks.tableware.scoop_ice import ScoopIce from embodichain.lab.gym.envs.tasks.tableware.stack_blocks_two import StackBlocksTwoEnv @@ -38,8 +37,16 @@ from embodichain.lab.gym.envs.tasks.tableware.match_object_container import ( MatchObjectContainerEnv, ) +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.atomic_actions import ( + AtomicActionsAgentEnv, +) from embodichain.lab.gym.envs.tasks.tableware.rearrangement import ( RearrangementEnv, +) +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.pour_water_agent_env import ( + PourWaterAgentEnv, +) +from embodichain.gen_sim.action_agent_pipeline.env_adapters.tableware.rearrangement_agent_env import ( RearrangementAgentEnv, ) @@ -60,6 +67,7 @@ "PlaceObjectDrawerEnv", "StackCupsEnv", "MatchObjectContainerEnv", + "AtomicActionsAgentEnv", "RearrangementEnv", "RearrangementAgentEnv", "PushCubeEnv", diff --git a/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py b/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py deleted file mode 100644 index b6662dc3..00000000 --- a/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py +++ /dev/null @@ -1,202 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -import torch -from embodichain.utils import logger - - -class BaseAgentEnv: - - def _init_agents(self, agent_config, task_name, agent_config_path=None): - from embodichain.agents.hierarchy.task_agent import TaskAgent - from embodichain.agents.hierarchy.code_agent import CodeAgent - from embodichain.agents.hierarchy.validation_agent import ValidationAgent - from embodichain.agents.hierarchy.llm import ( - task_llm, - code_llm, - validation_llm, - ) - - if agent_config.get("TaskAgent") is not None: - self.task_agent = TaskAgent( - task_llm, - **agent_config["Agent"], - **agent_config["TaskAgent"], - task_name=task_name, - config_dir=agent_config_path, - ) - self.code_agent = CodeAgent( - code_llm, - **agent_config["Agent"], - **agent_config.get("CodeAgent"), - task_name=task_name, - config_dir=agent_config_path, - ) - self.validation_agent = ValidationAgent( - validation_llm, - task_name=task_name, - task_description=self.code_agent.prompt_kwargs.get("task_prompt")[ - "content" - ], - basic_background=self.code_agent.prompt_kwargs.get("basic_background")[ - "content" - ], - atom_actions=self.code_agent.prompt_kwargs.get("atom_actions")["content"], - ) - - def get_states(self): - # TODO: only support num_env = 1 for now - # store robot states in each env.reset - self.init_qpos = self.robot.get_qpos().squeeze(0) - - self.left_arm_joints = self.robot.get_joint_ids(name="left_arm") - self.right_arm_joints = self.robot.get_joint_ids(name="right_arm") - self.left_eef_joints = self.robot.get_joint_ids(name="left_eef") - self.right_eef_joints = self.robot.get_joint_ids(name="right_eef") - - self.left_arm_init_qpos = self.init_qpos[self.left_arm_joints] - self.right_arm_init_qpos = self.init_qpos[self.right_arm_joints] - - self.left_arm_init_xpos = self.robot.compute_fk( - self.left_arm_init_qpos, name="left_arm", to_matrix=True - ).squeeze(0) - self.right_arm_init_xpos = self.robot.compute_fk( - self.right_arm_init_qpos, name="right_arm", to_matrix=True - ).squeeze(0) - - self.left_arm_current_qpos = self.left_arm_init_qpos - self.right_arm_current_qpos = self.right_arm_init_qpos - - self.left_arm_current_xpos = self.left_arm_init_xpos - self.right_arm_current_xpos = self.right_arm_init_xpos - - self.left_arm_base_pose = self.robot.get_control_part_base_pose( - "left_arm", to_matrix=True - ).squeeze(0) - self.right_arm_base_pose = self.robot.get_control_part_base_pose( - "right_arm", to_matrix=True - ).squeeze(0) - - self.open_state = torch.tensor([0.05]) - self.close_state = torch.tensor([0.0]) - self.left_arm_current_gripper_state = self.open_state - self.right_arm_current_gripper_state = self.open_state - - # store some useful obj information - init_obj_info = {} - obj_uids = self.sim.get_rigid_object_uid_list() - for obj_name in obj_uids: - obj = self.sim.get_rigid_object(obj_name) - obj_pose = obj.get_local_pose(to_matrix=True).squeeze(0) - obj_height = obj_pose[2, 3] # Extract the height (z-coordinate) - obj_grasp_pose = self.affordance_datas.get( - f"{obj_name}_grasp_pose_object", None - ) - init_obj_info[obj_name] = { - "pose": obj_pose, # Store the full pose (4x4 matrix) - "height": obj_height, # Store the height (z-coordinate) - "grasp_pose_obj": ( - obj_grasp_pose.squeeze(0) if obj_grasp_pose is not None else None - ), # Store the grasp pose if available - } - self.init_obj_info = init_obj_info - - # -------------------- Common getters / setters -------------------- - - def get_obs_for_agent(self): - obs = self.get_obs() - rgb = obs["sensor"]["cam_high"]["color"].squeeze(0) - - # Get validation camera data - camera_data = self.event_manager.get_functor("validation_cameras")(self, None) - result = {"rgb": rgb} - result.update({k: v.squeeze(0) for k, v in camera_data.items()}) - return result - - def get_current_qpos_agent(self): - return self.left_arm_current_qpos, self.right_arm_current_qpos - - def set_current_qpos_agent(self, arm_qpos, is_left): - if is_left: - self.left_arm_current_qpos = arm_qpos - else: - self.right_arm_current_qpos = arm_qpos - - def get_current_xpos_agent(self): - return self.left_arm_current_xpos, self.right_arm_current_xpos - - def set_current_xpos_agent(self, arm_xpos, is_left): - if is_left: - self.left_arm_current_xpos = arm_xpos - else: - self.right_arm_current_xpos = arm_xpos - - def get_current_gripper_state_agent(self): - return self.left_arm_current_gripper_state, self.right_arm_current_gripper_state - - def set_current_gripper_state_agent(self, arm_gripper_state, is_left): - if is_left: - self.left_arm_current_gripper_state = arm_gripper_state - else: - self.right_arm_current_gripper_state = arm_gripper_state - - # -------------------- IK / FK -------------------- - def get_arm_ik(self, target_xpos, is_left, qpos_seed=None): - control_part = "left_arm" if is_left else "right_arm" - ret, qpos = self.robot.compute_ik( - name=control_part, pose=target_xpos, joint_seed=qpos_seed - ) - return ret.all().item(), qpos.squeeze(0) - - def get_arm_fk(self, qpos, is_left): - control_part = "left_arm" if is_left else "right_arm" - xpos = self.robot.compute_fk( - name=control_part, qpos=torch.as_tensor(qpos), to_matrix=True - ) - return xpos.squeeze(0) - - # -------------------- get only code for action list -------------------- - def generate_code_for_actions(self, regenerate=False, **kwargs): - logger.log_info( - f"Generate code for creating action list for {self.code_agent.task_name}.", - color="green", - ) - - # Task planning - print(f"\033[92m\nStart task planning.\n\033[0m") - - task_agent_input = self.task_agent.get_composed_observations( - env=self, regenerate=regenerate, **kwargs - ) - task_plan = self.task_agent.generate(**task_agent_input) - - # Code generation - print(f"\033[94m\nStart code generation.\n\033[0m") - code_agent_input = self.code_agent.get_composed_observations( - env=self, regenerate=regenerate, **kwargs - ) - code_agent_input["task_plan"] = task_plan - - code_file_path, kwargs, code = self.code_agent.generate(**code_agent_input) - return code_file_path, kwargs, code - - # -------------------- get action list -------------------- - def create_demo_action_list(self, regenerate=False, *args, **kwargs): - code_file_path, kwargs, _ = self.generate_code_for_actions( - regenerate=regenerate - ) - action_list = self.code_agent.act(code_file_path, **kwargs) - return action_list diff --git a/embodichain/lab/gym/envs/tasks/tableware/configurable_success.py b/embodichain/lab/gym/envs/tasks/tableware/configurable_success.py new file mode 100644 index 00000000..0afa8490 --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/tableware/configurable_success.py @@ -0,0 +1,248 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from typing import Any + +import torch + +__all__ = ["evaluate_configured_success"] + + +def evaluate_configured_success( + env, spec: Mapping[str, Any] | None = None +) -> torch.Tensor: + """Evaluate a config-defined task success predicate.""" + + success_spec = spec or getattr(env, "agent_success", None) + if success_spec is None: + success_spec = _legacy_success_spec(env) + if success_spec is None: + return _constant(env, False) + return _evaluate_spec(env, success_spec) + + +def _legacy_success_spec(env) -> Mapping[str, Any] | None: + object_name = getattr(env, "agent_success_object", None) + container_name = getattr(env, "agent_success_container", None) + if object_name is not None and container_name is not None: + return { + "type": "object_in_container", + "object": object_name, + "container": container_name, + "radius": getattr(env, "container_success_radius", 0.1), + "min_z_offset": getattr(env, "container_success_min_z_offset", -0.03), + "max_z_offset": getattr(env, "container_success_max_z_offset", 0.25), + } + + target_position = getattr(env, "agent_success_position", None) + if object_name is not None and target_position is not None: + return { + "type": "object_position_near", + "object": object_name, + "target_position": target_position, + "tolerance": getattr(env, "agent_success_tolerance", 0.05), + } + + target_xy = getattr(env, "target_xy", None) + if target_xy is not None: + return { + "type": "object_xy_near", + "object": getattr(env, "success_object", "cup"), + "target_xy": target_xy, + "tolerance": getattr(env, "success_tolerance", 0.04), + } + + return None + + +def _evaluate_spec( + env, spec: Mapping[str, Any] | Sequence[Mapping[str, Any]] +) -> torch.Tensor: + if isinstance(spec, Sequence) and not isinstance(spec, (str, bytes, Mapping)): + return _evaluate_all(env, spec) + if not isinstance(spec, Mapping): + raise TypeError( + f"Success spec must be a mapping or sequence, got {type(spec)}." + ) + + op = str(spec.get("op", "")).lower() + if not op and "terms" in spec and "type" not in spec and "func" not in spec: + op = "all" + + if op in {"all", "and"}: + return _evaluate_all(env, spec.get("terms", [])) + if op in {"any", "or"}: + return _evaluate_any(env, spec.get("terms", [])) + if op == "not": + term = spec.get("term", None) + if term is None: + terms = spec.get("terms", []) + if len(terms) != 1: + raise ValueError("Success op='not' requires exactly one term.") + term = terms[0] + return ~_evaluate_spec(env, term) + + term_type = str(spec.get("type", spec.get("func", ""))).lower() + if term_type in {"object_position_near", "object_near_position"}: + return _object_position_near(env, spec) + if term_type in {"object_xy_near", "object_near_xy"}: + return _object_xy_near(env, spec) + if term_type == "object_in_container": + return _object_in_container(env, spec) + if term_type in {"object_not_fallen", "not_fallen"}: + return _object_not_fallen(env, spec) + if term_type in {"object_axis_offset_near", "object_relative_axis_near"}: + return _object_axis_offset_near(env, spec) + if term_type in {"object_axis_near", "object_coordinate_near"}: + return _object_axis_near(env, spec) + if term_type in {"object_lifted", "object_height_above_initial"}: + return _object_lifted(env, spec) + + raise ValueError(f"Unsupported success term type: {term_type!r}.") + + +def _evaluate_all(env, terms: Sequence[Mapping[str, Any]]) -> torch.Tensor: + success = _constant(env, True) + for term in terms: + success = success & _evaluate_spec(env, term) + return success + + +def _evaluate_any(env, terms: Sequence[Mapping[str, Any]]) -> torch.Tensor: + success = _constant(env, False) + for term in terms: + success = success | _evaluate_spec(env, term) + return success + + +def _constant(env, value: bool) -> torch.Tensor: + return torch.full((env.num_envs,), value, dtype=torch.bool, device=env.device) + + +def _pose(env, uid: str) -> torch.Tensor: + return env.sim.get_rigid_object(uid).get_local_pose(to_matrix=True) + + +def _position(env, uid: str) -> torch.Tensor: + return _pose(env, uid)[:, :3, 3] + + +def _tensor(value: Any, *, dtype: torch.dtype, device: torch.device) -> torch.Tensor: + return torch.as_tensor(value, dtype=dtype, device=device) + + +def _object_name(spec: Mapping[str, Any]) -> str: + return str(spec.get("object", spec.get("object_uid"))) + + +def _object_position_near(env, spec: Mapping[str, Any]) -> torch.Tensor: + position = _position(env, _object_name(spec)) + target = _tensor( + spec.get("target_position", spec.get("position", spec.get("target"))), + dtype=position.dtype, + device=position.device, + ).flatten() + if target.numel() == 2: + return _object_xy_near(env, {**spec, "target_xy": target}) + target = target.reshape(1, 3) + tolerance = float(spec.get("tolerance", 0.05)) + return torch.linalg.norm(position - target, dim=-1) <= tolerance + + +def _object_xy_near(env, spec: Mapping[str, Any]) -> torch.Tensor: + position = _position(env, _object_name(spec)) + target_xy = _tensor( + spec.get("target_xy", spec.get("xy", spec.get("target"))), + dtype=position.dtype, + device=position.device, + ).flatten()[:2] + tolerance = float(spec.get("tolerance", spec.get("xy_tolerance", 0.05))) + return ( + torch.linalg.norm(position[:, :2] - target_xy.reshape(1, 2), dim=-1) + <= tolerance + ) + + +def _object_in_container(env, spec: Mapping[str, Any]) -> torch.Tensor: + object_position = _position(env, _object_name(spec)) + container_position = _position( + env, str(spec.get("container", spec.get("container_uid"))) + ) + radius = float(spec.get("xy_radius", spec.get("radius", 0.1))) + min_z_offset = float(spec.get("min_z_offset", -0.03)) + max_z_offset = float(spec.get("max_z_offset", 0.25)) + + xy_distance = torch.linalg.norm( + object_position[:, :2] - container_position[:, :2], dim=-1 + ) + z_offset = object_position[:, 2] - container_position[:, 2] + return ( + (xy_distance <= radius) + & (z_offset >= min_z_offset) + & (z_offset <= max_z_offset) + ) + + +def _object_not_fallen(env, spec: Mapping[str, Any]) -> torch.Tensor: + pose = _pose(env, _object_name(spec)) + pose_z_axis = pose[:, :3, 2] + world_z_axis = torch.tensor([0, 0, 1], dtype=pose.dtype, device=pose.device) + dot_product = torch.sum(pose_z_axis * world_z_axis, dim=-1).clamp(-1.0, 1.0) + angle = torch.arccos(dot_product) + return angle < float(spec.get("max_tilt", torch.pi / 4)) + + +def _object_axis_offset_near(env, spec: Mapping[str, Any]) -> torch.Tensor: + object_position = _position(env, _object_name(spec)) + reference_position = _position( + env, str(spec.get("reference", spec.get("reference_uid"))) + ) + axis = _axis_index(str(spec.get("axis", "y"))) + target_value = reference_position[:, axis] + float(spec.get("offset", 0.0)) + tolerance = float(spec.get("tolerance", 0.02)) + return torch.abs(object_position[:, axis] - target_value) <= tolerance + + +def _object_axis_near(env, spec: Mapping[str, Any]) -> torch.Tensor: + object_position = _position(env, _object_name(spec)) + axis = _axis_index(str(spec.get("axis", "y"))) + target_value = float(spec.get("target", spec.get("value"))) + tolerance = float(spec.get("tolerance", 0.02)) + return torch.abs(object_position[:, axis] - target_value) <= tolerance + + +def _object_lifted(env, spec: Mapping[str, Any]) -> torch.Tensor: + object_name = _object_name(spec) + position = _position(env, object_name) + initial_height = spec.get("initial_height") + if initial_height is None: + initial_height = getattr(env, "obj_info", {}).get(object_name, {}).get("height") + if initial_height is None: + initial_height = position[:, 2] + initial_height = _tensor( + initial_height, dtype=position.dtype, device=position.device + ) + return position[:, 2] >= initial_height + float(spec.get("min_height", 0.1)) + + +def _axis_index(axis: str) -> int: + axes = {"x": 0, "y": 1, "z": 2} + if axis not in axes: + raise ValueError(f"Unsupported axis {axis!r}; expected one of x, y, z.") + return axes[axis] diff --git a/embodichain/lab/gym/envs/tasks/tableware/pour_water/pour_water.py b/embodichain/lab/gym/envs/tasks/tableware/pour_water/pour_water.py index 83e356bf..a1afc922 100644 --- a/embodichain/lab/gym/envs/tasks/tableware/pour_water/pour_water.py +++ b/embodichain/lab/gym/envs/tasks/tableware/pour_water/pour_water.py @@ -15,18 +15,16 @@ # ---------------------------------------------------------------------------- import torch -from typing import Dict, Optional from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg from embodichain.lab.gym.utils.registration import register_env from embodichain.utils import logger -from embodichain.lab.gym.envs.tasks.tableware.base_agent_env import BaseAgentEnv from embodichain.lab.gym.envs.tasks.tableware.pour_water.action_bank import ( PourWaterActionBank, ) -__all__ = ["PourWaterEnv", "PourWaterAgentEnv"] +__all__ = ["PourWaterEnv"] @register_env("PourWater-v3", max_episode_steps=600) @@ -147,15 +145,3 @@ def _is_fall(self, pose: torch.Tensor) -> torch.Tensor: # Compute angle and check if fallen angle = torch.arccos(dot_product) return angle >= torch.pi / 4 - - -@register_env("PourWaterAgent-v3", max_episode_steps=600) -class PourWaterAgentEnv(BaseAgentEnv, PourWaterEnv): - def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): - super().__init__(cfg, **kwargs) - super()._init_agents(**kwargs) - - def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): - obs, info = super().reset(seed=seed, options=options) - super().get_states() - return obs, info diff --git a/embodichain/lab/gym/envs/tasks/tableware/rearrangement.py b/embodichain/lab/gym/envs/tasks/tableware/rearrangement.py index 7f9559ca..b20e8a7a 100644 --- a/embodichain/lab/gym/envs/tasks/tableware/rearrangement.py +++ b/embodichain/lab/gym/envs/tasks/tableware/rearrangement.py @@ -4,12 +4,10 @@ # All rights reserved. # ---------------------------------------------------------------------------- -from typing import Dict, Optional from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg from embodichain.lab.gym.utils.registration import register_env -from embodichain.lab.gym.envs.tasks.tableware.base_agent_env import BaseAgentEnv -__all__ = ["RearrangementEnv", "RearrangementAgentEnv"] +__all__ = ["RearrangementEnv"] @register_env("Rearrangement-v3", max_episode_steps=600) @@ -53,38 +51,3 @@ def is_task_success(self) -> bool: or abs(fork_x - fork_place_target_x) > tolerance or abs(fork_y - fork_place_target_y) > tolerance ) - - -@register_env("RearrangementAgent-v3", max_episode_steps=600) -class RearrangementAgentEnv(BaseAgentEnv, RearrangementEnv): - def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): - super().__init__(cfg, **kwargs) - super()._init_agents(**kwargs) - - def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): - obs, info = super().reset(seed=seed, options=options) - super().get_states() - return obs, info - - def is_task_success(self): - fork = self.sim.get_rigid_object("fork") - spoon = self.sim.get_rigid_object("spoon") - plate = self.sim.get_rigid_object("plate") - - plate_pose = plate.get_local_pose(to_matrix=True) - spoon_place_target_y = plate_pose[0, 1, 3] - 0.16 - fork_place_target_y = plate_pose[0, 1, 3] + 0.16 - - spoon_pose = spoon.get_local_pose(to_matrix=True) - spoon_y = spoon_pose[0, 1, 3] - - fork_pose = fork.get_local_pose(to_matrix=True) - fork_y = fork_pose[0, 1, 3] - - tolerance = self.metadata.get("success_params", {}).get("tolerance", 0.02) - - # spoon and fork should with the y range of tolerance related to plate. - return ( - abs(spoon_y - spoon_place_target_y) <= tolerance - and abs(fork_y - fork_place_target_y) <= tolerance - ) diff --git a/embodichain/lab/gym/utils/misc.py b/embodichain/lab/gym/utils/misc.py index de02b392..88c2dcce 100644 --- a/embodichain/lab/gym/utils/misc.py +++ b/embodichain/lab/gym/utils/misc.py @@ -285,6 +285,24 @@ def get_replaced_pose( return pose_to_change +def apply_rotation(pose, axis, angle_deg): + rot3 = R.from_euler(axis, angle_deg, degrees=True).as_matrix() # (3,3) + + # numpy + if isinstance(pose, np.ndarray): + rot4 = np.eye(4, dtype=pose.dtype) + rot4[:3, :3] = rot3.astype(pose.dtype, copy=False) + return pose @ rot4 + + # torch + if torch.is_tensor(pose): + rot4 = torch.eye(4, dtype=pose.dtype, device=pose.device) + rot4[:3, :3] = torch.tensor(rot3, dtype=pose.dtype, device=pose.device) + return pose @ rot4 + + raise TypeError(f"pose must be np.ndarray or torch.Tensor, got {type(pose)}") + + def get_offset_pose( pose_to_change: np.ndarray, offset_value: Union[float, List[float]], diff --git a/embodichain/lab/scripts/run_env.py b/embodichain/lab/scripts/run_env.py index 50ff124f..8700cce1 100644 --- a/embodichain/lab/scripts/run_env.py +++ b/embodichain/lab/scripts/run_env.py @@ -28,6 +28,27 @@ from embodichain.utils.logger import log_warning, log_info, log_error +def _log_task_success(env) -> bool | None: + try: + success_fn = ( + env.get_wrapper_attr("is_task_success") + if hasattr(env, "get_wrapper_attr") + else env.is_task_success + ) + success = success_fn() + except Exception as exc: + log_warning(f"Failed to evaluate task success after execution: {exc}") + return None + + if isinstance(success, torch.Tensor): + success_value = bool(success.detach().cpu().flatten().all().item()) + else: + success_value = bool(np.asarray(success).flatten().all()) + + log_info(f"Task success after execution: {success_value}", color="green") + return success_value + + def generate_and_execute_action_list(env, idx, debug_mode, **kwargs): action_list = env.get_wrapper_attr("create_demo_action_list")( @@ -38,6 +59,11 @@ def generate_and_execute_action_list(env, idx, debug_mode, **kwargs): log_warning("Action is invalid. Skip to next generation.") return False + if getattr(action_list, "already_executed", False): + log_info("Action list was already executed by the agent runtime.") + _log_task_success(env) + return True + for action in tqdm.tqdm( action_list, desc=f"Executing action list #{idx}", unit="step" ): @@ -48,6 +74,7 @@ def generate_and_execute_action_list(env, idx, debug_mode, **kwargs): # TODO: We may assume in export demonstration rollout, there is no truncation from the env. # but truncation is useful to improve the generation efficiency. + _log_task_success(env) return True @@ -113,14 +140,22 @@ def main(args, env, gym_config): # TODO: Support multiple trajectories per episode generation. num_traj = 1 for i in range(gym_config.get("max_episodes", 1)): + runtime_kwargs = { + "save_path": getattr(args, "save_path", ""), + "save_video": getattr(args, "save_video", False), + "debug_mode": getattr(args, "debug_mode", False), + "regenerate": getattr(args, "regenerate", False), + "recovery": getattr(args, "recovery", False), + "interactive_error_injection": getattr( + args, "interactive_error_injection", False + ), + } + generate_function( env, num_traj, i, - save_path=getattr(args, "save_path", ""), - save_video=getattr(args, "save_video", False), - debug_mode=getattr(args, "debug_mode", False), - regenerate=getattr(args, "regenerate", False), + **runtime_kwargs, ) # Final reset. diff --git a/embodichain/lab/sim/atom_actions.py b/embodichain/lab/sim/atom_actions.py deleted file mode 100644 index 2abefea9..00000000 --- a/embodichain/lab/sim/atom_actions.py +++ /dev/null @@ -1,948 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ---------------------------------------------------------------------------- - -import numpy as np -from embodichain.utils.logger import log_info, log_warning, log_error -from copy import deepcopy -from embodichain.lab.gym.utils.misc import ( - mul_linear_expand, - get_rotation_replaced_pose, -) -from embodichain.utils.math import get_offset_pose -import torch -from tqdm import tqdm -from scipy.spatial.transform import Rotation as R -from embodichain.utils.utility import encode_image - -# Import utility functions for atom actions -from embodichain.lab.sim.utility.atom_action_utils import ( - draw_axis, - get_arm_states, - find_nearest_valid_pose, - get_qpos, - plan_trajectory, - plan_gripper_trajectory, - finalize_actions, - extract_drive_calls, -) - -""" ---------------------------------------------Atom action functions---------------------------------------------------- ---------------------------------------------Atom action functions---------------------------------------------------- ---------------------------------------------Atom action functions---------------------------------------------------- -""" - - -# TODO: write a move_to_pose atom action, the use this action to form other atom actions -def grasp( - robot_name: str, - obj_name: str, - pre_grasp_dis: float = 0.05, - env=None, - force_valid=False, - **kwargs, -): - # Get target object - obj_uids = env.sim.get_rigid_object_uid_list() - if obj_name in obj_uids: - target_obj = env.sim.get_rigid_object(obj_name) - else: - log_error(f"No matched object {obj_uids}.") - target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) - - # Open the gripper if currently closed - actions = None - select_arm_current_gripper_state = ( - env.left_arm_current_gripper_state - if "left" in robot_name - else env.right_arm_current_gripper_state - ) - if select_arm_current_gripper_state <= env.open_state - 0.01: - actions = open_gripper(robot_name, env, **kwargs) - - # Retract the end-effector to avoid collision - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - select_arm_base_pose = ( - env.left_arm_base_pose if is_left else env.right_arm_base_pose - ) - base_to_eef_xy_dis = torch.norm( - select_arm_base_pose[:2, 3] - select_arm_current_pose[:2, 3] - ) - base_to_obj_xy_dis = torch.norm( - select_arm_base_pose[:2, 3] - target_obj_pose[:2, 3] - ) - dis_eps = kwargs.get("dis_eps", 0.05) - select_arm_init_pose = ( - env.left_arm_init_xpos if is_left else env.right_arm_init_xpos - ) - if base_to_eef_xy_dis > base_to_obj_xy_dis and not torch.allclose( - select_arm_current_pose, select_arm_init_pose, rtol=1e-5, atol=1e-8 - ): - delta = base_to_eef_xy_dis - (base_to_obj_xy_dis - dis_eps) - back_actions = move_by_relative_offset( - robot_name=robot_name, - dx=0.0, - dy=0.0, - dz=-delta, - env=env, - force_valid=force_valid, - mode="intrinsic", - sample_num=15, - **kwargs, - ) - actions = ( - np.concatenate([actions, back_actions], axis=0) - if actions is not None - else back_actions - ) - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - # Move the end-effector to a good place for starting grasping to avoid bad poses - select_arm_retract_pose = deepcopy( - env.left_arm_init_xpos if is_left else env.right_arm_init_xpos - ) - select_arm_retract_pose = get_offset_pose( - select_arm_retract_pose, 0.15, "z", "intrinsic" - ) - select_arm_retract_qpos = get_qpos( - env, - is_left, - select_arm, - select_arm_retract_pose, - env.left_arm_init_qpos if is_left else env.right_arm_init_qpos, - force_valid=force_valid, - name="retract_to_good_pose", - ) - qpos_list_back_to_retract = [select_arm_current_qpos, select_arm_retract_qpos] - sample_num = 30 - - plan_trajectory( - env, - select_arm, - qpos_list_back_to_retract, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - select_arm_current_qpos = select_arm_retract_qpos - select_arm_current_pose = select_arm_retract_pose - - # Rotate the arm base to face the object for better grasping - delta_xy = target_obj_pose[:2, 3] - select_arm_base_pose[:2, 3] - dx, dy = delta_xy[0], delta_xy[1] - aim_horizontal_angle = np.arctan2(dy, dx) - select_arm_aim_qpos = deepcopy(select_arm_current_qpos) - select_arm_aim_qpos[0] = aim_horizontal_angle - - # Get best grasp pose from affordance data - grasp_pose_object = env.init_obj_info.get(obj_name)["grasp_pose_obj"] - if ( - grasp_pose_object[0, 2] > 0.5 - ): # whether towards x direction TODO: make it robust - # Align the object pose's z-axis with the arm's aiming direction - target_obj_pose = torch.tensor( - get_rotation_replaced_pose( - np.array(target_obj_pose), - float(select_arm_aim_qpos[0]), - "z", - "intrinsic", - ) - ) - best_pickpose = target_obj_pose @ grasp_pose_object - grasp_pose = deepcopy(best_pickpose) - grasp_pose_pre1 = deepcopy(grasp_pose) - grasp_pose_pre1 = get_offset_pose(grasp_pose_pre1, -pre_grasp_dis, "z", "intrinsic") - - # Solve IK for pre-grasp and grasp poses - grasp_qpos_pre1 = get_qpos( - env, - is_left, - select_arm, - grasp_pose_pre1, - select_arm_aim_qpos, - force_valid=force_valid, - name="grasp pre1", - ) - grasp_qpos = get_qpos( - env, - is_left, - select_arm, - grasp_pose, - grasp_qpos_pre1, - force_valid=force_valid, - name="grasp", - ) - - # Update env state to final grasp pose - env.set_current_qpos_agent(grasp_qpos, is_left=is_left) - env.set_current_xpos_agent(grasp_pose, is_left=is_left) - - # ------------------------------------ Traj 0: init → aim ------------------------------------ - qpos_list_init_to_aim = [select_arm_current_qpos, select_arm_aim_qpos] - # base_sample_num = 10 - # base_angle = 0.08 - # sample_num = max(int(delta_angle / base_angle * base_sample_num), 2) - - sample_num = 10 - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_aim, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ------------------------------------ Traj 1: aim → pre-grasp ------------------------------------ - qpos_list_aim_to_pre1 = [select_arm_aim_qpos, grasp_qpos_pre1] - sample_num = kwargs.get("sample_num", 30) - - plan_trajectory( - env, - select_arm, - qpos_list_aim_to_pre1, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ------------------------------------ Traj 2: pre-grasp → grasp ------------------------------------ - qpos_list_pre1_to_grasp = [grasp_qpos_pre1, grasp_qpos] - sample_num = kwargs.get("sample_num", 20) - - plan_trajectory( - env, - select_arm, - qpos_list_pre1_to_grasp, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - traj_actions = finalize_actions(select_qpos_traj, ee_state_list_select) - actions = ( - traj_actions - if actions is None - else np.concatenate([actions, traj_actions], axis=0) - ) - - # ------------------------------------ Close gripper ------------------------------------ - close_gripper_actions = close_gripper(robot_name, env, **kwargs) - actions = np.concatenate([actions, close_gripper_actions], axis=0) - - log_info( - f"Total generated trajectory number for grasp: {len(actions)}.", color="green" - ) - - return actions - - -def place_on_table( - robot_name: str, - obj_name: str, - x: float = None, - y: float = None, - pre_place_dis: float = 0.08, - env=None, - force_valid=False, - **kwargs, -): - - init_obj_height = env.init_obj_info.get(obj_name).get("height") - height = init_obj_height + kwargs.get("eps", 0.03) - - traj_actions = move_to_absolute_position( - robot_name, x=x, y=y, z=height, env=env, force_valid=force_valid, **kwargs - ) - open_actions = open_gripper(robot_name, env, **kwargs) - - actions = np.concatenate([traj_actions, open_actions], axis=0) - - log_info( - f"Total generated trajectory number for place on table: {len(actions)}.", - color="green", - ) - - return actions - - -def move_relative_to_object( - robot_name: str, - obj_name: str, - x_offset: float = 0, - y_offset: float = 0, - z_offset: float = 0, - env=None, - force_valid=False, - **kwargs, -): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - # Resolve target object - obj_uids = env.sim.get_rigid_object_uid_list() - if obj_name in obj_uids: - target_obj = env.sim.get_rigid_object(obj_name) - else: - log_error("No matched object.") - - # Get object base pose (4x4 matrix) - target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) - - # Construct target pose (preserve orientation) - move_target_pose = deepcopy(select_arm_current_pose) - move_target_pose[:3, 3] = target_obj_pose[:3, 3] - move_target_pose[0, 3] += x_offset - move_target_pose[1, 3] += y_offset - move_target_pose[2, 3] += z_offset - - # Solve IK for target pose - move_target_qpos = get_qpos( - env, - is_left, - select_arm, - move_target_pose, - select_arm_current_qpos, - force_valid=force_valid, - name="move relative to object", - ) - - # Update env states - env.set_current_qpos_agent(move_target_qpos, is_left=is_left) - env.set_current_xpos_agent(move_target_pose, is_left=is_left) - - # ------------------------------------ Traj 1: init → target ------------------------------------ - qpos_list_init_to_target = [select_arm_current_qpos, move_target_qpos] - sample_num = kwargs.get("sample_num", 30) - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_target, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for move relative to object: {len(actions)}.", - color="green", - ) - - return actions - - -def move_to_absolute_position( - robot_name: str, - x: float = None, - y: float = None, - z: float = None, - env=None, - force_valid=False, - **kwargs, -): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - # Start from current pose, then selectively update xyz - move_pose = deepcopy(select_arm_current_pose) - - current_xyz = move_pose[:3, 3].clone() - - target_xyz = current_xyz.clone() - if x is not None: - target_xyz[0] = x - if y is not None: - target_xyz[1] = y - if z is not None: - target_xyz[2] = z - - move_pose[:3, 3] = target_xyz - - # Try IK on target pose - move_qpos = get_qpos( - env, - is_left, - select_arm, - move_pose, - select_arm_current_qpos, - force_valid=force_valid, - name="move to absolute position", - ) - - # Update env states - env.set_current_qpos_agent(move_qpos, is_left=is_left) - env.set_current_xpos_agent(move_pose, is_left=is_left) - - # ------------------------------------ Traj: init → target ------------------------------------ - qpos_list_init_to_move = [select_arm_current_qpos, move_qpos] - sample_num = kwargs.get("sample_num", 30) - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_move, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for move to absolute position: {len(actions)}.", - color="green", - ) - - return actions - - -def move_by_relative_offset( - robot_name: str, - dx: float = 0.0, - dy: float = 0.0, - dz: float = 0.0, - mode: str = "extrinsic", - env=None, - force_valid=False, - **kwargs, -): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - move_pose = deepcopy(select_arm_current_pose) - - # Apply relative offsets (dx, dy, dz always floats) - move_pose = get_offset_pose(move_pose, dx, "x", mode) - move_pose = get_offset_pose(move_pose, dy, "y", mode) - move_pose = get_offset_pose(move_pose, dz, "z", mode) - - # Solve IK - move_qpos = get_qpos( - env, - is_left, - select_arm, - move_pose, - select_arm_current_qpos, - force_valid=force_valid, - name="move by relative offset", - ) - - # Update environment states - env.set_current_qpos_agent(move_qpos, is_left=is_left) - env.set_current_xpos_agent(move_pose, is_left=is_left) - - # ------------------------------------ Traj: init → target ------------------------------------ - qpos_list_init_to_move = [select_arm_current_qpos, move_qpos] - sample_num = kwargs.get("sample_num", 20) - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_move, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for move by relative offset: {len(actions)}.", - color="green", - ) - - return actions - - -def back_to_initial_pose(robot_name: str, env=None, **kwargs): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - # Get arm states - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # Retrieve the initial joint configuration of this arm - target_qpos = env.left_arm_init_qpos if is_left else env.right_arm_init_qpos - target_qpos = torch.as_tensor(target_qpos, dtype=select_arm_current_qpos.dtype) - - # ---------------------------------------- Pose ---------------------------------------- - # Pre-back pose: move along tool z by a small offset (use intrinsic frame) - pre_back_pose = deepcopy(select_arm_current_pose) - pre_back_pose = get_offset_pose(pre_back_pose, -0.08, "z", "intrinsic") - - # IK for pre-back - pre_back_qpos = get_qpos( - env, - is_left, - select_arm, - pre_back_pose, - select_arm_current_qpos, - force_valid=kwargs.get("force_valid", False), - name="pre back pose", - ) - - # Update env states (move to target pose) - target_pose = env.get_arm_fk(qpos=target_qpos, is_left=is_left) - env.set_current_qpos_agent(target_qpos, is_left=is_left) - env.set_current_xpos_agent(target_pose, is_left=is_left) - - # ------------------------------------ Traj: init → pre back_pose ------------------------------------ - qpos_list_init_to_preback = [select_arm_current_qpos, pre_back_qpos] - sample_num = 20 - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_preback, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ------------------------------------ Traj: init → initial_pose ------------------------------------ - qpos_list_preback_to_target = [pre_back_qpos, target_qpos] - sample_num = kwargs.get("sample_num", 30) - - plan_trajectory( - env, - select_arm, - qpos_list_preback_to_target, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for back to initial pose: {len(actions)}.", - color="green", - ) - - return actions - - -def rotate_eef(robot_name: str, degree: float = 0, env=None, **kwargs): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - # Compute new joint positions - rotated_qpos = deepcopy(select_arm_current_qpos) - rotated_qpos[5] += np.deg2rad(degree) - - # Optional: limit checking (commented out by default) - # joint5_limit = env.get_joint_limits(select_arm)[5] - # if rotated_qpos[5] < joint5_limit[0] or rotated_qpos[5] > joint5_limit[1]: - # log_warning("Rotated qpos exceeds joint limits.\n") - - # Compute FK for new pose - rotated_pose = env.get_arm_fk( - qpos=rotated_qpos, - is_left=is_left, - ) - - # Update environment state - env.set_current_qpos_agent(rotated_qpos, is_left=is_left) - env.set_current_xpos_agent(rotated_pose, is_left=is_left) - - # ------------------------------------ Traj 1: init → rotated ------------------------------------ - qpos_list_init_to_rotated = [select_arm_current_qpos, rotated_qpos] - sample_num = kwargs.get("sample_num", 20) - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_rotated, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for rotate eef: {len(actions)}.", - color="green", - ) - - return actions - - -def orient_eef( - robot_name: str, - direction: str = "front", # 'front' or 'down' - env=None, - force_valid=False, - **kwargs, -): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - # Get arm state - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Pose ---------------------------------------- - # Generate replacement rotation matrix - replaced_rotation_matrix = np.eye(4) - if direction == "front": - rotation_matrix = R.from_euler("xyz", [180, -90, 0], degrees=True).as_matrix() - replaced_rotation_matrix[:3, :3] = ( - rotation_matrix @ replaced_rotation_matrix[:3, :3] - ) - elif direction == "down": - rotation_matrix = R.from_euler("x", 180, degrees=True).as_matrix() - replaced_rotation_matrix[:3, :3] = ( - rotation_matrix @ replaced_rotation_matrix[:3, :3] - ) - else: - log_error("Rotation direction must be 'front' or 'down'.") - - rotation_replaced_pose = deepcopy(select_arm_current_pose) - rot_torch = torch.as_tensor( - replaced_rotation_matrix[:3, :3], - dtype=rotation_replaced_pose.dtype, - device=rotation_replaced_pose.device, - ) - rotation_replaced_pose[:3, :3] = rot_torch - - # Solve IK for the new pose - replace_target_qpos = get_qpos( - env, - is_left, - select_arm, - rotation_replaced_pose, - select_arm_current_qpos, - force_valid=force_valid, - name="replaced-rotation", - ) - - # ---------------------------------------- Update env ---------------------------------------- - env.set_current_qpos_agent(replace_target_qpos, is_left=is_left) - env.set_current_xpos_agent(rotation_replaced_pose, is_left=is_left) - - # ------------------------------------ Traj: init → target ------------------------------------ - qpos_list_init_to_rotated = [select_arm_current_qpos, replace_target_qpos] - sample_num = kwargs.get("sample_num", 20) - - plan_trajectory( - env, - select_arm, - qpos_list_init_to_rotated, - sample_num, - select_arm_current_gripper_state, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for orient eef: {len(actions)}.", - color="green", - ) - - return actions - - -def close_gripper(robot_name: str, env=None, **kwargs): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Traj ---------------------------------------- - sample_num = kwargs.get("sample_num", 15) - execute_open = False # False → closing motion - - plan_gripper_trajectory( - env, - is_left, - sample_num, - execute_open, - select_arm_current_qpos, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for close gripper: {len(actions)}.", - color="green", - ) - - return actions - - -def open_gripper(robot_name: str, env=None, **kwargs): - - # ---------------------------------------- Prepare ---------------------------------------- - select_qpos_traj = [] - ee_state_list_select = [] - - ( - is_left, - select_arm, - select_arm_current_qpos, - select_arm_current_pose, - select_arm_current_gripper_state, - ) = get_arm_states(env, robot_name) - - # ---------------------------------------- Traj ---------------------------------------- - sample_num = kwargs.get("sample_num", 15) - execute_open = True # True → opening motion - - plan_gripper_trajectory( - env, - is_left, - sample_num, - execute_open, - select_arm_current_qpos, - select_qpos_traj, - ee_state_list_select, - ) - - # ---------------------------------------- Final ---------------------------------------- - actions = finalize_actions(select_qpos_traj, ee_state_list_select) - - log_info( - f"Total generated trajectory number for open gripper: {len(actions)}.", - color="green", - ) - - return actions - - -def drive( - left_arm_action=None, - right_arm_action=None, - env=None, - **kwargs, -): - - if left_arm_action is not None and right_arm_action is not None: - len_left = len(left_arm_action) - len_right = len(right_arm_action) - - if len_left < len_right: - diff = len_right - len_left - padding = np.repeat(left_arm_action[-1:], diff, axis=0) - left_arm_action = np.concatenate([left_arm_action, padding], axis=0) - elif len_right < len_left: - diff = len_left - len_right - padding = np.repeat(right_arm_action[-1:], diff, axis=0) - right_arm_action = np.concatenate([right_arm_action, padding], axis=0) - - left_arm_index = env.left_arm_joints + env.left_eef_joints - right_arm_index = env.right_arm_joints + env.right_eef_joints - actions = np.zeros((len(right_arm_action), len(env.init_qpos))) - actions[:, left_arm_index] = left_arm_action - actions[:, right_arm_index] = right_arm_action - - elif left_arm_action is None and right_arm_action is not None: - left_arm_index = env.left_arm_joints + env.left_eef_joints - right_arm_index = env.right_arm_joints + env.right_eef_joints - left_arm_action = finalize_actions( - env.left_arm_current_qpos, env.left_arm_current_gripper_state - ) - left_arm_action = np.repeat( - left_arm_action[None, :], len(right_arm_action), axis=0 - ) - - actions = np.zeros( - (len(right_arm_action), len(env.robot.get_qpos().squeeze(0))), - dtype=np.float32, - ) - actions[:, left_arm_index] = left_arm_action - actions[:, right_arm_index] = right_arm_action - - elif right_arm_action is None and left_arm_action is not None: - left_arm_index = env.left_arm_joints + env.left_eef_joints - right_arm_index = env.right_arm_joints + env.right_eef_joints - right_arm_action = finalize_actions( - env.right_arm_current_qpos, env.right_arm_current_gripper_state - ) - right_arm_action = np.repeat( - right_arm_action[None, :], len(left_arm_action), axis=0 - ) - - actions = np.zeros( - (len(left_arm_action), len(env.robot.get_qpos().squeeze(0))), - dtype=np.float32, - ) - actions[:, left_arm_index] = left_arm_action - actions[:, right_arm_index] = right_arm_action - - else: - log_error("At least one arm action should be provided.") - - actions = torch.from_numpy(actions).to(dtype=torch.float32).unsqueeze(1) - actions = list(actions.unbind(dim=0)) - for i in tqdm(range(len(actions))): - action = actions[i] - env.step(action) - return actions - - -def save_observations( - step_id: int = 0, - step_name: str = None, - env=None, - **kwargs, -): - # When using feedback script - log_dir = kwargs.get("log_dir") - if log_dir: - save_dir = log_dir / "camera_images" - - # Prepare subfolder: {id}_generate_num/episode{current_check_num} - gen_id = kwargs.get("id", "unknown_id") - episode_id = kwargs.get("current_check_num", 0) - - sub_dir = save_dir / f"{gen_id}_generate_num" / f"episode{episode_id}" - sub_dir.mkdir(parents=True, exist_ok=True) - - # Encode image to Base64 - base64_image = encode_image(env.get_obs_for_agent()["rgb"]) - - # Decode Base64 back to raw image bytes - import base64 - - img_bytes = base64.b64decode(base64_image) - - # Ensure step_name is not None - step_name = step_name if step_name is not None else "unnamed_step" - - # Save the decoded image - output_path = sub_dir / f"step{step_id}_{step_name}.png" - with open(output_path, "wb") as f: - f.write(img_bytes) - - # Print save info - log_info(f"[save_observations] Saved image to: {output_path}") - - # When only running the script (no feedback script) - else: - pass diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py index 4f2698de..d9471e8a 100644 --- a/embodichain/lab/sim/atomic_actions/actions.py +++ b/embodichain/lab/sim/atomic_actions/actions.py @@ -124,6 +124,8 @@ def _resolve_start_qpos( arm_dof = self.dof if arm_dof is None else arm_dof if start_qpos is None: start_qpos = self.robot.get_qpos(name=self.cfg.control_part) + if not isinstance(start_qpos, torch.Tensor): + logger.log_error("start_qpos must be a torch.Tensor.", TypeError) if start_qpos.shape == (arm_dof,): start_qpos = start_qpos.unsqueeze(0).repeat(self.n_envs, 1) if start_qpos.shape != (self.n_envs, arm_dof): @@ -133,6 +135,63 @@ def _resolve_start_qpos( ) return start_qpos + def _resolve_qpos_target(self, target: torch.Tensor, *, name: str) -> torch.Tensor: + """Resolve a qpos target into batched control-part joint positions.""" + if not isinstance(target, torch.Tensor): + logger.log_error(f"{name} must be a torch.Tensor.", TypeError) + if target.shape == (4, 4): + logger.log_error( + f"{name} shape (4, 4) is reserved for pose targets.", + ValueError, + ) + if target.shape == (self.dof,): + target = target.unsqueeze(0).repeat(self.n_envs, 1) + if target.shape != (self.n_envs, self.dof): + logger.log_error( + f"{name} must have shape ({self.dof},) or ({self.n_envs}, {self.dof}), " + f"but got {target.shape}", + ValueError, + ) + return target + + def _interpolate_qpos_with_motion_generator( + self, + start_qpos: torch.Tensor, + target_qpos: torch.Tensor, + n_waypoints: int, + ) -> torch.Tensor: + """Interpolate control-part qpos through the shared motion generator.""" + if n_waypoints < 2: + logger.log_error( + "n_waypoints must be at least 2 to include start and target qpos.", + ValueError, + ) + + trajectories = [] + options = MotionGenOptions( + control_part=self.cfg.control_part, + is_linear=False, + interpolate_nums=n_waypoints - 1, + ) + for env_id in range(self.n_envs): + qpos_list = torch.stack( + [start_qpos[env_id], target_qpos[env_id]], + dim=0, + ) + trajectory, _ = self.motion_generator.interpolate_trajectory( + control_part=self.cfg.control_part, + qpos_list=qpos_list, + options=options, + ) + if trajectory.shape != (n_waypoints, self.dof): + logger.log_error( + "Joint-space interpolation returned shape " + f"{trajectory.shape}, expected ({n_waypoints}, {self.dof}).", + ValueError, + ) + trajectories.append(trajectory) + return torch.stack(trajectories, dim=0) + def _compute_three_phase_waypoints( self, hand_interp_steps: int, @@ -255,6 +314,26 @@ def execute( trajectory of shape (n_envs, n_waypoints, dof), joint_ids corresponding to trajectory """ + if isinstance(target, torch.Tensor): + target_shape = tuple(target.shape) + is_pose_shape = target_shape in { + (4, 4), + (self.n_envs, 4, 4), + } + is_qpos_shape = target_shape in { + (self.dof,), + (self.n_envs, self.dof), + } + if is_qpos_shape and not is_pose_shape: + start_qpos = self._resolve_start_qpos(start_qpos) + target_qpos = self._resolve_qpos_target(target, name="target_qpos") + trajectory = self._interpolate_qpos_with_motion_generator( + start_qpos, + target_qpos, + int(self.cfg.sample_interval), + ) + return True, trajectory, self.arm_joint_ids + is_success, move_xpos = self._resolve_pose_target( target, action_name=self.__class__.__name__ ) diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py index 15b868a8..d4922f37 100644 --- a/embodichain/lab/sim/atomic_actions/engine.py +++ b/embodichain/lab/sim/atomic_actions/engine.py @@ -179,7 +179,6 @@ def __init__( # Semantic analyzer for object understanding self._semantic_analyzer = SemanticAnalyzer() - # Initialize default actions self._actions: Dict[str, AtomicAction] = self._init_actions(actions_cfg_list) def _init_actions( @@ -195,6 +194,11 @@ def _init_actions( } if actions_cfg_list is not None: for cfg in actions_cfg_list: + if cfg.name in actions: + logger.log_error( + f"Duplicate action name in config: {cfg.name}. " + "Action names must be unique for AtomicActionEngine." + ) action_class = builtin_action_map.get( cfg.name ) or _global_action_registry.get(cfg.name) @@ -217,7 +221,8 @@ def execute_static( action_names = list(self._actions.keys()) if len(target_list) != len(action_names): logger.log_error( - f"Length of target_list ({len(target_list)}) must match number of actions ({len(action_names)})." + f"Length of target_list ({len(target_list)}) must match number of " + f"actions ({len(action_names)})." ) start_qpos = self.motion_generator.robot.get_qpos() n_envs = start_qpos.shape[0] diff --git a/embodichain/lab/sim/planners/motion_generator.py b/embodichain/lab/sim/planners/motion_generator.py index 220deeca..7e03a198 100644 --- a/embodichain/lab/sim/planners/motion_generator.py +++ b/embodichain/lab/sim/planners/motion_generator.py @@ -595,7 +595,11 @@ def interpolate_trajectory( feasible_pose_targets = feasible_pose_targets[valid_mask] else: # Perform joint space interpolation directly if not linear or if end-effector poses are not provided - qpos_interpolated = qpos_list.unsqueeze_(0).permute(1, 0, 2) # [N, 1, DOF] + if qpos_list.ndim != 2: + logger.log_error( + "qpos_list must have shape [num_waypoints, dof] for " + f"joint-space interpolation, but got {qpos_list.shape}." + ) if isinstance(options.interpolate_nums, int): interp_nums = [options.interpolate_nums] * (len(qpos_list) - 1) @@ -606,15 +610,12 @@ def interpolate_trajectory( ) interp_nums = options.interpolate_nums - interpolate_qpos_list = ( - interpolate_with_nums( - qpos_interpolated, - interp_nums=interp_nums, - device=self.device, - ) - .permute(1, 0, 2) - .squeeze_(0) - ) # [M, DOF] + interpolate_qpos_list = interpolate_with_nums( + qpos_list.unsqueeze(0), + interp_nums=interp_nums, + device=self.device, + ).squeeze(0) + # [M, DOF] feasible_pose_targets = None diff --git a/embodichain/utils/llm_json.py b/embodichain/utils/llm_json.py new file mode 100644 index 00000000..68bdacfb --- /dev/null +++ b/embodichain/utils/llm_json.py @@ -0,0 +1,73 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import json +import re +from collections.abc import Mapping +from typing import Any + +__all__ = [ + "extract_json_object", + "normalize_json_content", +] + +_JSON_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) + + +def extract_json_object(content: str | Mapping[str, Any]) -> dict[str, Any]: + """Extract a JSON object from plain or fenced LLM content. + + Args: + content: Raw LLM text, already parsed JSON-like mapping, or markdown fenced + JSON content. + + Returns: + Parsed JSON object. + + Raises: + ValueError: If no JSON object can be parsed. + """ + if isinstance(content, Mapping): + return dict(content) + + text = str(content).strip() + candidates = [match.group(1).strip() for match in _JSON_FENCE_RE.finditer(text)] + candidates.append(text) + + decoder = json.JSONDecoder() + for candidate in candidates: + try: + value = json.loads(candidate) + except json.JSONDecodeError: + start = candidate.find("{") + if start < 0: + continue + try: + value, _ = decoder.raw_decode(candidate[start:]) + except json.JSONDecodeError: + continue + + if isinstance(value, dict): + return value + + raise ValueError("Expected a JSON object in the LLM response.") + + +def normalize_json_content(content: str | Mapping[str, Any]) -> str: + """Normalize JSON-like LLM content into stable pretty-printed JSON text.""" + return json.dumps(extract_json_object(content), ensure_ascii=False, indent=2) diff --git a/pyproject.toml b/pyproject.toml index de1e5deb..e1505107 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dynamic = ["version"] # specified using PEP 508 direct references where present. dependencies = [ "dexsim_engine==0.4.1", + "numpy>=1.26.4,<2", "setuptools>=78.1.1", "gymnasium>=0.29.1", "langchain", diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py index ba7324cc..ee9c1596 100644 --- a/tests/sim/atomic_actions/test_actions.py +++ b/tests/sim/atomic_actions/test_actions.py @@ -101,6 +101,19 @@ def _make_mock_motion_generator(robot: Mock | None = None) -> Mock: mg = Mock() mg.robot = robot or _make_mock_robot() mg.device = mg.robot.device + + def interpolate_trajectory(control_part=None, qpos_list=None, options=None): + weights = torch.linspace( + 0, + 1, + steps=options.interpolate_nums + 1, + dtype=qpos_list.dtype, + device=qpos_list.device, + ).view(-1, 1) + trajectory = qpos_list[0] + (qpos_list[-1] - qpos_list[0]) * weights + return trajectory, None + + mg.interpolate_trajectory = Mock(side_effect=interpolate_trajectory) return mg @@ -156,6 +169,10 @@ def test_resolve_start_qpos_broadcasts_single(self): for i in range(NUM_ENVS): assert torch.equal(result[i], single) + def test_resolve_start_qpos_rejects_non_tensor(self): + with pytest.raises(TypeError, match="torch.Tensor"): + self.action._resolve_start_qpos([0.0] * ARM_DOF) + def test_compute_three_phase_waypoints_sums_to_sample_interval(self): hand_interp_steps = 5 first, second, third = self.action._compute_three_phase_waypoints( @@ -186,6 +203,37 @@ def test_interpolate_hand_qpos_linear(self): expected_mid = torch.tensor([0.5, 0.5]) assert torch.allclose(result[1], expected_mid, atol=1e-6) + def test_execute_accepts_hand_qpos_target(self): + cfg = MoveActionCfg(control_part="hand", sample_interval=5) + action = MoveAction(self.mg, cfg=cfg) + start_qpos = torch.tensor([[0.0, 0.1], [0.2, 0.3]]) + target_qpos = torch.tensor([0.8, 0.9]) + + is_success, trajectory, joint_ids = action.execute( + target=target_qpos, + start_qpos=start_qpos, + ) + + assert is_success is True + assert trajectory.shape == (NUM_ENVS, cfg.sample_interval, HAND_DOF) + assert torch.allclose(trajectory[:, 0], start_qpos) + assert torch.allclose( + trajectory[:, -1], + target_qpos.unsqueeze(0).repeat(NUM_ENVS, 1), + ) + assert joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF)) + assert self.mg.interpolate_trajectory.call_count == NUM_ENVS + + def test_motion_generator_qpos_interpolation_requires_start_and_target_waypoints( + self, + ): + with pytest.raises(ValueError, match="at least 2"): + self.action._interpolate_qpos_with_motion_generator( + torch.zeros(NUM_ENVS, ARM_DOF), + torch.ones(NUM_ENVS, ARM_DOF), + 1, + ) + # --------------------------------------------------------------------------- # PickUpAction diff --git a/tests/sim/atomic_actions/test_engine.py b/tests/sim/atomic_actions/test_engine.py index 52dc034d..b908110a 100644 --- a/tests/sim/atomic_actions/test_engine.py +++ b/tests/sim/atomic_actions/test_engine.py @@ -27,6 +27,7 @@ Affordance, ObjectSemantics, ) +from embodichain.lab.sim.atomic_actions.actions import MoveAction, MoveActionCfg from embodichain.lab.sim.atomic_actions.engine import ( AtomicActionEngine, SemanticAnalyzer, @@ -185,6 +186,102 @@ def test_unsupported_type_raises(self): self.engine._resolve_target(42) +# --------------------------------------------------------------------------- +# AtomicActionEngine.execute_static +# --------------------------------------------------------------------------- + + +def _make_sequence_test_robot() -> Mock: + robot = Mock() + robot.device = torch.device("cpu") + robot.dof = 4 + + def get_qpos(name=None): + if name == "arm": + return torch.zeros(1, 2) + if name == "hand": + return torch.zeros(1, 2) + return torch.zeros(1, 4) + + def get_joint_ids(name=None): + if name == "arm": + return [0, 1] + if name == "hand": + return [2, 3] + return [0, 1, 2, 3] + + robot.get_qpos = get_qpos + robot.get_joint_ids = get_joint_ids + return robot + + +class TestExecuteStaticSequence: + """Tests execute_static with configured action order.""" + + def setup_method(self): + self.robot = _make_sequence_test_robot() + self.mg = Mock() + self.mg.robot = self.robot + self.mg.device = torch.device("cpu") + + def interpolate_trajectory(control_part=None, qpos_list=None, options=None): + weights = torch.linspace( + 0, + 1, + steps=options.interpolate_nums + 1, + dtype=qpos_list.dtype, + device=qpos_list.device, + ).view(-1, 1) + trajectory = qpos_list[0] + (qpos_list[-1] - qpos_list[0]) * weights + return trajectory, None + + self.mg.interpolate_trajectory = Mock(side_effect=interpolate_trajectory) + + def teardown_method(self): + unregister_action("_test_hand_move") + + def test_duplicate_move_names_are_rejected(self): + with pytest.raises(RuntimeError, match="Duplicate action name"): + AtomicActionEngine( + self.mg, + actions_cfg_list=[ + MoveActionCfg(control_part="arm", sample_interval=3), + MoveActionCfg(control_part="hand", sample_interval=4), + ], + ) + + def test_execute_static_uses_action_config_order(self): + register_action("_test_hand_move", MoveAction) + engine = AtomicActionEngine( + self.mg, + actions_cfg_list=[ + MoveActionCfg(control_part="arm", sample_interval=3), + MoveActionCfg( + name="_test_hand_move", + control_part="hand", + sample_interval=4, + ), + ], + ) + + assert "move" in engine._actions + assert "_test_hand_move" in engine._actions + assert list(engine._actions) == ["move", "_test_hand_move"] + + is_success, trajectory = engine.execute_static( + target_list=[ + torch.tensor([1.0, 2.0]), + torch.tensor([0.5, 0.7]), + ] + ) + + assert is_success is True + assert trajectory.shape == (1, 7, 4) + assert torch.allclose(trajectory[0, 2, :2], torch.tensor([1.0, 2.0])) + assert torch.allclose(trajectory[0, 3, :2], torch.tensor([1.0, 2.0])) + assert torch.allclose(trajectory[0, -1, 2:], torch.tensor([0.5, 0.7])) + + if __name__ == "__main__": test = TestSemanticAnalyzer() test.setup_method() diff --git a/tests/sim/planners/test_motion_generator.py b/tests/sim/planners/test_motion_generator.py index 300d191b..267dd0cd 100644 --- a/tests/sim/planners/test_motion_generator.py +++ b/tests/sim/planners/test_motion_generator.py @@ -45,6 +45,36 @@ def to_numpy(tensor): return np.array(tensor) +def test_interpolate_trajectory_joint_space_uses_waypoint_axis(): + motion_gen = MotionGenerator.__new__(MotionGenerator) + motion_gen.device = torch.device("cpu") + motion_gen.robot = None + + qpos_list = torch.tensor( + [ + [0.0, 0.0], + [1.0, 2.0], + ], + dtype=torch.float32, + ) + original_shape = qpos_list.shape + + trajectory, feasible_pose_targets = motion_gen.interpolate_trajectory( + control_part="arm", + qpos_list=qpos_list, + options=MotionGenOptions( + is_linear=False, + interpolate_nums=4, + ), + ) + + assert qpos_list.shape == original_shape + assert feasible_pose_targets is None + assert trajectory.shape == (5, 2) + assert torch.allclose(trajectory[0], qpos_list[0]) + assert torch.allclose(trajectory[-1], qpos_list[-1]) + + class BaseTestMotionGenerator(object): def setup_simulation(self): cls = type(self) @@ -229,6 +259,7 @@ def test_create_trajectory_with_xpos(self, is_linear): plan_result = self.motion_gen.generate( target_states=target_states, options=options ) + assert plan_result.success, "Motion planning failed" out_qpos_list = to_numpy(plan_result.positions) assert ( len(out_qpos_list) == self.sample_num @@ -277,6 +308,7 @@ def test_create_trajectory_with_qpos(self, is_linear): plan_result = self.motion_gen.generate( target_states=target_states, options=options ) + assert plan_result.success, "Motion planning failed" out_qpos_list = to_numpy(plan_result.positions) assert ( len(out_qpos_list) == self.sample_num