Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,12 @@ The challenge centers on vision-language fusion for cross-room end-to-end naviga
- A standardized debugging time and environment will be provided onsite. Teams may use model weights different from the online stage and make environment-specific configuration adjustments, but modifications to the core algorithm logic are strictly prohibited.

### 4.2 Procedure
Each team will receive 10 fixed-sequence instructions. Start from the first one.
Each team will receive 10 instructions. Start from any instruction.
For each instruction:
- Move the robot to the given starting position.
- Provide the instruction to the robot and raise your hand to signal the start.
- If execution fails, teams may retry (up to 3 attempts) or skip the instruction.
- After 3 failed attempts (timeout, collision, human intervention, etc.), the instruction must be skipped.
- Skipped instructions cannot be retried later.
- If execution fails, teams may retry or skip the instruction.
- Instruction can be repeated if failed (timeout, collision, human intervention, etc.).
- Before each attempt, the algorithm state must be reset and previous observations cleared.

### 4.3 Time Limit
Expand Down Expand Up @@ -82,7 +81,7 @@ Successfully completing one instruction will be one success, and the completion
| Action | Score Impact |
|:--|:--|
| Successfully reach goal | success |
| Minor scrape more than 5 times or Collision with obstacle | fail |
| Minor scrape or Collision with obstacle | fail |

If there is a trend of continuous collisions, the referee has the right to terminate the robot’s current action, with the severity of the impact determined by the on-site referee.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@
- 比赛现场将提供统一的调试时间和环境,允许使用与线上赛不同的模型权重,允许进行环境适配性配置修改,但禁止修改核心算法逻辑。

### 4.2 比赛过程
每支队伍开始挑战时将会提供十条固定顺序的指令,从第一条指令开始
每支队伍开始挑战时将会提供十条导航指令,选手可选择指令进行
- 移动机器人到达该指令的起始位置后,将指令输入给机器人后举手示意作为开始挑战的信号。
- 指令执行过程中若中途失败,可选择重试,也可以选择跳过该指令,顺序执行下一条未执行指令。
- 每条指令可重试3次,若3次失败,则必须跳过该指令。(失败指选手选择重新开始,或其他条件,如超时,冲撞障碍物,人为干预等)
- 已经跳过的指令不能重复执行。
- 指令执行过程中若中途失败,可选择重试,也可以选择跳过该指令。
- 失败时可选择重新开始(如超时,冲撞障碍物,人为干预等)
- 每次测试前需重置算法状态并清除所有前序执行过程中获取的观测缓存。

### 4.3 时间限制
Expand All @@ -60,7 +59,7 @@
- 成功完成1条指令,算成功一次,并记录该指令的完成时间。(成功条件:在比赛中,每条指令均对应一个目标位置。目标位置定义为半径 2 米的圆形区域(不穿墙),机器人最终停止在该范围即可判定为该条指令导航成功。)

**扣分规则**:
- 若机器人频繁刮蹭(5次)或直接撞击障碍物,本次导航将被强制终止,该条指令失败。
- 若机器人连续刮蹭或直接撞击障碍物,本次导航将被强制终止,该条指令失败。

| Action | Score |
|:--|:--:|
Expand Down
3 changes: 2 additions & 1 deletion challenge/onsite_competition/sdk/cam.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def get_observation(self, timeout_ms: int = 1000) -> Dict:
if not color or not depth:
raise RuntimeError("can not align color/depth frame")

rgb = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR)
bgr = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR)
rgb = bgr[..., ::-1] # HxWx3, uint8 (convert to RGB)
depth_raw = np.asanyarray(depth.get_data()) # HxW, uint16
if depth_raw.shape != rgb.shape[:2]:
# Extreme fallback (theoretically should be consistent after alignment).
Expand Down
109 changes: 91 additions & 18 deletions challenge/onsite_competition/sdk/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def odom_callback(self, msg):
self.start_yaw = yaw
rospy.loginfo(f"start yaw: {math.degrees(self.start_yaw):.2f}")

# --- position (new)
p = msg.pose.pose.position
self.current_xy = (p.x, p.y)

def execute_turn(self):
if self.start_yaw is None:
rospy.loginfo("wait for init yaw state...")
Expand Down Expand Up @@ -87,17 +91,17 @@ class DiscreteRobotController(Turn90Degrees):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) # initialize parent class

def stand_still(self, duration=0.5):
def stand_still(self, duration=0.2):
twist = Twist()
twist.linear.x = 0.0
twist.angular.z = 0.0
self.cmd_vel_pub.publish(twist)
rospy.sleep(duration) # Maintain stand still for a short duration
rospy.loginfo("Stand still command executed.")

def move_forward(self, distance=0.25):
def move_forward(self, distance=0.5, speed=0.2):
twist = Twist()
twist.linear.x = 0.2 # Forward speed
twist.linear.x = speed # Forward speed
twist.angular.z = 0.0
duration = distance / twist.linear.x # Time to move forward the specified distance

Expand All @@ -108,34 +112,103 @@ def move_forward(self, distance=0.25):
self.cmd_vel_pub.publish(twist)
self.rate.sleep()

self.stand_still() # Stop after moving forward
# self.stand_still() # Stop after moving forward
rospy.loginfo("Move forward command executed.")

def turn_left(self, angle=15, speed=0.2):
self.turn_angle = math.radians(angle) # update angle
self.angular_speed = speed # Set positive angular speed for left turn
self.start_yaw = None # Reset start yaw to current position
self.turning = False # Reset turning flag
self.run()
self.stand_still() # Stop after moving forward
rospy.loginfo("Turn left command executed.")
# ---- NEW: feedback-controlled forward motion


def move_feedback(self, distance=0.25, speed=0.5, tol=0.02, timeout=None):
"""
里程计闭环直线移动:支持正/负 distance。
- distance: 目标路程(米)。>0 前进,<0 倒车。
- speed: 名义线速度(m/s),可给正/负,最终会按正数取绝对值。
- tol: 终止距离容差(米)
- timeout: 超时(秒);默认 max(3*|distance|/speed, 3.0)
"""
# 等待位姿
while self.current_xy is None and not rospy.is_shutdown():
rospy.loginfo_throttle(2.0, "Waiting for /odom...")
self.rate.sleep()
if rospy.is_shutdown():
return

# 方向与目标
direction = 1.0 if distance >= 0.0 else -1.0
target = abs(distance)
speed = abs(speed) if speed is not None else 0.5

start_xy = self.current_xy
start_time = rospy.Time.now()

if timeout is None:
timeout = max(3.0, 3.0 * (target / max(speed, 0.05)))

def turn_right(self, angle=15, speed=-0.2):
twist = Twist()

# 简单 P 控制,越靠近越慢
Kp = 1.5
min_speed = 0.06 # 防止轮子停转

rospy.loginfo(
f"move_linear: target={target:.3f} m, dir={'forward' if direction>0 else 'backward'}, "
f"speed≈{speed:.2f} m/s, tol={tol:.3f} m"
)

try:
while not rospy.is_shutdown():
# 超时
if (rospy.Time.now() - start_time).to_sec() > timeout:
rospy.logwarn("move_linear timeout reached; stopping.")
break

# 走过的距离(欧式距离,不区分前后,目标用 abs)
cx, cy = self.current_xy
sx, sy = start_xy
traveled = math.hypot(cx - sx, cy - sy)
remaining = target - traveled

# 达标退出
if remaining <= tol:
rospy.loginfo(f"Reached distance: traveled={traveled:.3f} m (tol {tol} m)")
break

# 速度控制(带方向)
v = Kp * remaining
v = max(min(v, speed), min_speed) # [min_speed, speed]
twist.linear.x = direction * v # 关键:按方向加符号
twist.angular.z = 0.0
self.cmd_vel_pub.publish(twist)

rospy.loginfo_throttle(
1.0, f"traveled={traveled:.3f} m, remaining={remaining:.3f} m, v={twist.linear.x:.2f} m/s"
)
self.rate.sleep()

finally:
# 停车
twist.linear.x = 0.0
twist.angular.z = 0.0
self.cmd_vel_pub.publish(twist)

rospy.loginfo("Move linear command executed.")

def turn(self, angle=15, speed=0.5):
self.turn_angle = math.radians(angle) # update angle
self.angular_speed = speed # Set positive angular speed for left turn
self.start_yaw = None # Reset start yaw to current position
self.turning = False # Reset turning flag
self.run()
self.stand_still() # Stop after moving forward
rospy.loginfo("Turn right command executed.")
rospy.loginfo("Turn left command executed.")


if __name__ == '__main__':
try:
control = DiscreteRobotController()
control.turn_left(10)
control.move_forward(0.1)
control.turn_right(10)
control.turn(15, 0.5) # left
control.turn(15, -0.5) # right
control.move_feedback(0.25, 0.5)
control.move_feedback(-0.25, 0.5)

except rospy.ROSInterruptException:
pass
103 changes: 72 additions & 31 deletions challenge/onsite_competition/sdk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
import sys

from real_world_env import RealWorldEnv
from stream import app, start_env
from stream import run, set_instruction

from internnav.agent.utils.client import AgentClient
from internnav.configs.evaluator.default_config import get_config
from internnav.utils.comm_utils.client import AgentClient


def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--config",
type=str,
default='scripts/eval/configs/h1_cma_cfg.py',
default='challenge/onsite_competition/configs/h1_internvla_n1_cfg.py',
help='eval config file path, e.g. scripts/eval/configs/h1_cma_cfg.py',
)
parser.add_argument(
"--instruction",
type=str,
default='Go straight and pass the sofa and turn right into the hallway. Keep walking down, pass the kitchen and the bathroom, then enter the study room at the far end on the right with a desk, stop next to the white shelf.',
help='current instruction to follow',
)
parser.add_argument("--tag", type=str, help="tag for the run, saved by the tag name which is team-task-trail")
parser.add_argument("--uninteractive_mode", action='store_true', help="whether to confirm each step")
return parser.parse_args()


Expand All @@ -34,46 +34,87 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'):
return getattr(config_module, attr_name)


# TODO add logging for each step, saved by the tag name which is team-task-trail
def confirm(msg: str) -> bool:
"""
Ask user to confirm. Return True if user types 'y' (case-insensitive),
False for anything else (including empty input).
"""
try:
answer = input(f"{msg} [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print("\nCancelled.")
return False
return answer in ("", "y")


def get_instruction() -> int:
try:
import json

instruction_lst = json.load(open("challenge/onsite_competition/instructions.json"))
print("Available instructions:")
for i, item in enumerate(instruction_lst):
print(f"{i}: {item['instruction_title']}")
answer = input("input instruction id: ").strip().lower()
answer = int(answer)
except (EOFError, KeyboardInterrupt):
print("\nCancelled.")
sys.exit()
return instruction_lst[answer]['instruction'][0]


def action_to_word(action: int) -> str:
action = max(0, min(3, action))
wl = ["stand still", "move forward", "turn left", "turn right"]
return wl[action]


def main():
args = parse_args()
print("--- Loading config from:", args.config, "---")
evaluator_cfg = load_eval_cfg(args.config, attr_name='eval_cfg')
cfg = get_config(evaluator_cfg)
print(cfg)
cfg = load_eval_cfg(args.config, attr_name='eval_cfg')
agent_cfg = cfg.agent

# initialize user agent
agent = AgentClient(cfg.agent)
# initialize user's agent
agent = AgentClient(agent_cfg)

# initialize real world env
env = RealWorldEnv()
env = RealWorldEnv(fps=30, duration=0.1, distance=0.3, angle=15, move_speed=0.5, turn_speed=0.5)
env.reverse() # reverse move direction if using a rear camera
env.step(0)
obs = env.get_observation()

# start stream
start_env(env)
app.run(host="0.0.0.0", port=8080, threaded=True)
print("--- start running steam app ---")
run(env=env)

while True:
instruction = get_instruction()
print("\nNew instruction:", instruction)
set_instruction(instruction)

try:
while True:
# print("get observation...")
# obs contains {rgb, depth, instruction}
obs = env.get_observation()
obs["instruction"] = args.instruction
# print(obs)
obs["instruction"] = instruction

# print("agent step...")
print("agent step...")
# action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent)
try:
action = agent.step(obs)[0]['action'][0]
print(f"agent step success, action is {action}")
except Exception as e:
print(f"agent step error {e}")
continue

# print("env step...")
try:
action = agent.step([obs])[0]['action'][0]
print("agent step success, action:", action)

if args.uninteractive_mode or confirm(f"Execute this action [{action_to_word(action)}]?"):
print("env step...")
env.step(action)
print("env step success")
except Exception as e:
print(f"env step error {e}")
continue
finally:
env.close()
else:
print("Stop requested. Exiting loop.")
print("agent reset...")
agent.reset()
break


if __name__ == "__main__":
main()
Loading
Loading