InternRobotics · kew6688 · Oct 18, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025
diff --git a/challenge/onsite_competition/onsite_competition_rules_en-US.md b/challenge/onsite_competition/onsite_competition_rules_en-US.md
@@ -38,13 +38,12 @@ The challenge centers on vision-language fusion for cross-room end-to-end naviga
 - A standardized debugging time and environment will be provided onsite. Teams may use model weights different from the online stage and make environment-specific configuration adjustments, but modifications to the core algorithm logic are strictly prohibited.
 
 ### 4.2 Procedure
-Each team will receive 10 fixed-sequence instructions. Start from the first one.
+Each team will receive 10 instructions. Start from any instruction.
 For each instruction:
 - Move the robot to the given starting position.
 - Provide the instruction to the robot and raise your hand to signal the start.
-- If execution fails, teams may retry (up to 3 attempts) or skip the instruction.
-- After 3 failed attempts (timeout, collision, human intervention, etc.), the instruction must be skipped.
-- Skipped instructions cannot be retried later.
+- If execution fails, teams may retry or skip the instruction.
+- Instruction can be repeated if failed (timeout, collision, human intervention, etc.).
 - Before each attempt, the algorithm state must be reset and previous observations cleared.
 
 ### 4.3 Time Limit
@@ -82,7 +81,7 @@ Successfully completing one instruction will be one success, and the completion
 | Action | Score Impact |
 |:--|:--|
 | Successfully reach goal | success |
-| Minor scrape more than 5 times or Collision with obstacle | fail |
+| Minor scrape or Collision with obstacle | fail |
 
 If there is a trend of continuous collisions, the referee has the right to terminate the robot’s current action, with the severity of the impact determined by the on-site referee.
 

diff --git a/challenge/onsite_competition/onsite_competition_rules_zh-CN.md b/challenge/onsite_competition/onsite_competition_rules_zh-CN.md
@@ -32,11 +32,10 @@
 - 比赛现场将提供统一的调试时间和环境，允许使用与线上赛不同的模型权重，允许进行环境适配性配置修改，但禁止修改核心算法逻辑。
 
 ### 4.2 比赛过程
-每支队伍开始挑战时将会提供十条固定顺序的指令，从第一条指令开始。
+每支队伍开始挑战时将会提供十条导航指令，选手可选择指令进行。
  - 移动机器人到达该指令的起始位置后，将指令输入给机器人后举手示意作为开始挑战的信号。
- - 指令执行过程中若中途失败，可选择重试，也可以选择跳过该指令，顺序执行下一条未执行指令。
- - 每条指令可重试3次，若3次失败，则必须跳过该指令。（失败指选手选择重新开始，或其他条件，如超时，冲撞障碍物，人为干预等）
- - 已经跳过的指令不能重复执行。
+ - 指令执行过程中若中途失败，可选择重试，也可以选择跳过该指令。
+ - 失败时可选择重新开始（如超时，冲撞障碍物，人为干预等）
  - 每次测试前需重置算法状态并清除所有前序执行过程中获取的观测缓存。
 
 ### 4.3 时间限制
@@ -60,7 +59,7 @@
 - 成功完成1条指令，算成功一次，并记录该指令的完成时间。（成功条件：在比赛中，每条指令均对应一个目标位置。目标位置定义为半径 2 米的圆形区域（不穿墙），机器人最终停止在该范围即可判定为该条指令导航成功。）
 
 **扣分规则**：
-- 若机器人频繁刮蹭（5次）或直接撞击障碍物，本次导航将被强制终止，该条指令失败。
+- 若机器人连续刮蹭或直接撞击障碍物，本次导航将被强制终止，该条指令失败。
 
 | Action | Score |
 |:--|:--:|

diff --git a/challenge/onsite_competition/sdk/cam.py b/challenge/onsite_competition/sdk/cam.py
@@ -99,7 +99,8 @@ def get_observation(self, timeout_ms: int = 1000) -> Dict:
  if not color or not depth:
  raise RuntimeError("can not align color/depth frame")
 
- rgb = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR)
+ bgr = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR)
+ rgb = bgr[..., ::-1] # HxWx3, uint8 (convert to RGB)
  depth_raw = np.asanyarray(depth.get_data()) # HxW, uint16
  if depth_raw.shape != rgb.shape[:2]:
  # Extreme fallback (theoretically should be consistent after alignment).

diff --git a/challenge/onsite_competition/sdk/control.py b/challenge/onsite_competition/sdk/control.py
@@ -35,6 +35,10 @@ def odom_callback(self, msg):
  self.start_yaw = yaw
  rospy.loginfo(f"start yaw: {math.degrees(self.start_yaw):.2f}")
 
+ # --- position (new)
+ p = msg.pose.pose.position
+ self.current_xy = (p.x, p.y)
+
  def execute_turn(self):
  if self.start_yaw is None:
  rospy.loginfo("wait for init yaw state...")
@@ -87,17 +91,17 @@ class DiscreteRobotController(Turn90Degrees):
  def __init__(self, *args, **kwargs):
  super().__init__(*args, **kwargs) # initialize parent class
 
- def stand_still(self, duration=0.5):
+ def stand_still(self, duration=0.2):
  twist = Twist()
  twist.linear.x = 0.0
  twist.angular.z = 0.0
  self.cmd_vel_pub.publish(twist)
  rospy.sleep(duration) # Maintain stand still for a short duration
  rospy.loginfo("Stand still command executed.")
 
- def move_forward(self, distance=0.25):
+ def move_forward(self, distance=0.5, speed=0.2):
  twist = Twist()
- twist.linear.x = 0.2 # Forward speed
+ twist.linear.x = speed # Forward speed
  twist.angular.z = 0.0
  duration = distance / twist.linear.x # Time to move forward the specified distance
 
@@ -108,34 +112,103 @@ def move_forward(self, distance=0.25):
  self.cmd_vel_pub.publish(twist)
  self.rate.sleep()
 
- self.stand_still() # Stop after moving forward
+ # self.stand_still() # Stop after moving forward
  rospy.loginfo("Move forward command executed.")
 
- def turn_left(self, angle=15, speed=0.2):
- self.turn_angle = math.radians(angle) # update angle
- self.angular_speed = speed # Set positive angular speed for left turn
- self.start_yaw = None # Reset start yaw to current position
- self.turning = False # Reset turning flag
- self.run()
- self.stand_still() # Stop after moving forward
- rospy.loginfo("Turn left command executed.")
+ # ---- NEW: feedback-controlled forward motion
+
+
+def move_feedback(self, distance=0.25, speed=0.5, tol=0.02, timeout=None):
+ """
+ 里程计闭环直线移动：支持正/负 distance。
+ - distance: 目标路程（米）。>0 前进，<0 倒车。
+ - speed: 名义线速度（m/s），可给正/负，最终会按正数取绝对值。
+ - tol: 终止距离容差（米）
+ - timeout: 超时（秒）；默认 max(3*|distance|/speed, 3.0)
+ """
+ # 等待位姿
+ while self.current_xy is None and not rospy.is_shutdown():
+ rospy.loginfo_throttle(2.0, "Waiting for /odom...")
+ self.rate.sleep()
+ if rospy.is_shutdown():
+ return
+
+ # 方向与目标
+ direction = 1.0 if distance >= 0.0 else -1.0
+ target = abs(distance)
+ speed = abs(speed) if speed is not None else 0.5
+
+ start_xy = self.current_xy
+ start_time = rospy.Time.now()
+
+ if timeout is None:
+ timeout = max(3.0, 3.0 * (target / max(speed, 0.05)))
 
- def turn_right(self, angle=15, speed=-0.2):
+ twist = Twist()
+
+ # 简单 P 控制，越靠近越慢
+ Kp = 1.5
+ min_speed = 0.06 # 防止轮子停转
+
+ rospy.loginfo(
+ f"move_linear: target={target:.3f} m, dir={'forward' if direction>0 else 'backward'}, "
+ f"speed≈{speed:.2f} m/s, tol={tol:.3f} m"
+ )
+
+ try:
+ while not rospy.is_shutdown():
+ # 超时
+ if (rospy.Time.now() - start_time).to_sec() > timeout:
+ rospy.logwarn("move_linear timeout reached; stopping.")
+ break
+
+ # 走过的距离（欧式距离，不区分前后，目标用 abs）
+ cx, cy = self.current_xy
+ sx, sy = start_xy
+ traveled = math.hypot(cx - sx, cy - sy)
+ remaining = target - traveled
+
+ # 达标退出
+ if remaining <= tol:
+ rospy.loginfo(f"Reached distance: traveled={traveled:.3f} m (tol {tol} m)")
+ break
+
+ # 速度控制（带方向）
+ v = Kp * remaining
+ v = max(min(v, speed), min_speed) # [min_speed, speed]
+ twist.linear.x = direction * v # 关键：按方向加符号
+ twist.angular.z = 0.0
+ self.cmd_vel_pub.publish(twist)
+
+ rospy.loginfo_throttle(
+ 1.0, f"traveled={traveled:.3f} m, remaining={remaining:.3f} m, v={twist.linear.x:.2f} m/s"
+ )
+ self.rate.sleep()
+
+ finally:
+ # 停车
+ twist.linear.x = 0.0
+ twist.angular.z = 0.0
+ self.cmd_vel_pub.publish(twist)
+
+ rospy.loginfo("Move linear command executed.")
+
+ def turn(self, angle=15, speed=0.5):
  self.turn_angle = math.radians(angle) # update angle
  self.angular_speed = speed # Set positive angular speed for left turn
  self.start_yaw = None # Reset start yaw to current position
  self.turning = False # Reset turning flag
  self.run()
- self.stand_still() # Stop after moving forward
- rospy.loginfo("Turn right command executed.")
+ rospy.loginfo("Turn left command executed.")
 
 
 if __name__ == '__main__':
  try:
  control = DiscreteRobotController()
- control.turn_left(10)
- control.move_forward(0.1)
- control.turn_right(10)
+ control.turn(15, 0.5) # left
+ control.turn(15, -0.5) # right
+ control.move_feedback(0.25, 0.5)
+ control.move_feedback(-0.25, 0.5)
 
  except rospy.ROSInterruptException:
  pass
diff --git a/challenge/onsite_competition/sdk/main.py b/challenge/onsite_competition/sdk/main.py
@@ -3,26 +3,26 @@
 import sys
 
 from real_world_env import RealWorldEnv
-from stream import app, start_env
+from stream import run, set_instruction
 
-from internnav.agent.utils.client import AgentClient
-from internnav.configs.evaluator.default_config import get_config
+from internnav.utils.comm_utils.client import AgentClient
 
 
 def parse_args():
  parser = argparse.ArgumentParser()
  parser.add_argument(
  "--config",
  type=str,
- default='scripts/eval/configs/h1_cma_cfg.py',
+ default='challenge/onsite_competition/configs/h1_internvla_n1_cfg.py',
  help='eval config file path, e.g. scripts/eval/configs/h1_cma_cfg.py',
  )
  parser.add_argument(
  "--instruction",
  type=str,
+ default='Go straight and pass the sofa and turn right into the hallway. Keep walking down, pass the kitchen and the bathroom, then enter the study room at the far end on the right with a desk, stop next to the white shelf.',
  help='current instruction to follow',
  )
- parser.add_argument("--tag", type=str, help="tag for the run, saved by the tag name which is team-task-trail")
+ parser.add_argument("--uninteractive_mode", action='store_true', help="whether to confirm each step")
  return parser.parse_args()
 
 
@@ -34,46 +34,87 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'):
  return getattr(config_module, attr_name)
 
 
-# TODO add logging for each step, saved by the tag name which is team-task-trail
+def confirm(msg: str) -> bool:
+ """
+ Ask user to confirm. Return True if user types 'y' (case-insensitive),
+ False for anything else (including empty input).
+ """
+ try:
+ answer = input(f"{msg} [y/N]: ").strip().lower()
+ except (EOFError, KeyboardInterrupt):
+ print("\nCancelled.")
+ return False
+ return answer in ("", "y")
+
+
+def get_instruction() -> int:
+ try:
+ import json
+
+ instruction_lst = json.load(open("challenge/onsite_competition/instructions.json"))
+ print("Available instructions:")
+ for i, item in enumerate(instruction_lst):
+ print(f"{i}: {item['instruction_title']}")
+ answer = input("input instruction id: ").strip().lower()
+ answer = int(answer)
+ except (EOFError, KeyboardInterrupt):
+ print("\nCancelled.")
+ sys.exit()
+ return instruction_lst[answer]['instruction'][0]
+
+
+def action_to_word(action: int) -> str:
+ action = max(0, min(3, action))
+ wl = ["stand still", "move forward", "turn left", "turn right"]
+ return wl[action]
+
+
 def main():
  args = parse_args()
  print("--- Loading config from:", args.config, "---")
- evaluator_cfg = load_eval_cfg(args.config, attr_name='eval_cfg')
- cfg = get_config(evaluator_cfg)
- print(cfg)
+ cfg = load_eval_cfg(args.config, attr_name='eval_cfg')
+ agent_cfg = cfg.agent
 
- # initialize user agent
- agent = AgentClient(cfg.agent)
+ # initialize user's agent
+ agent = AgentClient(agent_cfg)
 
  # initialize real world env
- env = RealWorldEnv()
+ env = RealWorldEnv(fps=30, duration=0.1, distance=0.3, angle=15, move_speed=0.5, turn_speed=0.5)
+ env.reverse() # reverse move direction if using a rear camera
+ env.step(0)
+ obs = env.get_observation()
 
  # start stream
- start_env(env)
- app.run(host="0.0.0.0", port=8080, threaded=True)
+ print("--- start running steam app ---")
+ run(env=env)
+
+ while True:
+ instruction = get_instruction()
+ print("\nNew instruction:", instruction)
+ set_instruction(instruction)
 
- try:
  while True:
  # print("get observation...")
  # obs contains {rgb, depth, instruction}
  obs = env.get_observation()
- obs["instruction"] = args.instruction
+ # print(obs)
+ obs["instruction"] = instruction
 
- # print("agent step...")
+ print("agent step...")
  # action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent)
- try:
- action = agent.step(obs)[0]['action'][0]
- print(f"agent step success, action is {action}")
- except Exception as e:
- print(f"agent step error {e}")
- continue
-
- # print("env step...")
- try:
+ action = agent.step([obs])[0]['action'][0]
+ print("agent step success, action:", action)
+
+ if args.uninteractive_mode or confirm(f"Execute this action [{action_to_word(action)}]?"):
+ print("env step...")
  env.step(action)
  print("env step success")
- except Exception as e:
- print(f"env step error {e}")
- continue
- finally:
- env.close()
+ else:
+ print("Stop requested. Exiting loop.")
+ print("agent reset...")
+ agent.reset()
+ break
+
+
+if __name__ == "__main__":
+ main()