Download the client test script, ensuring that the PC and the LLM630 Compute Kit are on the same network segment. Run the script, passing the device's IP address via the `--host` parameter.
python .\llm-qwen2.5-1B.py --host 192.168.20.24 Setup LLM... Setup LLM finished. import socket import json import argparse def create_tcp_connection(host, port): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((host, port)) return sock def send_json(sock, data): json_data = json.dumps(data, ensure_ascii=False) + '\n' sock.sendall(json_data.encode('utf-8')) def receive_response(sock): response = '' while True: part = sock.recv(4096).decode('utf-8') response += part if '\n' in response: break return response.strip() def close_connection(sock): if sock: sock.close() def create_init_data(): return { "request_id": "llm_001", "work_id": "llm", "action": "setup", "object": "llm.setup", "data": { "model": "qwen2.5-0.5B-prefill-20e", "response_format": "llm.utf-8.stream", "input": "llm.utf-8.stream", "enoutput": True, "max_token_len": 1023, "prompt": "You are a knowledgeable assistant capable of answering various questions and providing information." } } def parse_setup_response(response_data, sent_request_id): error = response_data.get('error') request_id = response_data.get('request_id') if request_id != sent_request_id: print(f"Request ID mismatch: sent {sent_request_id}, received {request_id}") return None if error and error.get('code') != 0: print(f"Error Code: {error['code']}, Message: {error['message']}") return None return response_data.get('work_id') def setup(sock, init_data): sent_request_id = init_data['request_id'] send_json(sock, init_data) response = receive_response(sock) response_data = json.loads(response) return parse_setup_response(response_data, sent_request_id) def exit_session(sock, deinit_data): send_json(sock, deinit_data) response = receive_response(sock) response_data = json.loads(response) print("Exit Response:", response_data) def parse_inference_response(response_data): error = response_data.get('error') if error and error.get('code') != 0: print(f"Error Code: {error['code']}, Message: {error['message']}") return None 
return response_data.get('data') def main(host, port): sock = create_tcp_connection(host, port) try: print("Setup LLM...") init_data = create_init_data() llm_work_id = setup(sock, init_data) print("Setup LLM finished.") while True: user_input = input("Enter your message (or 'exit' to quit): ") if user_input.lower() == 'exit': break send_json(sock, { "request_id": "llm_001", "work_id": llm_work_id, "action": "inference", "object": "llm.utf-8.stream", "data": { "delta": user_input, "index": 0, "finish": True } }) while True: response = receive_response(sock) response_data = json.loads(response) data = parse_inference_response(response_data) if data is None: break delta = data.get('delta') finish = data.get('finish') print(delta, end='', flush=True) if finish: print() break exit_session(sock, { "request_id": "llm_exit", "work_id": llm_work_id, "action": "exit" }) finally: close_connection(sock) if __name__ == "__main__": parser = argparse.ArgumentParser(description='TCP Client to send JSON data.') parser.add_argument('--host', type=str, default='localhost', help='Server hostname (default: localhost)') parser.add_argument('--port', type=int, default=10001, help='Server port (default: 10001)') args = parser.parse_args() main(args.host, args.port) Enter your message (or 'exit' to quit): who are you? I am a large language model created by Alibaba Cloud. I am called Qwen. I am designed to assist with a wide range of tasks, from simple questions to complex problem-solving. I can answer a wide variety of questions and provide information on various topics. I can also help with language translation, writing, and more. My goal is to be a helpful and informative assistant to those who use me.