-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup-qwen3-llm8850.sh
More file actions
executable file
·127 lines (112 loc) · 4.14 KB
/
setup-qwen3-llm8850.sh
File metadata and controls
executable file
·127 lines (112 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/bash
# =============================================================================
# Qwen3:1.7B on LLM8850 Setup for Optidex
# =============================================================================
# Runs the Qwen3 language model on the LLM8850 AI accelerator (port 8000).
# Chat will use this instead of Ollama when LLM_SERVER=LLM8850.
#
# Based on: https://github.com/PiSugar/whisplay-ai-chatbot/wiki/LLM8850-Integration
# Model: https://huggingface.co/AXERA-TECH/Qwen3-1.7B
# =============================================================================
# Strict mode: exit on error, on use of an unset variable, and if any stage of
# a pipeline fails (plain `set -e` misses the latter two).
set -euo pipefail
# ANSI color codes, stored as literal "\033[..." text; `echo -e` (or
# `printf %b`) renders them when printing. Marked readonly: they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'
# Banner — the escape codes held in the color variables are rendered by `echo -e`.
echo -e "${BLUE}============================================${NC}"
echo -e "${BLUE} Qwen3:1.7B on LLM8850 Setup for Optidex ${NC}"
echo -e "${BLUE}============================================${NC}"
echo ""
# Prerequisites
# Install pip, then the Python packages the tokenizer server needs.
# On PEP 668 "externally managed" systems (newer Debian/Raspberry Pi OS),
# pip requires --break-system-packages; older pip versions reject that flag,
# hence the silenced first attempt with a plain-pip fallback.
echo -e "${GREEN}[1/4] Checking prerequisites...${NC}"
sudo apt update
sudo apt install -y python3-pip
pip install transformers jinja2 --break-system-packages 2>/dev/null || pip install transformers jinja2
# Fetch the AXERA-TECH Qwen3-1.7B repository; model weights live in Git LFS.
QWEN_DIR="$HOME/Qwen3-1.7B"
if [ ! -d "$QWEN_DIR" ]; then
  # First run: clone from Hugging Face, then pull the LFS blobs.
  echo "Cloning Qwen3-1.7B from Hugging Face..."
  cd "$HOME"
  git lfs install 2>/dev/null || true
  git clone https://huggingface.co/AXERA-TECH/Qwen3-1.7B
  cd "$QWEN_DIR"
  git lfs pull
else
  # Re-run: refresh the existing checkout; failures here are non-fatal.
  echo "Qwen3 directory exists, updating..."
  cd "$QWEN_DIR"
  git pull || true
  git lfs pull 2>/dev/null || true
fi
# Make the bundled inference binary executable (quietly skipped if absent; the
# model-folder warning below covers a mismatched repo layout).
echo -e "${GREEN}[2/4] Setting up binary...${NC}"
chmod +x main_api_axcl_aarch64 2>/dev/null || true
# Generate serve.sh.
# The heredoc delimiter is UNQUOTED so $PORT and $SYSTEM_PROMPT are baked in
# at setup time (previously the quoted delimiter left both outer variables
# dead and the env-provided SYSTEM_PROMPT silently ignored). Expansions that
# must happen when serve.sh runs are escaped with a backslash.
echo -e "${GREEN}[3/4] Creating serve.sh...${NC}"
PORT=12300
WORKDIR="$(pwd)"
# NOTE: the prompt is embedded inside double quotes in serve.sh, so it must
# not itself contain double quotes.
SYSTEM_PROMPT="${SYSTEM_PROMPT:-You are a helpful assistant. Keep responses concise.}"
cat > "$WORKDIR/serve.sh" << SERVEEOF
#!/bin/bash
PORT=$PORT
SYSTEM_PROMPT="$SYSTEM_PROMPT"
working_dir=\$(pwd)
cd "\$working_dir"
echo "Working directory: \$working_dir"
echo "Starting tokenizer server on port \$PORT..."
python3 qwen3_tokenizer_uid.py --port "\$PORT" &
TOKENIZER_PID=\$!
# Always reap the tokenizer — also when the main binary crashes or the
# service is stopped (the old post-hoc pkill only ran after a clean exit).
trap 'kill "\$TOKENIZER_PID" 2>/dev/null || true' EXIT
sleep 8
echo "Starting main API application (Qwen3 on LLM8850)..."
./main_api_axcl_aarch64 \\
  --system_prompt "\$SYSTEM_PROMPT" \\
  --template_filename_axmodel "qwen3-1.7b-ax650/qwen3_p128_l%d_together.axmodel" \\
  --axmodel_num 28 \\
  --url_tokenizer_model "http://127.0.0.1:\$PORT" \\
  --filename_post_axmodel qwen3-1.7b-ax650/qwen3_post.axmodel \\
  --filename_tokens_embed qwen3-1.7b-ax650/model.embed_tokens.weight.bfloat16.bin \\
  --tokens_embed_num 151936 \\
  --tokens_embed_size 2048 \\
  --use_mmap_load_embed 1 \\
  --devices 0
exit 0
SERVEEOF
# Warn if the expected model folder is missing so the user can fix the paths
# referenced inside serve.sh (some repos name the folder differently).
if [ ! -d "$QWEN_DIR/qwen3-1.7b-ax650" ]; then
  echo -e "${YELLOW}Note: Check model folder name (e.g. qwen3-1.7b-ax650) and update serve.sh paths if needed.${NC}"
  ls -la "$QWEN_DIR"
fi
chmod +x "$WORKDIR/serve.sh"
# Create systemd service
# Install a unit so the model server runs as a managed background service.
echo -e "${GREEN}[4/4] Creating systemd service...${NC}"
SERVICE_NAME="qwen3-llm8850"
SERVICE_FILE="/etc/systemd/system/$SERVICE_NAME.service"
# Unquoted heredoc: $USER and $WORKDIR expand NOW, baking the current user and
# the model checkout directory into the unit file.
# NOTE(review): "port 8000" here presumably refers to the API binary's default
# listen port (the tokenizer uses 12300) — confirm against the LLM8850 docs.
sudo tee "$SERVICE_FILE" > /dev/null << EOF
[Unit]
Description=Qwen3 LLM8850 Service (port 8000)
After=network.target
[Service]
Type=simple
User=$USER
WorkingDirectory=$WORKDIR
ExecStart=$WORKDIR/serve.sh
Restart=on-failure
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
# Register the new unit; the script deliberately does not start or enable it.
sudo systemctl daemon-reload
# Final summary: service commands and the Optidex .env settings to use.
# %b renders the embedded \033 escapes exactly as `echo -e` would.
echo ""
printf '%b\n' "${BLUE}============================================${NC}"
printf '%b\n' "${GREEN} Qwen3 on LLM8850 setup complete!${NC}"
printf '%b\n' "${BLUE}============================================${NC}"
printf '%s\n' ""
printf '%s\n' "Start the service:"
printf '%s\n' " sudo systemctl start qwen3-llm8850"
printf '%s\n' " sudo systemctl enable qwen3-llm8850 # optional: start on boot"
printf '%s\n' ""
printf '%s\n' "Then set in optidex .env:"
printf '%s\n' " LLM_SERVER=LLM8850"
printf '%s\n' " LLM8850_LLM_HOST=http://localhost:8000"
printf '%s\n' ""
printf '%s\n' "Note: If the model path in serve.sh does not match your repo (e.g. qwen3-1.7b-ax650), edit $WORKDIR/serve.sh."
printf '%s\n' "Check M5Stack docs: https://docs.m5stack.com/en/guide/ai_accelerator/llm-8850/m5_llm_8850_qwen3_1.7b"