-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup-qwen3-llm8850.sh
More file actions
executable file
·127 lines (112 loc) · 4.14 KB
/
setup-qwen3-llm8850.sh
File metadata and controls
executable file
·127 lines (112 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/bash
# =============================================================================
# Qwen3:1.7B on LLM8850 Setup for Optidex
# =============================================================================
# Runs the Qwen3 language model on the LLM8850 AI accelerator (port 8000).
# Chat will use this instead of Ollama when LLM_SERVER=LLM8850.
#
# Based on: https://github.com/PiSugar/whisplay-ai-chatbot/wiki/LLM8850-Integration
# Model: https://huggingface.co/AXERA-TECH/Qwen3-1.7B
# =============================================================================
# Strict mode: exit on error, on use of an unset variable, and if any stage of
# a pipeline fails (plain `set -e` misses the latter two).
set -euo pipefail
# ANSI color codes, stored as literal "\033[..." text; `echo -e` (or
# `printf %b`) renders them when printing. Marked readonly: they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'
# Banner — the escape codes held in the color variables are rendered by `echo -e`.
echo -e "${BLUE}============================================${NC}"
echo -e "${BLUE} Qwen3:1.7B on LLM8850 Setup for Optidex ${NC}"
echo -e "${BLUE}============================================${NC}"
echo ""
# Prerequisites
# Install pip, then the Python packages the tokenizer server needs.
# On PEP 668 "externally managed" systems (newer Debian/Raspberry Pi OS),
# pip requires --break-system-packages; older pip versions reject that flag,
# hence the silenced first attempt with a plain-pip fallback.
echo -e "${GREEN}[1/4] Checking prerequisites...${NC}"
sudo apt update
sudo apt install -y python3-pip
pip install transformers jinja2 --break-system-packages 2>/dev/null || pip install transformers jinja2
# Fetch the AXERA-TECH Qwen3-1.7B repository; model weights live in Git LFS.
QWEN_DIR="$HOME/Qwen3-1.7B"
if [ ! -d "$QWEN_DIR" ]; then
  # First run: clone from Hugging Face, then pull the LFS blobs.
  echo "Cloning Qwen3-1.7B from Hugging Face..."
  cd "$HOME"
  git lfs install 2>/dev/null || true
  git clone https://huggingface.co/AXERA-TECH/Qwen3-1.7B
  cd "$QWEN_DIR"
  git lfs pull
else
  # Re-run: refresh the existing checkout; failures here are non-fatal.
  echo "Qwen3 directory exists, updating..."
  cd "$QWEN_DIR"
  git pull || true
  git lfs pull 2>/dev/null || true
fi
# Make the bundled inference binary executable (quietly skipped if absent; the
# model-folder warning below covers a mismatched repo layout).
echo -e "${GREEN}[2/4] Setting up binary...${NC}"
chmod +x main_api_axcl_aarch64 2>/dev/null || true
# Generate serve.sh.
# The heredoc delimiter is UNQUOTED so $PORT and $SYSTEM_PROMPT are baked in
# at setup time (previously the quoted delimiter left both outer variables
# dead and the env-provided SYSTEM_PROMPT silently ignored). Expansions that
# must happen when serve.sh runs are escaped with a backslash.
echo -e "${GREEN}[3/4] Creating serve.sh...${NC}"
PORT=12300
WORKDIR="$(pwd)"
# NOTE: the prompt is embedded inside double quotes in serve.sh, so it must
# not itself contain double quotes.
SYSTEM_PROMPT="${SYSTEM_PROMPT:-You are a helpful assistant. Keep responses concise.}"
cat > "$WORKDIR/serve.sh" << SERVEEOF
#!/bin/bash
PORT=$PORT
SYSTEM_PROMPT="$SYSTEM_PROMPT"
working_dir=\$(pwd)
cd "\$working_dir"
echo "Working directory: \$working_dir"
echo "Starting tokenizer server on port \$PORT..."
python3 qwen3_tokenizer_uid.py --port "\$PORT" &
TOKENIZER_PID=\$!
# Always reap the tokenizer — also when the main binary crashes or the
# service is stopped (the old post-hoc pkill only ran after a clean exit).
trap 'kill "\$TOKENIZER_PID" 2>/dev/null || true' EXIT
sleep 8
echo "Starting main API application (Qwen3 on LLM8850)..."
./main_api_axcl_aarch64 \\
  --system_prompt "\$SYSTEM_PROMPT" \\
  --template_filename_axmodel "qwen3-1.7b-ax650/qwen3_p128_l%d_together.axmodel" \\
  --axmodel_num 28 \\
  --url_tokenizer_model "http://127.0.0.1:\$PORT" \\
  --filename_post_axmodel qwen3-1.7b-ax650/qwen3_post.axmodel \\
  --filename_tokens_embed qwen3-1.7b-ax650/model.embed_tokens.weight.bfloat16.bin \\
  --tokens_embed_num 151936 \\
  --tokens_embed_size 2048 \\
  --use_mmap_load_embed 1 \\
  --devices 0
exit 0
SERVEEOF
# Warn if the expected model folder is missing so the user can fix the paths
# referenced inside serve.sh (some repos name the folder differently).
if [ ! -d "$QWEN_DIR/qwen3-1.7b-ax650" ]; then
  echo -e "${YELLOW}Note: Check model folder name (e.g. qwen3-1.7b-ax650) and update serve.sh paths if needed.${NC}"
  ls -la "$QWEN_DIR"
fi
chmod +x "$WORKDIR/serve.sh"
# Create systemd service
# Install a unit so the model server runs as a managed background service.
echo -e "${GREEN}[4/4] Creating systemd service...${NC}"
SERVICE_NAME="qwen3-llm8850"
SERVICE_FILE="/etc/systemd/system/$SERVICE_NAME.service"
# Unquoted heredoc: $USER and $WORKDIR expand NOW, baking the current user and
# the model checkout directory into the unit file.
# NOTE(review): "port 8000" here presumably refers to the API binary's default
# listen port (the tokenizer uses 12300) — confirm against the LLM8850 docs.
sudo tee "$SERVICE_FILE" > /dev/null << EOF
[Unit]
Description=Qwen3 LLM8850 Service (port 8000)
After=network.target
[Service]
Type=simple
User=$USER
WorkingDirectory=$WORKDIR
ExecStart=$WORKDIR/serve.sh
Restart=on-failure
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
# Register the new unit; the script deliberately does not start or enable it.
sudo systemctl daemon-reload
# Final summary: service commands and the Optidex .env settings to use.
# %b renders the embedded \033 escapes exactly as `echo -e` would.
echo ""
printf '%b\n' "${BLUE}============================================${NC}"
printf '%b\n' "${GREEN} Qwen3 on LLM8850 setup complete!${NC}"
printf '%b\n' "${BLUE}============================================${NC}"
printf '%s\n' ""
printf '%s\n' "Start the service:"
printf '%s\n' " sudo systemctl start qwen3-llm8850"
printf '%s\n' " sudo systemctl enable qwen3-llm8850 # optional: start on boot"
printf '%s\n' ""
printf '%s\n' "Then set in optidex .env:"
printf '%s\n' " LLM_SERVER=LLM8850"
printf '%s\n' " LLM8850_LLM_HOST=http://localhost:8000"
printf '%s\n' ""
printf '%s\n' "Note: If the model path in serve.sh does not match your repo (e.g. qwen3-1.7b-ax650), edit $WORKDIR/serve.sh."
printf '%s\n' "Check M5Stack docs: https://docs.m5stack.com/en/guide/ai_accelerator/llm-8850/m5_llm_8850_qwen3_1.7b"