Skip to content

Commit 6641ff1

Browse files
author
liujialai
committed
[TENT][Sunrise] Add sunrise_link transport, platform support, and UT coverage
Integrate Sunrise platform/transport wiring across TENT runtime and examples, add SunriseLink end-to-end unit tests, and fix RDMA error logging pointer formatting to avoid crash during registration failure paths. Made-with: Cursor
1 parent 738b375 commit 6641ff1

23 files changed

Lines changed: 2200 additions & 125 deletions

File tree

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,16 @@ cmake-build-relwithdebinfo
190190
cmake-build-minsizerel
191191
cmake-build
192192

193+
# Out-of-tree / developer-local CMake build directories
194+
build-sunrise/
195+
build-mc-te/
196+
197+
# Local tooling (machine-specific; not part of upstream sources)
198+
.clangd
199+
200+
# Temporary local notes (e.g. PR description drafts); keep out of version control
201+
/tmp_sunrise_pr_description.md
202+
193203
libetcd_wrapper.h
194204

195205
mooncake-wheel/mooncake/allocator.py
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Sunrise Link Transport
2+
3+
## 概述
4+
Sunrise Link 是 Mooncake TENT 传输框架中的一个 GPU 传输后端。它通过 Tang Runtime(`tangrt`)提供设备内存分配、指针属性查询、Peer Copy 与 IPC 句柄相关能力,并在 TENT 中以 `SUNRISE_LINK` 传输类型注册。
5+
6+
在运行时,当编译开启 `USE_SUNRISE` 且配置项 `transports/sunrise_link/enable=true` 时,`TransferEngineImpl` 会加载 Sunrise Link Transport。
7+
8+
---
9+
10+
## 新增依赖
11+
Sunrise Link Transport 在 Mooncake 基础依赖外,额外依赖 Tang Runtime:
12+
13+
- **头文件路径**:`/usr/local/tangrt/include`
14+
- **库路径**:`/usr/local/tangrt/lib/linux-x86_64`
15+
- **动态库**:`libtangrt_shared.so`(运行时按默认安装路径加载)
16+
17+
建议确认 `tangrt`、`ptml` 相关运行库可被动态链接器找到(例如通过 `LD_LIBRARY_PATH` 或系统库路径配置)。
18+
19+
---
20+
21+
## 构建与编译
22+
23+
**前置条件**
24+
25+
- 已安装可用的 Tang Runtime(默认安装到 `/usr/local/tangrt`)
26+
- 编译环境可访问 Mooncake 及其基础依赖
27+
28+
**CMake 配置**
29+
30+
```bash
31+
# 克隆 Mooncake 仓库
32+
git clone https://github.com/kvcache-ai/Mooncake.git
33+
cd Mooncake
34+
35+
# 启用 TENT + Sunrise Link
36+
mkdir build && cd build
37+
cmake .. -DUSE_TENT=ON -DUSE_SUNRISE=ON
38+
39+
# 编译
40+
make -j$(nproc)
41+
```
42+
43+
---
44+
45+
## 运行与测试
46+
47+
`transfer_engine_bench` 支持 `sunrise_link` 协议,可用于基本连通性与性能验证。
48+
49+
```bash
50+
# 终端 1:目标端(Target)
51+
./transfer_engine_bench \
52+
--mode=target \
53+
--protocol=sunrise_link \
54+
--local_server_name=10.0.0.2 \
55+
--metadata_server=P2PHANDSHAKE \
56+
--gpu_id=0
57+
58+
# 终端 2:发起端(Initiator)
59+
./transfer_engine_bench \
60+
--mode=initiator \
61+
--protocol=sunrise_link \
62+
--metadata_server=P2PHANDSHAKE \
63+
--segment_id=10.0.0.2:$PORT \
64+
--gpu_id=0 \
65+
--block_size=8388608 \
66+
--batch_size=32
67+
```
68+
69+
> 说明:当 `metadata_server=P2PHANDSHAKE` 时,目标端实际监听端口可能为动态分配端口。请以目标端日志中打印的实际端口替换 `--segment_id` 中的 `$PORT`。
70+
71+
---
72+
73+
## 配置项说明(TENT)
74+
75+
Sunrise Link 支持通过配置文件调整行为,常用项包括:
76+
77+
- `transports/sunrise_link/enable`:是否启用该传输(默认 `true`)
78+
- `transports/sunrise_link/async_memcpy_threshold`:异步拷贝阈值(单位 MiB)
79+
80+
可根据业务负载与设备拓扑微调上述参数。
Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,65 @@
11
set(WORKSPACE "${CMAKE_CURRENT_SOURCE_DIR}")
22

3-
if (USE_HIP)
3+
if(USE_HIP)
44
file(GLOB EXAMPLE_SOURCES "*.cpp")
55
hipify_files(EXAMPLE_SOURCES)
66

7-
file(RELATIVE_PATH EXAMPLE_REL_PATH "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
7+
file(RELATIVE_PATH EXAMPLE_REL_PATH "${CMAKE_SOURCE_DIR}"
8+
"${CMAKE_CURRENT_SOURCE_DIR}")
89
set(WORKSPACE "${CMAKE_BINARY_DIR}/${EXAMPLE_REL_PATH}")
910
endif()
1011

1112
add_executable(transfer_engine_bench ${WORKSPACE}/transfer_engine_bench.cpp)
1213
target_link_libraries(transfer_engine_bench PUBLIC transfer_engine)
13-
if (USE_TENT)
14-
target_link_libraries(transfer_engine_bench PUBLIC tent)
15-
target_compile_definitions(transfer_engine_bench PRIVATE USE_TENT)
14+
if(USE_TENT)
15+
target_link_libraries(transfer_engine_bench PUBLIC tent)
16+
target_compile_definitions(transfer_engine_bench PRIVATE USE_TENT)
1617
endif()
1718

18-
add_executable(transfer_engine_validator ${WORKSPACE}/transfer_engine_validator.cpp)
19+
add_executable(transfer_engine_sunrise_bench
20+
${WORKSPACE}/transfer_engine_sunrise_bench.cpp)
21+
target_link_libraries(transfer_engine_sunrise_bench PUBLIC transfer_engine)
22+
if(USE_TENT)
23+
target_link_libraries(transfer_engine_sunrise_bench PUBLIC tent)
24+
target_compile_definitions(transfer_engine_sunrise_bench PRIVATE USE_TENT)
25+
endif()
26+
27+
add_executable(transfer_engine_validator
28+
${WORKSPACE}/transfer_engine_validator.cpp)
1929
target_link_libraries(transfer_engine_validator PUBLIC transfer_engine)
2030

21-
add_executable(transfer_engine_bench_with_notify ${WORKSPACE}/transfer_engine_bench_with_notify.cpp)
31+
add_executable(transfer_engine_bench_with_notify
32+
${WORKSPACE}/transfer_engine_bench_with_notify.cpp)
2233
target_link_libraries(transfer_engine_bench_with_notify PUBLIC transfer_engine)
2334

2435
add_executable(memory_pool ${WORKSPACE}/memory_pool.cpp)
2536
target_link_libraries(memory_pool PUBLIC transfer_engine)
2637

27-
if (USE_ASCEND)
28-
add_executable(transfer_engine_ascend_one_sided ${WORKSPACE}/transfer_engine_ascend_one_sided.cpp)
29-
target_link_libraries(transfer_engine_ascend_one_sided PUBLIC transfer_engine)
38+
if(USE_ASCEND)
39+
add_executable(transfer_engine_ascend_one_sided
40+
${WORKSPACE}/transfer_engine_ascend_one_sided.cpp)
41+
target_link_libraries(transfer_engine_ascend_one_sided PUBLIC transfer_engine)
3042

31-
add_executable(transfer_engine_ascend_perf ${WORKSPACE}/transfer_engine_ascend_perf.cpp)
32-
target_link_libraries(transfer_engine_ascend_perf PUBLIC transfer_engine)
43+
add_executable(transfer_engine_ascend_perf
44+
${WORKSPACE}/transfer_engine_ascend_perf.cpp)
45+
target_link_libraries(transfer_engine_ascend_perf PUBLIC transfer_engine)
3346
endif()
3447

35-
if (USE_ASCEND_DIRECT)
36-
add_executable(transfer_engine_ascend_direct_perf ${WORKSPACE}/transfer_engine_ascend_direct_perf.cpp)
37-
target_link_libraries(transfer_engine_ascend_direct_perf PUBLIC ascendcl transfer_engine)
48+
if(USE_ASCEND_DIRECT)
49+
add_executable(transfer_engine_ascend_direct_perf
50+
${WORKSPACE}/transfer_engine_ascend_direct_perf.cpp)
51+
target_link_libraries(transfer_engine_ascend_direct_perf
52+
PUBLIC ascendcl transfer_engine)
3853
endif()
3954

40-
if (USE_ASCEND_HETEROGENEOUS)
41-
add_executable(transfer_engine_heterogeneous_ascend_perf_initiator ${WORKSPACE}/transfer_engine_heterogeneous_ascend_perf_initiator.cpp)
42-
target_link_libraries(transfer_engine_heterogeneous_ascend_perf_initiator PUBLIC transfer_engine)
55+
if(USE_ASCEND_HETEROGENEOUS)
56+
add_executable(
57+
transfer_engine_heterogeneous_ascend_perf_initiator
58+
${WORKSPACE}/transfer_engine_heterogeneous_ascend_perf_initiator.cpp)
59+
target_link_libraries(transfer_engine_heterogeneous_ascend_perf_initiator
60+
PUBLIC transfer_engine)
4361
endif()
4462

45-
if (USE_UBSHMEM)
46-
target_link_libraries(transfer_engine_bench PUBLIC transfer_engine)
47-
endif()
63+
if(USE_UBSHMEM)
64+
target_link_libraries(transfer_engine_bench PUBLIC transfer_engine)
65+
endif()

mooncake-transfer-engine/example/transfer_engine_bench.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#endif
4444

4545
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
46-
defined(USE_MACA) || defined(USE_UBSHMEM)
46+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
4747
#include <cassert>
4848

4949
#if defined(USE_MNNVL) || defined(USE_UBSHMEM)
@@ -88,7 +88,8 @@ DEFINE_string(mode, "initiator",
8888
DEFINE_string(operation, "read", "Operation type: read or write");
8989

9090
DEFINE_string(protocol, "rdma",
91-
"Transfer protocol: rdma|barex|tcp|efa|nvlink|nvlink_intra|hip");
91+
"Transfer protocol: "
92+
"rdma|barex|tcp|efa|nvlink|nvlink_intra|hip|sunrise_link");
9293

9394
DEFINE_string(device_name, "mlx5_2",
9495
"Device name to use, valid if protocol=rdma");
@@ -107,7 +108,7 @@ DEFINE_uint32(report_precision, 2, "Report precision");
107108
DEFINE_string(backend, "classic", "Backend to use: classic|tent");
108109

109110
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
110-
defined(USE_MACA) || defined(USE_UBSHMEM)
111+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
111112
DEFINE_bool(use_vram, true, "Allocate memory from GPU/NPU VRAM");
112113
DEFINE_bool(init_mem, true, "Initialize allocated memory");
113114
DEFINE_int32(gpu_id, 0,
@@ -119,7 +120,7 @@ using namespace mooncake;
119120
static void *allocateMemoryPool(size_t size, int buffer_id,
120121
bool from_vram = false) {
121122
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
122-
defined(USE_MACA) || defined(USE_UBSHMEM)
123+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
123124
if (from_vram) {
124125
int gpu_id;
125126
if (FLAGS_gpu_id == -1) {
@@ -190,7 +191,7 @@ static void *allocateMemoryPool(size_t size, int buffer_id,
190191

191192
static void freeMemoryPool(void *addr, size_t size) {
192193
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
193-
defined(USE_MACA) || defined(USE_UBSHMEM)
194+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
194195
if (FLAGS_protocol == "nvlink" || FLAGS_protocol == "hip") {
195196
#ifdef USE_MNNVL
196197
if (FLAGS_use_vram) {
@@ -214,6 +215,10 @@ static void freeMemoryPool(void *addr, size_t size) {
214215
#endif
215216
} else {
216217
#ifndef USE_UBSHMEM
218+
if (!FLAGS_use_vram) {
219+
numa_free(addr, size);
220+
return;
221+
}
217222
// check pointer on GPU
218223
cudaPointerAttributes attributes;
219224
checkCudaError(cudaPointerGetAttributes(&attributes, addr),
@@ -271,10 +276,21 @@ static inline std::string calculateRate(uint64_t data_bytes, double duration) {
271276
volatile bool running = true;
272277
std::atomic<size_t> total_batch_count(0);
273278

279+
// Ensure each worker thread has a valid GPU context before issuing transfers.
280+
static inline void setWorkerDeviceIfNeeded() {
281+
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
282+
defined(USE_MACA) || defined(USE_SUNRISE)
283+
if (FLAGS_use_vram && FLAGS_gpu_id >= 0) {
284+
checkCudaError(cudaSetDevice(FLAGS_gpu_id),
285+
"Failed to set device in worker");
286+
}
287+
#endif
288+
}
289+
274290
// Common helper to determine buffer count based on GPU/NUMA configuration
275291
static int determineBufferCount() {
276292
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
277-
defined(USE_MACA)
293+
defined(USE_MACA) || defined(USE_SUNRISE)
278294
if (FLAGS_use_vram) {
279295
int gpu_num;
280296
LOG(INFO) << "VRAM is used";
@@ -305,7 +321,7 @@ static std::vector<void *> allocateBuffers() {
305321
buffer_num = determineBufferCount();
306322
std::vector<void *> addr(buffer_num);
307323
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
308-
defined(USE_MACA) || defined(USE_UBSHMEM)
324+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
309325
for (int i = 0; i < buffer_num; ++i) {
310326
addr[i] = allocateMemoryPool(FLAGS_buffer_size, i, FLAGS_use_vram);
311327
}
@@ -328,7 +344,7 @@ static void freeBuffers(std::vector<void *> &addr) {
328344
// Helper to get location name for classic backend
329345
static std::string getLocationName(int buffer_id) {
330346
#if defined(USE_CUDA) || defined(USE_MUSA) || defined(USE_HIP) || \
331-
defined(USE_MACA) || defined(USE_UBSHMEM)
347+
defined(USE_MACA) || defined(USE_UBSHMEM) || defined(USE_SUNRISE)
332348
if (FLAGS_use_vram) {
333349
int name_suffix = (FLAGS_gpu_id == -1) ? buffer_id : FLAGS_gpu_id;
334350
return std::string(GPU_PREFIX) + std::to_string(name_suffix);
@@ -476,7 +492,8 @@ static Transport *installTransportFromFlags(TransferEngine *engine) {
476492
xport = engine->installTransport("efa", nullptr);
477493
} else if (FLAGS_protocol == "tcp" || FLAGS_protocol == "nvlink" ||
478494
FLAGS_protocol == "hip" || FLAGS_protocol == "nvlink_intra" ||
479-
FLAGS_protocol == "ubshmem") {
495+
FLAGS_protocol == "ubshmem" ||
496+
FLAGS_protocol == "sunrise_link") {
480497
xport = engine->installTransport(FLAGS_protocol.c_str(), nullptr);
481498
} else {
482499
LOG(ERROR) << "Unsupported protocol: " << FLAGS_protocol;
@@ -644,6 +661,7 @@ void initiatorWorker(mooncake::tent::TransferEngine *engine,
644661
void *addr,
645662
const mooncake::tent::SegmentInfo &segment_info) {
646663
bindToSocket(thread_id % NR_SOCKETS);
664+
setWorkerDeviceIfNeeded();
647665
mooncake::tent::Request::OpCode opcode;
648666
if (FLAGS_operation == "read")
649667
opcode = mooncake::tent::Request::READ;

mooncake-transfer-engine/include/cuda_alike.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
#include "gpu_vendor/ubshmem.h"
1414
#elif defined(USE_MACA)
1515
#include "gpu_vendor/maca.h"
16+
#elif defined(USE_SUNRISE)
17+
#include "gpu_vendor/sunrise.h"
1618
#endif
1719

1820
#if !defined(USE_HIP) && !defined(USE_MUSA) && !defined(USE_MLU) && \
19-
!defined(USE_UBSHMEM) && !defined(USE_MACA)
21+
!defined(USE_UBSHMEM) && !defined(USE_MACA) && !defined(USE_SUNRISE)
2022
#include <string>
2123
const static std::string GPU_PREFIX = "cuda:";
2224
#endif
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once
2+
3+
#include <string>
4+
5+
#include <tang_runtime_api.h>
6+
7+
// SunriseLink uses CUDA-style location naming in metadata.
8+
const static std::string GPU_PREFIX = "cuda:";
9+
10+
// Minimal CUDA-like API shim for benchmark paths.
11+
#define cudaError_t tangError_t
12+
#define cudaSuccess tangSuccess
13+
#define cudaGetErrorString tangGetErrorString
14+
15+
#define cudaSetDevice tangSetDevice
16+
#define cudaGetDeviceCount tangGetDeviceCount
17+
18+
#define cudaMalloc tangMalloc
19+
#define cudaFree tangFree
20+
#define cudaMemset tangMemset
21+
22+
#define cudaPointerAttributes tangPointerAttributes
23+
#define cudaPointerGetAttributes tangPointerGetAttributes
24+
#define cudaMemoryTypeDevice tangMemoryTypeDevice
25+
#define cudaMemoryTypeHost tangMemoryTypeHost
26+
#define cudaMemoryTypeUnregistered tangMemoryTypeUnregistered
27+
28+
#define cudaStreamSynchronize tangStreamSynchronize

mooncake-transfer-engine/tent/include/tent/common/types.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,10 @@ enum TransportType {
7979
IOURING,
8080
TCP,
8181
AscendDirect,
82+
SUNRISE_LINK,
8283
UNSPEC
8384
};
84-
const static int kSupportedTransportTypes = 8;
85+
const static int kSupportedTransportTypes = (int)TransportType::UNSPEC;
8586

8687
struct MemoryOptions {
8788
Location location = kWildcardLocation;
@@ -107,4 +108,4 @@ struct SegmentInfo {
107108
} // namespace tent
108109
} // namespace mooncake
109110

110-
#endif // TENT_TYPES_H
111+
#endif // TENT_TYPES_H

0 commit comments

Comments
 (0)