-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathswap_copy.zig
More file actions
91 lines (74 loc) Β· 3.06 KB
/
swap_copy.zig
File metadata and controls
91 lines (74 loc) Β· 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
//! cuBLAS SWAP + COPY example.
//!
//! SWAP: exchanges the contents of two vectors in place.
//! COPY: copies one vector into another.
//!
//! Reference: CUDALibrarySamples/cuBLAS/Level-1/swap + Level-1/copy
const std = @import("std");
const cuda = @import("zcuda");
/// Runs the SCOPY and SSWAP demonstrations and validates their results.
///
/// Two small host vectors are uploaded to the device for each demonstration.
/// After the cuBLAS call the device vectors are copied back to the host,
/// printed, and compared against the expected contents.
///
/// Returns `error.ValidationFailed` when a read-back element differs from the
/// expected one by more than 1e-5; errors from the `zcuda` calls are
/// propagated unchanged.
pub fn main() !void {
    std.debug.print("=== cuBLAS SWAP + COPY Example ===\n\n", .{});

    // NOTE(review): the argument 0 is presumably the device ordinal; confirm
    // against the zcuda API.
    const ctx = try cuda.driver.CudaContext.new(0);
    defer ctx.deinit();
    const stream = ctx.defaultStream();
    const blas = try cuda.cublas.CublasContext.init(ctx);
    defer blas.deinit();

    const a_data = [_]f32{ 1.0, 2.0, 3.0, 4.0, 5.0 };
    const b_data = [_]f32{ 10.0, 20.0, 30.0, 40.0, 50.0 };

    // Derive the element count and host buffer sizes from the data itself so
    // that editing the sample vectors cannot desynchronise them from the
    // count handed to cuBLAS.
    comptime std.debug.assert(a_data.len == b_data.len);
    const n: i32 = @intCast(a_data.len);

    // NOTE(review): the decorative characters in the section banners and the
    // success messages look mis-encoded in this copy of the file; confirm
    // against the original source before changing them.

    // --- SCOPY: y = x ---
    std.debug.print("βββ SCOPY: y = x βββ\n", .{});
    {
        const d_x = try stream.cloneHtoD(f32, &a_data);
        defer d_x.deinit();
        const d_y = try stream.cloneHtoD(f32, &b_data);
        defer d_y.deinit();

        std.debug.print(" Before: x = [ ", .{});
        printElements(&a_data);
        std.debug.print("] y = [ ", .{});
        printElements(&b_data);
        std.debug.print("]\n", .{});

        try blas.scopy(n, d_x, d_y);

        var h_y: [b_data.len]f32 = undefined;
        try stream.memcpyDtoH(f32, &h_y, d_y);

        std.debug.print(" After: y = [ ", .{});
        printElements(&h_y);
        std.debug.print("]\n", .{});

        // y must now hold x's original contents.
        try expectElementsClose(&a_data, &h_y);
        std.debug.print(" β y now contains a copy of x\n\n", .{});
    }

    // --- SSWAP: swap(x, y) ---
    std.debug.print("βββ SSWAP: swap(x, y) βββ\n", .{});
    {
        const d_x = try stream.cloneHtoD(f32, &a_data);
        defer d_x.deinit();
        const d_y = try stream.cloneHtoD(f32, &b_data);
        defer d_y.deinit();

        std.debug.print(" Before: x = [ ", .{});
        printElements(&a_data);
        std.debug.print("] y = [ ", .{});
        printElements(&b_data);
        std.debug.print("]\n", .{});

        try blas.sswap(n, d_x, d_y);

        var h_x: [a_data.len]f32 = undefined;
        var h_y: [b_data.len]f32 = undefined;
        try stream.memcpyDtoH(f32, &h_x, d_x);
        try stream.memcpyDtoH(f32, &h_y, d_y);

        std.debug.print(" After: x = [ ", .{});
        printElements(&h_x);
        std.debug.print("] y = [ ", .{});
        printElements(&h_y);
        std.debug.print("]\n", .{});

        // x should now have b's data, y should have a's data.
        try expectElementsClose(&b_data, &h_x);
        try expectElementsClose(&a_data, &h_y);
        std.debug.print(" β Vectors swapped successfully\n", .{});
    }

    std.debug.print("\nβ cuBLAS SWAP + COPY complete\n", .{});
}

/// Prints each element of `values` with zero decimal places, followed by a space.
fn printElements(values: []const f32) void {
    for (values) |value| std.debug.print("{d:.0} ", .{value});
}

/// Returns `error.ValidationFailed` if any pair of elements at the same index
/// differs by more than 1e-5. Both slices must have the same length.
fn expectElementsClose(expected: []const f32, actual: []const f32) error{ValidationFailed}!void {
    for (expected, actual) |want, got| {
        if (@abs(want - got) > 1e-5) return error.ValidationFailed;
    }
}