Skip to content

Commit 6549c56

Browse files
authored
[stack-switching] Larger stack cache (#598)
1 parent 4937d39 commit 6549c56

4 files changed

Lines changed: 44 additions & 13 deletions

File tree

src/engine/Tuning.v3

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,8 @@ component SpcTuning {
6969
var inlineGlobalAccess = true; // enable inline access of (primitive) globals
7070
var disableMemoryBoundsChecks = false; // unsafe! don't emit bounds checks
7171
}
72+
73+
// Tuning settings for stack-switching (have no effect on correctness).
74+
component StackTuning {
75+
def stackCacheSize = 8; // number of stacks to allocate in a batch in the stack manager.
76+
}

src/engine/x86-64/Mmap.v3

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,7 @@ class MemoryRange {
4949
def range(offset: int, length: int) -> Range<byte> {
5050
return CiRuntime.forgeRange(this.start + offset, length);
5151
}
52+
def render(buf: StringBuilder) -> StringBuilder {
53+
return buf.put2("MemRange[0x%x, 0x%x)", start - Pointer.NULL, end - Pointer.NULL);
54+
}
5255
}

src/engine/x86-64/V3Offsets.v3

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class V3Offsets {
6363
def NativeWasmMemory_num_pages = int.view(Pointer.atField(mem.num_pages) - Pointer.atObject(mem));
6464

6565
def X86_64Runtime_curStack = Pointer.atField(RT.curStack);
66+
def X86_64StackManager_cache = Pointer.atField(X86_64StackManager.cache);
6667
def Interpreter_dispatchTable = Pointer.atField(I.dispatchTable);
6768
def X86_64Stack_version = int.view(Pointer.atField(vs.version) - Pointer.atObject(vs));
6869
def X86_64Stack_vsp = int.view(Pointer.atField(vs.vsp) - Pointer.atObject(vs));
@@ -73,6 +74,7 @@ class V3Offsets {
7374
def X86_64Stack_bottom = int.view(Pointer.atField(vs.cont_bottom) - Pointer.atObject(vs));
7475
def X86_64Stack_state = int.view(Pointer.atField(vs.state_) - Pointer.atObject(vs));
7576
def X86_64Stack_return_results = int.view(Pointer.atField(vs.return_results) - Pointer.atObject(vs));
77+
def X86_64Stack_next_stack = int.view(Pointer.atField(vs.next_stack) - Pointer.atObject(vs));
7678

7779
def WasmFunction_typeId = Pointer.atObject(wf).load<int>();
7880

src/engine/x86-64/X86_64Stack.v3

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ class X86_64Stack extends WasmStack {
2222
var return_results: Array<ValueType>;
2323
var state_: StackState;
2424

25+
// Next entry in the singly-linked list used as the stack manager cache.
26+
var next_stack: X86_64Stack;
27+
2528
new(size) {
2629
mapping = Target.mmap_reserve(size, Mmap.PROT_READ | Mmap.PROT_WRITE);
2730
if (mapping == null) fatal("out of memory allocating value stack");
@@ -769,8 +772,18 @@ def genStackReturnParentStub(ic: X86_64InterpreterCode, w: DataWriter) {
769772
masm.emit_mov_m_l(MasmAddr(r_stack, masm.offsets.X86_64Stack_parent), 0);
770773
// mov [%stack.parent_rsp_ptr], nullptr
771774
masm.emit_mov_m_l(MasmAddr(r_stack, masm.offsets.X86_64Stack_parent_rsp_ptr), 0);
775+
772776
// l_return:
773777
masm.bindLabel(l_return);
778+
779+
// recycle %stack (already set at this point): push it onto the head of the X86_64StackManager.cache list
780+
masm.emit_mov_m_m(
781+
ValueKind.REF,
782+
MasmAddr(r_stack, masm.offsets.X86_64Stack_next_stack),
783+
MasmAddr(Reg(0), int.!(masm.offsets.X86_64StackManager_cache - Pointer.NULL))
784+
);
785+
masm.emit_mov_m_r(ValueKind.REF, MasmAddr(Reg(0), int.!(masm.offsets.X86_64StackManager_cache - Pointer.NULL)), r_stack);
786+
774787
// mov [cur_stack], %parent
775788
masm.emit_set_curstack(r_parent);
776789
// pop %rsp
@@ -1153,29 +1166,37 @@ private class X86_64FrameWriter extends FrameWriter {
11531166
component X86_64StackManager {
11541167
var cache: X86_64Stack;
11551168

1156-
def getFreshStack() -> X86_64Stack {
1157-
var result: X86_64Stack;
1158-
if (cache == null) {
1159-
result = X86_64Stack.new(EngineOptions.STACK_SIZE.get());
1160-
} else {
1161-
result = cache;
1162-
cache = null;
1169+
def allocStackBatch() {
1170+
if (Trace.stack) Trace.OUT.put1("Batch allocating %d stacks", StackTuning.stackCacheSize).ln();
1171+
for (i < StackTuning.stackCacheSize) {
1172+
var curr = X86_64Stack.new(EngineOptions.STACK_SIZE.get());
1173+
curr.next_stack = cache;
1174+
cache = curr;
11631175
}
1164-
if (Trace.stack) Trace.OUT.put1(
1165-
"Requested fresh x86_stack @ 0x%x", Pointer.atObject(result) - Pointer.NULL
1166-
).ln();
1167-
return result;
11681176
}
1177+
1178+
def getFreshStack() -> X86_64Stack {
1179+
if (cache == null) allocStackBatch();
1180+
1181+
var result = cache;
1182+
cache = cache.next_stack;
1183+
result.next_stack = null;
1184+
1185+
if (Trace.stack) Trace.OUT.put1("Requested fresh x86_stack @ 0x%x", Pointer.atObject(result) - Pointer.NULL).ln();
1186+
return result.clear();
1187+
}
1188+
11691189
def recycleStack(stack: X86_64Stack) {
1170-
if (cache == null) cache = stack; // XXX: save the larger/smaller of the stacks?
1190+
stack.next_stack = cache;
1191+
cache = stack;
11711192
}
1193+
11721194
def runOnFreshStack(f: Function, args: Range<Value>) -> Result {
11731195
// Always run functions on a separate, fresh stack.
11741196
var prev = X86_64Runtime.curStack; // handle reentrancy
11751197
var stack = X86_64StackManager.getFreshStack();
11761198
var result = stack.reset(f).bind(args).resume();
11771199
X86_64Runtime.curStack = prev;
1178-
X86_64StackManager.recycleStack(stack.clear());
11791200
return result;
11801201
}
11811202
}

0 commit comments

Comments
 (0)