Skip to content

Commit a87ebbf

Browse files
committed
runtime: Introduce the locals stack + use it for (inline) methods/blocks
This is a bit of a large commit because the changes are all interconnected. The main idea is that now, instead of getting locals from the activation object, we have a "locals stack" that is responsible for storing all of the locals that are used by the various activations. Every time a method is activated, the method needs to allocate some space on the locals stack. This space is the total count of all locals used by the method itself, and any inline methods/blocks defined within (as blocks/inline methods also have access to the method's locals). To calculate all of the locals required, AstGen now has the ObjectDescriptorLink struct, which is woven through code generation and calculates the local index for each local that's accessible from a given activation within a containing method. The sum is then recorded on the CreateMethod instruction, and when the method is activated, the locals stack gets the memory reserved. The GetLocal/PutLocal instructions now offset into the locals stack instead of using the activation object. In essence, we no longer use any of the assignable slot values of the activation object itself. This gives us -15% wall clock time on the Fibonacci example, on top of the -18.5% wall clock time with the initial locals implementation, bringing the total performance boost of locals to -31% or 1.45x, which is pretty huge. Note that because we still create the activation object, we currently have to copy the assignable and argument slot values to the locals stack, so there are still a lot of performance gains to be had. When constants are obtainable without going through the activation object, and the activation object is completely removed, the arguments will be used directly off the argument stack instead of being copied, which should bring a further performance boost.
1 parent 2f2e09e commit a87ebbf

12 files changed

Lines changed: 411 additions & 186 deletions

File tree

src/runtime/Activation.zig

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,17 @@ target_location: bytecode.RegisterLocation,
3131
/// block's own activation object. Not used with methods, since we don't want to
3232
/// inherit previous activation objects in methods (that would make the language
3333
/// dynamically scoped :^).
34+
// TODO: To be removed when the activation object is removed, as we will no
35+
// longer perform lookups through the activation chain.
3436
parent_activation: ?ActivationRef = null,
3537
/// Will be used as the target activation that a non-local return needs to rise
3638
/// to. Must be non-null when a non-local return is encountered, and when
3739
/// non-null, must point to an activation where
3840
/// `nonlocal_return_target_activation` is null.
3941
nonlocal_return_target_activation: ?ActivationRef = null,
42+
/// The offset within the local stack where this activation's local variables
43+
/// start. "Local 0" is reserved for the receiver of the method or block.
44+
local_stack_offset: u32,
4045

4146
// --- Activation creation info ---
4247

@@ -64,6 +69,7 @@ pub fn initInPlace(
6469
target_location: bytecode.RegisterLocation,
6570
creator_message: ByteArray,
6671
created_from: SourceRange,
72+
local_stack_offset: u32,
6773
) void {
6874
self.* = .{
6975
.activation_id = newActivationID(),
@@ -72,6 +78,7 @@ pub fn initInPlace(
7278
.stack_snapshot = context.getVM().takeStackSnapshot(),
7379
.creator_message = creator_message,
7480
.created_from = created_from.copy(),
81+
.local_stack_offset = local_stack_offset,
7582
};
7683
}
7784

@@ -226,9 +233,14 @@ pub const ActivationStack = struct {
226233
);
227234
defer token.deinit();
228235

236+
// FIXME: Eventually top-level activations will have locals too, so this will be invalid.
237+
// We should instead receive method objects here, and reserve space in the actor's
238+
// locals stack.
239+
const local_stack_offset: u32 = @intCast(vm.current_actor.locals_stack.height());
240+
229241
const toplevel_context_method = try MethodObject.createTopLevelContextForExecutable(vm.allocator, &vm.heap, &token, new_executable, new_executable.value.getEntrypointBlock());
230242
const activation_slot = try self.getNewActivationSlot(vm.allocator);
231-
toplevel_context_method.activateMethod(&token, context.getActor().id, vm.lobby_object, &.{}, target_location, source_range, activation_slot);
243+
toplevel_context_method.activateMethod(&token, context.getActor().id, vm.lobby_object, &.{}, target_location, source_range, activation_slot, local_stack_offset);
232244
}
233245
};
234246

src/runtime/Actor.zig

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,18 @@ register_file: bytecode.lowcode.RegisterFile = .{},
5858
argument_stack: ArgumentStack = .{},
5959
saved_register_stack: Stack(SavedRegister, "Saved register stack", false) = .{},
6060

61+
/// The locals for each method activation on this actor. Each method activation
62+
/// stores its own set of locals here when activated.
63+
///
64+
/// For each method, the locals are laid out as follows:
65+
///
66+
/// [receiver][method local 0]...[method local N]
67+
///
68+
/// The total number of locals for a method is the maximum number of possible locals
69+
/// that can be used at any point during the method's execution, which is the
70+
/// deepest block local depth. Some locals may be unused at certain points during
71+
/// execution, and blocks can reuse locals from their parent blocks.
72+
locals_stack: Stack(Value, "Locals stack", false) = .{},
6173

6274
/// The currently active source range. This is updated by the source_range
6375
/// instruction.
@@ -81,6 +93,7 @@ pub const StackSnapshot = struct {
8193
argument_height: usize,
8294
argument_sentinel_height: ArgumentStack.SentinelIndex,
8395
saved_register_height: usize,
96+
locals_height: usize,
8497

8598
/// Bump just the argument stack height. This is necessary because the stack
8699
/// snapshot for an activation is created while the stack still contains the
@@ -210,6 +223,7 @@ fn init(self: *Actor, actor_object: ActorObject.Ptr) void {
210223
fn deinit(self: *Actor, allocator: Allocator) void {
211224
self.clearMailbox(allocator);
212225

226+
self.locals_stack.deinit(allocator);
213227
self.argument_stack.deinit(allocator);
214228
self.saved_register_stack.deinit(allocator);
215229

@@ -221,24 +235,39 @@ fn deinit(self: *Actor, allocator: Allocator) void {
221235

222236
pub fn activateMethod(
223237
self: *Actor,
238+
vm: *VirtualMachine,
224239
token: *heap.AllocationToken,
225240
method: MethodObject.Ptr,
241+
arguments: []const Value,
226242
target_location: bytecode.RegisterLocation,
227243
source_range: SourceRange,
228244
) !void {
229-
return try self.activateMethodWithContext(token, self.actor_object.get().context, method, target_location, source_range);
245+
return try self.activateMethodWithContext(vm, token, self.actor_object.get().context, method, arguments, target_location, source_range);
230246
}
231247

232248
pub fn activateMethodWithContext(
233249
self: *Actor,
250+
vm: *VirtualMachine,
234251
token: *heap.AllocationToken,
235252
actor_context: Value,
236253
method: MethodObject.Ptr,
254+
arguments: []const Value,
237255
target_location: bytecode.RegisterLocation,
238256
source_range: SourceRange,
239257
) !void {
258+
// HACK: Transitional code. Copy initial assignable slot values and
259+
// arguments into the new activation's local stack.
260+
const assignable_slot_count = method.getMap().getAssignableSlotCount();
261+
const argument_slot_count = method.getMap().getArgumentSlotCount();
262+
263+
// NOTE: +1 because local 0 is reserved for the receiver.
264+
const local_stack_offset = try self.locals_stack.reserveSpace(vm.allocator, @intCast(method.getMap().local_depth.get() + 1), vm.global_nil);
265+
self.locals_stack.allItems()[local_stack_offset] = actor_context;
266+
@memcpy(self.locals_stack.allItems()[local_stack_offset + 1 .. local_stack_offset + 1 + assignable_slot_count], method.getAssignableSlots());
267+
@memcpy(self.locals_stack.allItems()[local_stack_offset + 1 + assignable_slot_count .. local_stack_offset + 1 + assignable_slot_count + argument_slot_count], arguments);
268+
240269
const activation_slot = try self.activation_stack.getNewActivationSlot(context.getVM().allocator);
241-
method.activateMethod(token, self.id, actor_context, &.{}, target_location, source_range, activation_slot);
270+
method.activateMethod(token, self.id, actor_context, arguments, target_location, source_range, activation_slot, @intCast(local_stack_offset));
242271
}
243272

244273
pub fn pushContext(self: *Actor) void {
@@ -270,9 +299,7 @@ pub fn execute(self: *Actor) !ActorResult {
270299
defer token.deinit();
271300
method = message.method.get();
272301

273-
const actor_context = self.actor_object.get().context;
274-
const new_activation = try self.activation_stack.getNewActivationSlot(vm.allocator);
275-
method.activateMethod(&token, self.id, actor_context, message.arguments, .zero, message.source_range, new_activation);
302+
try self.activateMethod(vm, &token, method, message.arguments, .zero, message.source_range);
276303

277304
self.message_sender = message.sender;
278305

@@ -379,12 +406,14 @@ pub fn takeStackSnapshot(self: Actor) StackSnapshot {
379406
.argument_height = self.argument_stack.height(),
380407
.argument_sentinel_height = self.argument_stack.sentinelHeight(),
381408
.saved_register_height = self.saved_register_stack.height(),
409+
.locals_height = self.locals_stack.height(),
382410
};
383411
}
384412

385413
pub fn restoreStackSnapshot(self: *Actor, snapshot: StackSnapshot) void {
386414
self.argument_stack.restoreTo(snapshot.argument_height, snapshot.argument_sentinel_height);
387415
self.saved_register_stack.restoreTo(snapshot.saved_register_height, {});
416+
self.locals_stack.restoreTo(snapshot.locals_height, {});
388417
}
389418

390419
pub fn readRegister(self: Actor, location: bytecode.RegisterLocation) Value {
@@ -428,6 +457,10 @@ pub fn visitEdges(
428457
try visitor.visit(&saved_register.value, null);
429458
}
430459

460+
for (self.locals_stack.allItems()) |*local| {
461+
try visitor.visit(local, null);
462+
}
463+
431464
{
432465
var it = self.mailbox.first;
433466
while (it) |node| : (it = node.next) {
@@ -451,6 +484,7 @@ pub fn unwindStacks(self: *Actor) void {
451484

452485
self.argument_stack.restoreTo(0, if (ArgumentStackHasSentinel) 0 else {});
453486
self.saved_register_stack.restoreTo(0, {});
487+
self.locals_stack.restoreTo(0, {});
454488
}
455489

456490
pub fn putMessageInMailbox(

src/runtime/Interpreter.zig

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ pub fn execute(self: *Interpreter) Error!Actor.ActorResult {
311311
executable.value.getObjectDescriptor(payload.descriptor_index),
312312
payload.block_index,
313313
payload.is_inline,
314+
payload.local_depth,
314315
);
315316
self.vm.writeRegister(block.getTargetLocation(index), object.asValue());
316317

@@ -331,6 +332,7 @@ pub fn execute(self: *Interpreter) Error!Actor.ActorResult {
331332
executable,
332333
executable.value.getObjectDescriptor(payload.descriptor_index),
333334
payload.block_index,
335+
payload.method_local_offset,
334336
);
335337
self.vm.writeRegister(block.getTargetLocation(index), object.asValue());
336338

@@ -388,12 +390,9 @@ pub fn execute(self: *Interpreter) Error!Actor.ActorResult {
388390
const block = self.getCurrentBytecodeBlock();
389391
const index = self.getInstructionIndex();
390392

391-
const receiver: ActivationObject.Ptr = self.actor.activation_stack.getCurrent().activation_object.get();
392-
const local_index = block.getTypedPayload(index, .GetLocal).local_index;
393-
394-
// TODO: Transitional code. Eventually we will eliminate the activation
395-
// object entirely.
396-
const value = receiver.getAssignableSlotFromLocalIndex(local_index).*;
393+
const local_stack_offset = self.getCurrentActivation().local_stack_offset;
394+
const local_index = block.getTypedPayload(index, .GetLocal).local_index.get();
395+
const value = self.actor.locals_stack.allItems()[local_stack_offset + local_index];
397396
self.vm.writeRegister(block.getTargetLocation(index), value);
398397

399398
_ = self.getCurrentActivation().advanceInstruction();
@@ -410,15 +409,14 @@ pub fn execute(self: *Interpreter) Error!Actor.ActorResult {
410409
const receiver: ActivationObject.Ptr = self.actor.activation_stack.getCurrent().activation_object.get();
411410
const payload = block.getTypedPayload(index, .PutLocal);
412411

413-
// TODO: Transitional code. Eventually we will eliminate the activation
414-
// object entirely.
415-
const target_ptr = receiver.getAssignableSlotFromLocalIndex(payload.local_index);
412+
const local_stack_offset = self.getCurrentActivation().local_stack_offset;
413+
const local_index = payload.local_index.get();
416414
const value = self.vm.readRegister(payload.value_location);
417415

418416
// David will remember that.
419417
_ = try self.vm.heap.rememberObjectReference(receiver.asValue(), value);
420418

421-
target_ptr.* = value;
419+
self.actor.locals_stack.allItems()[local_stack_offset + local_index] = value;
422420
// Assignment messages return self!
423421
self.vm.writeRegister(block.getTargetLocation(index), receiver.asValue());
424422

@@ -747,6 +745,9 @@ fn executeBlock(
747745

748746
const parent_activation_object = block.getMap().parent_activation.get(self.actor.activation_stack).?.activation_object;
749747
const activation_slot = try self.actor.activation_stack.getNewActivationSlot(self.vm.allocator);
748+
749+
// The method contains the locals of the block.
750+
const method_stack_offset = block.getMap().nonlocal_return_target_activation.get(self.actor.activation_stack).?.local_stack_offset;
750751
block.activateBlock(
751752
&token,
752753
parent_activation_object.value,
@@ -755,9 +756,25 @@ fn executeBlock(
755756
try message_name.get(self.vm.allocator),
756757
source_range,
757758
activation_slot,
759+
method_stack_offset,
758760
);
761+
762+
// HACK: Transitional code. Copy initial assignable slot values and
763+
// arguments into the new activation's local stack.
764+
const assignable_slot_count = block.getMap().getAssignableSlotCount();
765+
const argument_slot_count = block.getMap().getArgumentSlotCount();
766+
767+
// Our locals start after the method's locals.
768+
// NOTE: +1 because local 0 is reserved for the receiver.
769+
const local_stack_offset = method_stack_offset + 1 + @as(u32, @intCast(block.getMap().method_local_offset.get()));
770+
@memcpy(self.actor.locals_stack.allItems()[local_stack_offset .. local_stack_offset + assignable_slot_count], block.getAssignableSlots());
771+
@memcpy(self.actor.locals_stack.allItems()[local_stack_offset + assignable_slot_count .. local_stack_offset + assignable_slot_count + argument_slot_count], arguments);
759772
}
760773

774+
// FIXME: While inline methods *are* methods in a technical sense, they are not
775+
// initialized like normal methods because they don't have their own
776+
// space on the locals stack. Perhaps they should be split into their own
777+
// type/function?
761778
fn executeMethod(
762779
self: *const Interpreter,
763780
const_receiver: Value,
@@ -795,8 +812,37 @@ fn executeMethod(
795812
}
796813
}
797814

815+
const local_stack_offset = if (const_method.getMap().isInlineMethod())
816+
// NOTE: Because an inline method is only visible from the method that
817+
// defines it, or any other blocks/inline methods defined within
818+
// it, the currently running activation MUST be one whose local
819+
// source is the method. Therefore, we can simply reuse the
820+
// current activation's local stack offset.
821+
self.getCurrentActivation().local_stack_offset
822+
else
823+
// NOTE: +1 because local 0 is reserved for the receiver.
824+
try self.actor.locals_stack.reserveSpace(self.vm.allocator, @intCast(method.getMap().local_depth.get() + 1), self.vm.global_nil);
825+
798826
const activation_slot = try self.actor.activation_stack.getNewActivationSlot(self.vm.allocator);
799-
method.activateMethod(&token, self.actor.id, receiver_of_method, arguments, target_location, source_range, activation_slot);
827+
method.activateMethod(&token, self.actor.id, receiver_of_method, arguments, target_location, source_range, activation_slot, @intCast(local_stack_offset));
828+
829+
// HACK: Transitional code. Copy initial assignable slot values and
830+
// arguments into the new activation's local stack.
831+
const assignable_slot_count = method.getMap().getAssignableSlotCount();
832+
const argument_slot_count = method.getMap().getArgumentSlotCount();
833+
834+
const copy_base = if (method.getMap().isInlineMethod())
835+
// For inline methods, the local_depth field is reinterpreted as
836+
// the offset on top of the local stack offset.
837+
local_stack_offset + 1 + method.getMap().local_depth.get()
838+
else
839+
local_stack_offset + 1;
840+
841+
if (!method.getMap().isInlineMethod()) {
842+
self.actor.locals_stack.allItems()[local_stack_offset] = receiver_of_method;
843+
}
844+
@memcpy(self.actor.locals_stack.allItems()[copy_base .. copy_base + assignable_slot_count], method.getAssignableSlots());
845+
@memcpy(self.actor.locals_stack.allItems()[copy_base + assignable_slot_count .. copy_base + assignable_slot_count + argument_slot_count], arguments);
800846
}
801847

802848
fn createObject(
@@ -825,6 +871,7 @@ fn createMethod(
825871
object_descriptor: bytecode.ObjectDescriptor,
826872
block_index: u32,
827873
is_inline: bool,
874+
local_depth: u32,
828875
) !MethodObject.Ptr {
829876
const slot_count: u16 = @intCast(object_descriptor.slots.len);
830877
const byte_array_required_memory = slot_count * ByteArray.requiredSizeForAllocation();
@@ -847,6 +894,7 @@ fn createMethod(
847894
method_name,
848895
block,
849896
executable,
897+
local_depth,
850898
);
851899
const method_object = MethodObject.create(&token, self.actor.id, method_map);
852900
try self.writeObjectSlots(MethodMap, &token, object_descriptor, method_map, method_object);
@@ -858,6 +906,7 @@ fn createBlock(
858906
executable: bytecode.Executable.Ref,
859907
object_descriptor: bytecode.ObjectDescriptor,
860908
block_index: u32,
909+
method_local_offset: u32,
861910
) !BlockObject.Ptr {
862911
const block = executable.value.getBlock(block_index);
863912
// The latest activation is where the block was created, so it will always
@@ -896,6 +945,7 @@ fn createBlock(
896945
nonlocal_return_target_activation,
897946
block,
898947
executable,
948+
method_local_offset,
899949
);
900950
const block_object = BlockObject.create(&token, self.actor.id, block_map);
901951
try self.writeObjectSlots(BlockMap, &token, object_descriptor, block_map, block_object);

src/runtime/bytecode.zig

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ pub const lowcode = @import("./bytecode/lowcode.zig");
1414

1515
/// Index of a local slot within an activation record.
1616
pub const LocalIndex = enum(u8) {
17+
Receiver = 0,
1718
_,
1819

19-
pub fn init(value: u8) LocalIndex {
20-
return @enumFromInt(value);
20+
/// Initialize from a raw index value.
21+
pub fn initIndex(index: u8) LocalIndex {
22+
return @enumFromInt(index + 1);
2123
}
2224

2325
pub fn get(self: LocalIndex) u8 {

0 commit comments

Comments
 (0)