Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
cb66913
Add Linux auto-standby controller and e2e coverage
sjmiller609 Apr 4, 2026
14cff19
Merge branch 'main' into codex/auto-standby-e2e
sjmiller609 Apr 4, 2026
d2892cb
Expose auto-standby policy in Stainless config
sjmiller609 Apr 4, 2026
d301651
Address auto-standby review feedback
sjmiller609 Apr 5, 2026
875dd25
Default-skip compression standby integration test
sjmiller609 Apr 5, 2026
4043a13
Clarify auto-standby update metadata handling
sjmiller609 Apr 6, 2026
5c1df95
Wire auto-standby controller into API app
sjmiller609 Apr 6, 2026
789a08c
Make auto-standby event-driven and observable
sjmiller609 Apr 6, 2026
16e6c20
Add periodic auto-standby snapshot sync
sjmiller609 Apr 6, 2026
767b8a2
Fix auto-standby review follow-ups
sjmiller609 Apr 6, 2026
73758cd
Format auto-standby files for CI
sjmiller609 Apr 6, 2026
e9bf333
Add scope for auto-standby status route
sjmiller609 Apr 6, 2026
53f2017
Consolidate instance lifecycle subscriptions
sjmiller609 Apr 6, 2026
134628d
Add config for lifecycle event buffer size
sjmiller609 Apr 6, 2026
165502c
Merge consolidate instance lifecycle bus
sjmiller609 Apr 6, 2026
91fa483
Merge remote-tracking branch 'origin/main' into codex/auto-standby-e2e
sjmiller609 Apr 6, 2026
c81e780
Merge remote-tracking branch 'origin/main' into codex/auto-standby-e2e
sjmiller609 Apr 7, 2026
9e480be
Fix Linux conntrack event test encoding
sjmiller609 Apr 7, 2026
ed02ddb
Clear stale auto-standby runtime on ineligible updates
sjmiller609 Apr 7, 2026
6f3c229
Preserve auto-standby runtime on lifecycle events
sjmiller609 Apr 7, 2026
76caba7
Clone snapshot policy in metadata copies
sjmiller609 Apr 7, 2026
896ee1a
Harden auto-standby startup and reconnects
sjmiller609 Apr 7, 2026
f9800fe
Add auto-standby timer and lifecycle tests
sjmiller609 Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/api/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package api

import (
"github.com/kernel/hypeman/cmd/api/config"
"github.com/kernel/hypeman/lib/autostandby"
"github.com/kernel/hypeman/lib/builds"
"github.com/kernel/hypeman/lib/devices"
"github.com/kernel/hypeman/lib/guestmemory"
Expand All @@ -27,6 +28,7 @@ type ApiService struct {
BuildManager builds.Manager
ResourceManager *resources.Manager
GuestMemoryController guestmemory.Controller
AutoStandbyController *autostandby.Controller
VMMetricsManager *vm_metrics.Manager
}

Expand All @@ -44,6 +46,7 @@ func New(
buildManager builds.Manager,
resourceManager *resources.Manager,
guestMemoryController guestmemory.Controller,
autoStandbyController *autostandby.Controller,
vmMetricsManager *vm_metrics.Manager,
) *ApiService {
return &ApiService{
Expand All @@ -57,6 +60,7 @@ func New(
BuildManager: buildManager,
ResourceManager: resourceManager,
GuestMemoryController: guestMemoryController,
AutoStandbyController: autoStandbyController,
VMMetricsManager: vmMetricsManager,
}
}
62 changes: 62 additions & 0 deletions cmd/api/api/auto_standby.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package api

import (
"fmt"

"github.com/kernel/hypeman/lib/autostandby"
"github.com/kernel/hypeman/lib/oapi"
"github.com/samber/lo"
)

// toDomainAutoStandbyPolicy translates an API-level auto-standby policy into
// its domain representation.
//
// A nil input yields (nil, nil). Optional fields that are unset in the request
// keep their zero value on the result. Slices are copied so the returned
// policy does not alias the request body. An error is returned when any
// ignored destination port falls outside 1..65535.
func toDomainAutoStandbyPolicy(policy *oapi.AutoStandbyPolicy) (*autostandby.Policy, error) {
	if policy == nil {
		return nil, nil
	}

	var domain autostandby.Policy

	if enabled := policy.Enabled; enabled != nil {
		domain.Enabled = *enabled
	}
	if timeout := policy.IdleTimeout; timeout != nil {
		domain.IdleTimeout = *timeout
	}
	if cidrs := policy.IgnoreSourceCidrs; cidrs != nil {
		domain.IgnoreSourceCIDRs = append([]string(nil), (*cidrs)...)
	}
	if ports := policy.IgnoreDestinationPorts; ports != nil {
		converted := make([]uint16, 0, len(*ports))
		for _, candidate := range *ports {
			// Validate before narrowing so out-of-range values are rejected
			// instead of silently wrapping in the uint16 conversion.
			if candidate < 1 || candidate > 65535 {
				return nil, fmt.Errorf("auto_standby.ignore_destination_ports must be between 1 and 65535")
			}
			converted = append(converted, uint16(candidate))
		}
		domain.IgnoreDestinationPorts = converted
	}

	return &domain, nil
}

// toOAPIAutoStandbyPolicy renders a domain auto-standby policy as its API
// representation.
//
// A nil policy maps to nil. Enabled is always populated; the idle timeout and
// the two ignore lists are only set when non-empty. Slices are copied so the
// response does not share backing storage with the domain policy.
func toOAPIAutoStandbyPolicy(policy *autostandby.Policy) *oapi.AutoStandbyPolicy {
	if policy == nil {
		return nil
	}

	var api oapi.AutoStandbyPolicy
	api.Enabled = lo.ToPtr(policy.Enabled)

	if policy.IdleTimeout != "" {
		api.IdleTimeout = lo.ToPtr(policy.IdleTimeout)
	}

	if len(policy.IgnoreSourceCIDRs) > 0 {
		cidrs := append([]string(nil), policy.IgnoreSourceCIDRs...)
		api.IgnoreSourceCidrs = &cidrs
	}

	if count := len(policy.IgnoreDestinationPorts); count > 0 {
		// Widen each uint16 port to the int type used by the API model.
		widened := make([]int, count)
		for i, port := range policy.IgnoreDestinationPorts {
			widened[i] = int(port)
		}
		api.IgnoreDestinationPorts = &widened
	}

	return &api
}
79 changes: 79 additions & 0 deletions cmd/api/api/auto_standby_status.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package api

import (
	"context"
	"errors"

	"github.com/kernel/hypeman/lib/autostandby"
	"github.com/kernel/hypeman/lib/instances"
	"github.com/kernel/hypeman/lib/logger"
	"github.com/kernel/hypeman/lib/oapi"
	"github.com/samber/lo"
)

// GetAutoStandbyStatus reports the auto-standby status for one instance.
//
// The instance is resolved through the instance manager using request.Id.
// A not-found or ambiguous-name lookup is answered with a 404 response; any
// other lookup failure is logged and answered with a 500 response. In both
// cases the Go error return is nil because the failure is carried in the
// response object.
//
// When no auto-standby controller is wired into the service, the response is
// a fixed "unsupported" snapshot that still reflects whether the instance has
// a policy configured and enabled. Otherwise the controller's Describe result
// is returned.
func (s *ApiService) GetAutoStandbyStatus(ctx context.Context, request oapi.GetAutoStandbyStatusRequestObject) (oapi.GetAutoStandbyStatusResponseObject, error) {
	log := logger.FromContext(ctx)

	inst, err := s.InstanceManager.GetInstance(ctx, request.Id)
	if err != nil {
		// errors.Is (rather than ==) so that sentinels wrapped with extra
		// context by the manager still map to 404 instead of a 500.
		if errors.Is(err, instances.ErrNotFound) || errors.Is(err, instances.ErrAmbiguousName) {
			return oapi.GetAutoStandbyStatus404JSONResponse{
				Code:    "not_found",
				Message: "instance not found",
			}, nil
		}
		log.ErrorContext(ctx, "failed to resolve instance for auto-standby status", "instance_id", request.Id, "error", err)
		return oapi.GetAutoStandbyStatus500JSONResponse{
			Code:    "internal_error",
			Message: "failed to load instance",
		}, nil
	}

	// Fallback used when the controller is absent: report the policy flags
	// stored on the instance but mark the feature as unsupported.
	snapshot := autostandby.StatusSnapshot{
		Supported:    false,
		Configured:   inst.AutoStandby != nil,
		Enabled:      inst.AutoStandby != nil && inst.AutoStandby.Enabled,
		TrackingMode: "conntrack_events_v4_tcp",
		Status:       autostandby.StatusUnsupported,
		Reason:       autostandby.ReasonUnsupportedPlatform,
	}
	if s.AutoStandbyController != nil {
		snapshot = s.AutoStandbyController.Describe(instanceToAutoStandby(*inst))
	}

	return oapi.GetAutoStandbyStatus200JSONResponse(toOAPIAutoStandbyStatus(snapshot)), nil
}

// instanceToAutoStandby projects an instances.Instance onto the subset of
// fields carried by autostandby.Instance.
//
// HasVGPU is true when the instance has either a GPU profile or a GPU mdev
// UUID set. The AutoStandby policy pointer is passed through as-is.
func instanceToAutoStandby(inst instances.Instance) autostandby.Instance {
	usesVGPU := inst.GPUProfile != "" || inst.GPUMdevUUID != ""

	var view autostandby.Instance
	view.ID = inst.Id
	view.Name = inst.Name
	view.State = string(inst.State)
	view.NetworkEnabled = inst.NetworkEnabled
	view.IP = inst.IP
	view.HasVGPU = usesVGPU
	view.AutoStandby = inst.AutoStandby
	return view
}

// toOAPIAutoStandbyStatus converts a controller status snapshot into the API
// response model.
//
// Scalar and timestamp fields are copied directly. IdleTimeout is only set
// when the snapshot carries a non-empty value, and CountdownRemaining is only
// set when present, rendered through its String method.
func toOAPIAutoStandbyStatus(status autostandby.StatusSnapshot) oapi.AutoStandbyStatus {
	resp := oapi.AutoStandbyStatus{
		ActiveInboundConnections: status.ActiveInboundCount,
		Configured:               status.Configured,
		Eligible:                 status.Eligible,
		Enabled:                  status.Enabled,
		IdleSince:                status.IdleSince,
		LastInboundActivityAt:    status.LastInboundActivityAt,
		NextStandbyAt:            status.NextStandbyAt,
		Reason:                   oapi.AutoStandbyStatusReason(status.Reason),
		Status:                   oapi.AutoStandbyStatusStatus(status.Status),
		Supported:                status.Supported,
		TrackingMode:             status.TrackingMode,
	}

	if timeout := status.IdleTimeout; timeout != "" {
		resp.IdleTimeout = lo.ToPtr(timeout)
	}
	if remaining := status.CountdownRemaining; remaining != nil {
		resp.CountdownRemaining = lo.ToPtr(remaining.String())
	}

	return resp
}
170 changes: 170 additions & 0 deletions cmd/api/api/auto_standby_status_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package api

import (
"context"
"net/netip"
"testing"
"time"

"github.com/kernel/hypeman/lib/autostandby"
"github.com/kernel/hypeman/lib/instances"
"github.com/kernel/hypeman/lib/oapi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// captureStatusManager is a test double that embeds instances.Manager and
// overrides GetInstance with a canned result.
type captureStatusManager struct {
	instances.Manager
	instance *instances.Instance // returned by GetInstance when err is nil
	err      error               // when non-nil, returned by GetInstance instead
}

// GetInstance ignores its arguments and returns the configured error if one
// is set, otherwise the configured instance.
func (m *captureStatusManager) GetInstance(context.Context, string) (*instances.Instance, error) {
	if m.err == nil {
		return m.instance, nil
	}
	return nil, m.err
}

// statusStore is an in-memory test double supplying instances, runtime
// bookkeeping and an instance-event channel to the auto-standby controller.
type statusStore struct {
	instances []autostandby.Instance          // returned (as a copy) by ListInstances
	runtime   map[string]*autostandby.Runtime // populated lazily by SetRuntime
	events    chan autostandby.InstanceEvent  // created lazily by SubscribeInstanceEvents
}

// ListInstances returns a copy of the configured instances.
func (s *statusStore) ListInstances(context.Context) ([]autostandby.Instance, error) {
	var snapshot []autostandby.Instance
	snapshot = append(snapshot, s.instances...)
	return snapshot, nil
}

// StandbyInstance is a no-op that always succeeds.
func (s *statusStore) StandbyInstance(context.Context, string) error {
	return nil
}

// SetRuntime records the runtime for the given instance id, allocating the
// backing map on first use.
func (s *statusStore) SetRuntime(_ context.Context, id string, runtime *autostandby.Runtime) error {
	if s.runtime != nil {
		s.runtime[id] = runtime
		return nil
	}
	s.runtime = map[string]*autostandby.Runtime{id: runtime}
	return nil
}

// SubscribeInstanceEvents returns the store's event channel, creating an
// unbuffered one on first call, together with a no-op unsubscribe function.
func (s *statusStore) SubscribeInstanceEvents() (<-chan autostandby.InstanceEvent, func(), error) {
	if s.events == nil {
		s.events = make(chan autostandby.InstanceEvent)
	}
	unsubscribe := func() {}
	return s.events, unsubscribe, nil
}

// statusConnectionSource is a test double that serves a fixed connection list
// and hands out streams that never deliver anything.
type statusConnectionSource struct {
	connections []autostandby.Connection // returned (as a copy) by ListConnections
}

// ListConnections returns a copy of the configured connections.
func (s *statusConnectionSource) ListConnections(context.Context) ([]autostandby.Connection, error) {
	var snapshot []autostandby.Connection
	snapshot = append(snapshot, s.connections...)
	return snapshot, nil
}

// OpenStream returns a fresh stream backed by unbuffered channels that this
// source never writes to.
func (s *statusConnectionSource) OpenStream(context.Context) (autostandby.ConnectionStream, error) {
	stream := new(statusConnectionStream)
	stream.events = make(chan autostandby.ConnectionEvent)
	stream.errs = make(chan error)
	return stream, nil
}

// statusConnectionStream is a test double stream that exposes its two
// channels and whose Close does nothing.
type statusConnectionStream struct {
	events chan autostandby.ConnectionEvent
	errs   chan error
}

// Events exposes the event channel as receive-only.
func (s *statusConnectionStream) Events() <-chan autostandby.ConnectionEvent {
	return s.events
}

// Errors exposes the error channel as receive-only.
func (s *statusConnectionStream) Errors() <-chan error {
	return s.errs
}

// Close is a no-op that always succeeds.
func (s *statusConnectionStream) Close() error {
	return nil
}

func TestGetAutoStandbyStatusUnsupportedWithoutController(t *testing.T) {
t.Parallel()

base := newTestService(t)
base.InstanceManager = &captureStatusManager{
Manager: base.InstanceManager,
instance: &instances.Instance{
StoredMetadata: instances.StoredMetadata{
Id: "inst-1",
Name: "inst-1",
NetworkEnabled: true,
IP: "192.168.100.10",
AutoStandby: &autostandby.Policy{Enabled: true, IdleTimeout: "5m"},
},
State: instances.StateRunning,
},
}

resp, err := base.GetAutoStandbyStatus(ctx(), oapi.GetAutoStandbyStatusRequestObject{Id: "inst-1"})
require.NoError(t, err)

statusResp, ok := resp.(oapi.GetAutoStandbyStatus200JSONResponse)
require.True(t, ok)
assert.False(t, statusResp.Supported)
assert.Equal(t, oapi.AutoStandbyStatusStatusUnsupported, statusResp.Status)
assert.Equal(t, oapi.AutoStandbyStatusReasonUnsupportedPlatform, statusResp.Reason)
}

func TestGetAutoStandbyStatusActive(t *testing.T) {
t.Parallel()

inst := &instances.Instance{
StoredMetadata: instances.StoredMetadata{
Id: "inst-2",
Name: "inst-2",
NetworkEnabled: true,
IP: "192.168.100.20",
AutoStandby: &autostandby.Policy{Enabled: true, IdleTimeout: "5m"},
},
State: instances.StateRunning,
}

now := time.Date(2026, 4, 6, 12, 0, 0, 0, time.UTC)
store := &statusStore{
instances: []autostandby.Instance{{
ID: "inst-2",
Name: "inst-2",
State: autostandby.StateRunning,
NetworkEnabled: true,
IP: "192.168.100.20",
AutoStandby: &autostandby.Policy{Enabled: true, IdleTimeout: "5m"},
}},
}
source := &statusConnectionSource{connections: []autostandby.Connection{{
OriginalSourceIP: mustStatusAddr("1.2.3.4"),
OriginalSourcePort: 51234,
OriginalDestinationIP: mustStatusAddr("192.168.100.20"),
OriginalDestinationPort: 8080,
TCPState: autostandby.TCPStateEstablished,
}}}
controller := autostandby.NewController(store, source, autostandby.ControllerOptions{
Now: func() time.Time { return now },
})
require.NoError(t, controller.Run(withCanceledContext(t)))

base := newTestService(t)
base.InstanceManager = &captureStatusManager{Manager: base.InstanceManager, instance: inst}
base.AutoStandbyController = controller

resp, err := base.GetAutoStandbyStatus(ctx(), oapi.GetAutoStandbyStatusRequestObject{Id: "inst-2"})
require.NoError(t, err)

statusResp, ok := resp.(oapi.GetAutoStandbyStatus200JSONResponse)
require.True(t, ok)
assert.True(t, statusResp.Supported)
assert.Equal(t, oapi.AutoStandbyStatusStatusActive, statusResp.Status)
assert.Equal(t, 1, statusResp.ActiveInboundConnections)
}

// withCanceledContext returns a context derived from context.Background that
// has already been canceled by the time it is returned.
func withCanceledContext(t *testing.T) context.Context {
	t.Helper()

	canceled, stop := context.WithCancel(context.Background())
	stop() // cancel immediately; callers receive an already-done context
	return canceled
}

// mustStatusAddr parses raw as an IP address and panics with the parse error
// when it is not valid. Intended for fixed literals in tests.
func mustStatusAddr(raw string) netip.Addr {
	addr, err := netip.ParseAddr(raw)
	if err != nil {
		panic(err)
	}
	return addr
}
19 changes: 18 additions & 1 deletion cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,13 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
if request.Body.Cmd != nil {
cmd = *request.Body.Cmd
}
autoStandby, err := toDomainAutoStandbyPolicy(request.Body.AutoStandby)
if err != nil {
return oapi.CreateInstance400JSONResponse{
Code: "invalid_auto_standby",
Message: err.Error(),
}, nil
}

domainReq := instances.CreateInstanceRequest{
Name: request.Body.Name,
Expand All @@ -302,6 +309,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
Cmd: cmd,
SkipKernelHeaders: request.Body.SkipKernelHeaders != nil && *request.Body.SkipKernelHeaders,
SkipGuestAgent: request.Body.SkipGuestAgent != nil && *request.Body.SkipGuestAgent,
AutoStandby: autoStandby,
}
if request.Body.SnapshotPolicy != nil {
snapshotPolicy, err := toInstanceSnapshotPolicy(*request.Body.SnapshotPolicy)
Expand Down Expand Up @@ -924,9 +932,17 @@ func (s *ApiService) UpdateInstance(ctx context.Context, request oapi.UpdateInst
if request.Body.Env != nil {
env = *request.Body.Env
}
autoStandby, err := toDomainAutoStandbyPolicy(request.Body.AutoStandby)
if err != nil {
return oapi.UpdateInstance400JSONResponse{
Code: "invalid_auto_standby",
Message: err.Error(),
}, nil
}

result, err := s.InstanceManager.UpdateInstance(ctx, inst.Id, instances.UpdateInstanceRequest{
Env: env,
Env: env,
AutoStandby: autoStandby,
})
if err != nil {
switch {
Expand Down Expand Up @@ -1057,6 +1073,7 @@ func instanceToOAPI(inst instances.Instance) oapi.Instance {
oapiPolicy := toOAPISnapshotPolicy(*inst.SnapshotPolicy)
oapiInst.SnapshotPolicy = &oapiPolicy
}
oapiInst.AutoStandby = toOAPIAutoStandbyPolicy(inst.AutoStandby)

// Convert volume attachments
if len(inst.Volumes) > 0 {
Expand Down
Loading
Loading