Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions cmd/lm/lm.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,19 @@ import (
)

var prepareCommand = cli.Command{
Name: "prepare",
Usage: "Prepares the sandbox for migration",
ArgsUsage: "[flags] <pipe> <config output file> <resources output file>",
Name: "prepare",
Usage: "Prepares the sandbox for migration",
ArgsUsage: "[flags] <pipe> <config output file> <resources output file>",
Flags: []cli.Flag{
cli.BoolFlag{
Name: "checksum-verification",
Usage: "Enable memory checksum verification during live migration",
},
cli.BoolFlag{
Name: "perf-tracing",
Usage: "Enable HCS performance tracing for migration",
},
},
SkipArgReorder: true,
Before: appargs.Validate(appargs.String, appargs.String, appargs.String),
Action: func(clictx *cli.Context) error {
Expand All @@ -44,7 +54,12 @@ var prepareCommand = cli.Command{

ctx := context.Background()

resp, err := svc.PrepareSandbox(ctx, &lmproto.PrepareSandboxRequest{})
resp, err := svc.PrepareSandbox(ctx, &lmproto.PrepareSandboxRequest{
InitializeOptions: &lmproto.InitializeOptions{
ChecksumVerification: clictx.Bool("checksum-verification"),
PerfTracingEnabled: clictx.Bool("perf-tracing"),
},
})
if err != nil {
return err
}
Expand Down Expand Up @@ -260,6 +275,14 @@ var specCommand = cli.Command{
cli.StringSliceFlag{
Name: "anno",
},
cli.BoolFlag{
Name: "checksum-verification",
Usage: "Enable memory checksum verification during live migration",
},
cli.BoolFlag{
Name: "perf-tracing",
Usage: "Enable HCS performance tracing for migration",
},
},
SkipArgReorder: true,
Before: appargs.Validate(appargs.String, appargs.String),
Expand Down Expand Up @@ -295,10 +318,12 @@ var specCommand = cli.Command{
}

spec := &lmproto.SandboxLMSpec{
Config: &config,
Resources: &resources,
Netns: clictx.String("netns"),
Annotations: make(map[string]string),
Config: &config,
Resources: &resources,
Netns: clictx.String("netns"),
Annotations: make(map[string]string),
ChecksumVerification: clictx.Bool("checksum-verification"),
PerfTracingEnabled: clictx.Bool("perf-tracing"),
}
for _, anno := range clictx.StringSlice("anno") {
fields := strings.SplitN(anno, "=", 2)
Expand Down
125 changes: 114 additions & 11 deletions internal/computecore/computecore.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package computecore

import (
"fmt"
"syscall"
"unsafe"

Expand Down Expand Up @@ -89,22 +90,123 @@ const (
HcsOperationTypeCrash
)

// hcsOperationTypeNames maps every named HCS_OPERATION_TYPE constant to the
// label reported by HCS_OPERATION_TYPE.String.
var hcsOperationTypeNames = map[HCS_OPERATION_TYPE]string{
	HcsOperationTypeNone:                 "None",
	HcsOperationTypeEnumerate:            "Enumerate",
	HcsOperationTypeCreate:               "Create",
	HcsOperationTypeStart:                "Start",
	HcsOperationTypeShutdown:             "Shutdown",
	HcsOperationTypePause:                "Pause",
	HcsOperationTypeResume:               "Resume",
	HcsOperationTypeSave:                 "Save",
	HcsOperationTypeTerminate:            "Terminate",
	HcsOperationTypeModify:               "Modify",
	HcsOperationTypeGetProperties:        "GetProperties",
	HcsOperationTypeCreateProcess:        "CreateProcess",
	HcsOperationTypeSignalProcess:        "SignalProcess",
	HcsOperationTypeGetProcessInfo:       "GetProcessInfo",
	HcsOperationTypeGetProcessProperties: "GetProcessProperties",
	HcsOperationTypeModifyProcess:        "ModifyProcess",
	HcsOperationTypeCrash:                "Crash",
}

// String returns the short name of the operation type. Values that do not
// match a named constant are rendered as "Unknown: <decimal value>".
func (op HCS_OPERATION_TYPE) String() string {
	if name, known := hcsOperationTypeNames[op]; known {
		return name
	}
	return fmt.Sprintf("Unknown: %d", op)
}

type HCS_EVENT_TYPE int

const (
HcsEventTypeInvalid HCS_EVENT_TYPE = iota
HcsEventTypeSystemExited
HcsEventTypeSystemCrashInitiated
HcsEventTypeSystemCrashReport
HcsEventTypeSystemRdpEnhancedModeStateChanged
HcsEventTypeSystemSiloJobCreated
HcsEventTypeSystemGuestConnectionClosed

HcsEventTypeProcessExited HCS_EVENT_TYPE = 0x00010000
// HcsEventTypeInvalid = 0x00000000
HcsEventTypeInvalid HCS_EVENT_TYPE = 0x00000000

// Events for HCS_SYSTEM handles

HcsEventTypeSystemExited HCS_EVENT_TYPE = 0x00000001
HcsEventTypeSystemCrashInitiated HCS_EVENT_TYPE = 0x00000002
HcsEventTypeSystemCrashReport HCS_EVENT_TYPE = 0x00000003
HcsEventTypeSystemRdpEnhancedModeStateChanged HCS_EVENT_TYPE = 0x00000004
HcsEventTypeSystemSiloJobCreated HCS_EVENT_TYPE = 0x00000005
HcsEventTypeSystemGuestConnectionClosed HCS_EVENT_TYPE = 0x00000006

// Events for HCS_PROCESS handles

HcsEventTypeProcessExited HCS_EVENT_TYPE = 0x00010000

// Common Events

HcsEventTypeOperationCallback HCS_EVENT_TYPE = 0x01000000
HcsEventTypeServiceDisconnect HCS_EVENT_TYPE = 0x02000000

// Event groups (enabled by HCS_EVENT_OPTIONS)

HcsEventTypeGroupVmLifecycle HCS_EVENT_TYPE = 0x80000002
HcsEventTypeGroupLiveMigration HCS_EVENT_TYPE = 0x80000003

// Events for HCS_OPERATION

HcsEventTypeGroupOperationInfo HCS_EVENT_TYPE = 0xC0000001
)

// hcsEventTypeNames maps every named HCS_EVENT_TYPE constant to the label
// reported by HCS_EVENT_TYPE.String.
var hcsEventTypeNames = map[HCS_EVENT_TYPE]string{
	HcsEventTypeInvalid: "Invalid",

	// System events
	HcsEventTypeSystemExited:                      "SystemExited",
	HcsEventTypeSystemCrashInitiated:              "SystemCrashInitiated",
	HcsEventTypeSystemCrashReport:                 "SystemCrashReport",
	HcsEventTypeSystemRdpEnhancedModeStateChanged: "SystemRdpEnhancedModeStateChanged",
	HcsEventTypeSystemSiloJobCreated:              "SystemSiloJobCreated",
	HcsEventTypeSystemGuestConnectionClosed:       "SystemGuestConnectionClosed",

	// Process events
	HcsEventTypeProcessExited: "ProcessExited",

	// Common events
	HcsEventTypeOperationCallback: "OperationCallback",
	HcsEventTypeServiceDisconnect: "ServiceDisconnect",

	// Groups
	HcsEventTypeGroupVmLifecycle:   "GroupVmLifecycle",
	HcsEventTypeGroupLiveMigration: "GroupLiveMigration",
	HcsEventTypeGroupOperationInfo: "GroupOperationInfo",
}

// String returns the short name of the event type. Values that do not match a
// named constant are rendered as "Unknown: 0x" followed by the value truncated
// to 32 bits and printed as eight upper-case hex digits.
func (hn HCS_EVENT_TYPE) String() string {
	if name, known := hcsEventTypeNames[hn]; known {
		return name
	}
	return fmt.Sprintf("Unknown: 0x%08X", uint32(hn))
}

type Event struct {
Type HCS_EVENT_TYPE
EventData *uint16
Expand All @@ -114,8 +216,9 @@ type Event struct {
type HCS_EVENT_OPTIONS int

const (
HcsEventOptionNone HCS_EVENT_OPTIONS = 0
HcsEventOptionEnableOperationCallbacks HCS_EVENT_OPTIONS = 1
HcsEventOptionNone HCS_EVENT_OPTIONS = 0
HcsEventOptionEnableOperationCallbacks HCS_EVENT_OPTIONS = 1
HcsEventOptionEnableLiveMigrationEvents HCS_EVENT_OPTIONS = 4
)

type HCS_RESOURCE_TYPE int
Expand Down
4 changes: 3 additions & 1 deletion internal/core/linuxvm/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ func NewMigrator(
annos map[string]string,
replacements *core.Replacements,
vmResources core.Resources,
checksumVerification bool,
perfTracingEnabled bool,
) (_ core.Migrator, err error) {
logrus.WithField("config", config).Info("creating lm sandbox with config")
vmConfig := statepkg.VMConfigToInternal(config.Vm.Config)
Expand Down Expand Up @@ -227,7 +229,7 @@ func NewMigrator(
}

vmID := fmt.Sprintf("%s@vm", id)
vm, err := vm.NewVM(ctx, vmID, vmConfig, vm.WithLM(config.Vm.CompatInfo))
vm, err := vm.NewVM(ctx, vmID, vmConfig, vm.WithLM(config.Vm.CompatInfo, checksumVerification, perfTracingEnabled))
if err != nil {
return nil, err
}
Expand Down
145 changes: 145 additions & 0 deletions internal/hcs/callbackV2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package hcs

import (
"encoding/json"
"fmt"
"os"
"sync"
"syscall"
"time"
"unsafe"

"github.com/Microsoft/hcsshim/internal/computecore"
"github.com/Microsoft/hcsshim/internal/interop"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)

var (
	// notificationWatcherCallbackV2 is the callback value that
	// syscall.NewCallback produces for notificationWatcherV2.
	notificationWatcherCallbackV2 = syscall.NewCallback(notificationWatcherV2)

	// callbackLogFileV2 is held by logLMEventToFile while it opens and appends
	// to the diagnostic log file.
	callbackLogFileV2 = sync.Mutex{}
)

// notificationWatcherV2 is the v2 callback handler registered via HcsSetComputeSystemCallback.
// It dispatches events to notification channels (for waitBackground) and logs LM events.
// Uses the main callbackMap (shared with v1 process callbacks).
//
// eventPtr is reinterpreted as a *computecore.Event and callbackNumber is the
// key used to look up the notification context in callbackMap. Events with a
// nil pointer or without a registered context are dropped. The return value is
// always 0.
func notificationWatcherV2(eventPtr uintptr, callbackNumber uintptr) uintptr {
	e := (*computecore.Event)(unsafe.Pointer(eventPtr))
	if e == nil {
		return 0
	}

	// Look up the context from the main callbackMap (shared with v1 process
	// callbacks). The local is deliberately not called "context" so it cannot
	// be confused with the standard library package of that name.
	callbackMapLock.RLock()
	cbCtx := callbackMap[callbackNumber]
	callbackMapLock.RUnlock()

	if cbCtx == nil {
		return 0
	}

	eventData := ""
	if e.EventData != nil {
		eventData = windows.UTF16PtrToString(e.EventData)
	}

	log := logrus.WithFields(logrus.Fields{
		"event-type": e.Type.String(),
		"system-id":  cbCtx.systemID,
	})

	switch e.Type {
	case computecore.HcsEventTypeSystemExited:
		log.Debug("HCS v2 notification: SystemExited")
		if ch, ok := cbCtx.channels[hcsNotificationSystemExited]; ok {
			// Try to extract an error from the event data (HRESULT in result JSON).
			var result error
			if eventData != "" {
				var hcsRes hcsResult
				if json.Unmarshal([]byte(eventData), &hcsRes) == nil && hcsRes.Error < 0 {
					// Convert through uint32 first: converting a negative signed
					// value straight to uintptr sign-extends on 64-bit targets,
					// producing an errno whose upper 32 bits are set and which
					// therefore never compares equal to a 32-bit HRESULT value.
					// NOTE(review): assumes hcsResult.Error is a 32-bit signed
					// HRESULT — confirm against its declaration.
					result = interop.Win32FromHresult(uintptr(uint32(hcsRes.Error)))
				}
			}
			ch <- result
		}

	case computecore.HcsEventTypeServiceDisconnect:
		log.Debug("HCS v2 notification: ServiceDisconnect")
		if ch, ok := cbCtx.channels[hcsNotificationServiceDisconnect]; ok {
			ch <- nil
		}

	case computecore.HcsEventTypeOperationCallback:
		// Map the operation type to the matching v1 notification channel.
		// notif keeps its zero value for operation types that have no mapping;
		// the check against hcsNotificationInvalid below then skips dispatch.
		var notif hcsNotification
		opType := computecore.HcsGetOperationType(e.Operation)
		switch opType {
		case computecore.HcsOperationTypeCreate:
			notif = hcsNotificationSystemCreateCompleted
		case computecore.HcsOperationTypeStart:
			notif = hcsNotificationSystemStartCompleted
		case computecore.HcsOperationTypePause:
			notif = hcsNotificationSystemPauseCompleted
		case computecore.HcsOperationTypeResume:
			notif = hcsNotificationSystemResumeCompleted
		case computecore.HcsOperationTypeSave:
			notif = hcsNotificationSystemSaveCompleted
		default:
			log.WithField("operation-type", opType.String()).Debug("HCS v2 notification: OperationCallback (unhandled op type)")
		}
		if notif != hcsNotificationInvalid {
			// Forward the operation's error (nil on success) to the waiter.
			var result error
			if _, opErr := e.Operation.Result(); opErr != nil {
				result = opErr
			}
			log.WithField("operation-type", opType.String()).Debug("HCS v2 notification: OperationCallback")
			if ch, ok := cbCtx.channels[notif]; ok {
				ch <- result
			}
		}

	case computecore.HcsEventTypeGroupLiveMigration:
		log.WithField("event-data", eventData).Info("HCS v2 notification: LiveMigration event")
		logLMEventToFile(cbCtx.systemID, e.Type, eventData)

	default:
		log.WithField("event-data", eventData).Debug("HCS v2 notification: unhandled event type")
	}

	return 0
}

// logLMEventToFile appends LM events to a diagnostic log file.
//
// Each call appends one indented JSON record (timestamp, system ID, event type
// name and raw event data) followed by a newline to C:\temp\hcs_callback.json,
// creating the directory and file when they do not exist. Logging is
// best-effort: if the record cannot be encoded or the file cannot be opened the
// event is dropped and nothing is reported to the caller.
func logLMEventToFile(systemID string, eventType computecore.HCS_EVENT_TYPE, eventData string) {
	dir := `C:\temp`
	// The error is intentionally ignored: if the directory is unusable the
	// OpenFile call below fails and the event is dropped.
	_ = os.MkdirAll(dir, 0o755)
	filename := fmt.Sprintf(`%s\hcs_callback.json`, dir)

	callbackLogFileV2.Lock()
	defer callbackLogFileV2.Unlock()

	record := struct {
		Timestamp string `json:"timestamp"`
		SystemID  string `json:"systemId"`
		EventType string `json:"eventType"`
		EventData string `json:"eventData"`
	}{
		Timestamp: time.Now().Format(time.RFC3339Nano),
		SystemID:  systemID,
		EventType: eventType.String(),
		EventData: eventData,
	}

	// Encode before touching the file so an encoding failure cannot leave a
	// bare newline behind.
	payload, err := json.MarshalIndent(record, "", " ")
	if err != nil {
		return
	}
	payload = append(payload, '\n')

	f, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return
	}
	defer f.Close()

	// Emit the record and its trailing newline in a single write so that other
	// writers appending to the same file cannot interleave between the two.
	// The write error is ignored because this log is diagnostic only.
	_, _ = f.Write(payload)
}
Loading
Loading