diff --git a/charts/karpenter-crd/templates/karpenter.azure.com_aksnodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.azure.com_aksnodeclasses.yaml
index 18a8a9965..b6bd0872b 100644
--- a/charts/karpenter-crd/templates/karpenter.azure.com_aksnodeclasses.yaml
+++ b/charts/karpenter-crd/templates/karpenter.azure.com_aksnodeclasses.yaml
@@ -967,6 +967,16 @@ spec:
                 maximum: 2048
                 minimum: 30
                 type: integer
+              osDiskType:
+                default: EphemeralWithFallbackToManaged
+                description: |-
+                  OSDiskType is the type of disk to use for the OS.
+                  If EphemeralWithFallbackToManaged, but the VM type does not support ephemeral disks
+                  of at least OSDiskSizeGB size, we will fall back to Managed.
+                enum:
+                - EphemeralWithFallbackToManaged
+                - Managed
+                type: string
               security:
                 description: Collection of security related karpenter fields
                 properties:
diff --git a/pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml b/pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml
index 18a8a9965..b6bd0872b 100644
--- a/pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml
+++ b/pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml
@@ -967,6 +967,16 @@ spec:
                 maximum: 2048
                 minimum: 30
                 type: integer
+              osDiskType:
+                default: EphemeralWithFallbackToManaged
+                description: |-
+                  OSDiskType is the type of disk to use for the OS.
+                  If EphemeralWithFallbackToManaged, but the VM type does not support ephemeral disks
+                  of at least OSDiskSizeGB size, we will fall back to Managed.
+                enum:
+                - EphemeralWithFallbackToManaged
+                - Managed
+                type: string
               security:
                 description: Collection of security related karpenter fields
                 properties:
diff --git a/pkg/apis/v1beta1/aksnodeclass.go b/pkg/apis/v1beta1/aksnodeclass.go
index 65aa32f8a..20ac4c0b5 100644
--- a/pkg/apis/v1beta1/aksnodeclass.go
+++ b/pkg/apis/v1beta1/aksnodeclass.go
@@ -43,6 +43,12 @@ type AKSNodeClassSpec struct {
 	// +kubebuilder:validation:Pattern=`(?i)^\/subscriptions\/[^\/]+\/resourceGroups\/[a-zA-Z0-9_\-().]{0,89}[a-zA-Z0-9_\-()]\/providers\/Microsoft\.Network\/virtualNetworks\/[^\/]+\/subnets\/[^\/]+$`
 	// +optional
 	VNETSubnetID *string `json:"vnetSubnetID,omitempty"`
+	// OSDiskType is the type of disk to use for the OS.
+	// If EphemeralWithFallbackToManaged, but the VM type does not support ephemeral disks
+	// of at least OSDiskSizeGB size, we will fall back to Managed.
+	// +kubebuilder:default="EphemeralWithFallbackToManaged"
+	// +kubebuilder:validation:Enum:={"EphemeralWithFallbackToManaged","Managed"}
+	OSDiskType *string `json:"osDiskType,omitempty"`
 	// +kubebuilder:default=128
 	// +kubebuilder:validation:Minimum=30
 	// +kubebuilder:validation:Maximum=2048
diff --git a/pkg/apis/v1beta1/crd_validation_cel_test.go b/pkg/apis/v1beta1/crd_validation_cel_test.go
index 95f348417..1a9023340 100644
--- a/pkg/apis/v1beta1/crd_validation_cel_test.go
+++ b/pkg/apis/v1beta1/crd_validation_cel_test.go
@@ -140,6 +140,51 @@ var _ = Describe("CEL/Validation", func() {
 		})
 	})
 
+	Context("OSDiskType", func() {
+		It("should accept EphemeralWithFallbackToManaged OSDiskType", func() {
+			ephemeralOSDiskType := "EphemeralWithFallbackToManaged"
+			nodeClass := &v1beta1.AKSNodeClass{
+				ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
+				Spec: v1beta1.AKSNodeClassSpec{
+					OSDiskType: &ephemeralOSDiskType,
+				},
+			}
+			Expect(env.Client.Create(ctx, nodeClass)).To(Succeed())
+		})
+
+		It("should accept Managed OSDiskType", func() {
+			managedOSDiskType := "Managed"
+			nodeClass := &v1beta1.AKSNodeClass{
+				ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
+				Spec: v1beta1.AKSNodeClassSpec{
+					OSDiskType: &managedOSDiskType,
+				},
+			}
+			Expect(env.Client.Create(ctx, nodeClass)).To(Succeed())
+		})
+
+		It("should accept omitted OSDiskType", func() {
+			nodeClass := &v1beta1.AKSNodeClass{
+				ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
+				Spec: v1beta1.AKSNodeClassSpec{
+					// OSDiskType is nil - should be accepted
+				},
+			}
+			Expect(env.Client.Create(ctx, nodeClass)).To(Succeed())
+		})
+
+		It("should reject invalid OSDiskType", func() {
+			invalidOSDiskType := "asdf"
+			nodeClass := &v1beta1.AKSNodeClass{
+				ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
+				Spec: v1beta1.AKSNodeClassSpec{
+					OSDiskType: &invalidOSDiskType,
+				},
+			}
+			Expect(env.Client.Create(ctx, nodeClass)).ToNot(Succeed())
+		})
+	})
+
 	Context("FIPSMode", func() {
 		It("should reject invalid FIPSMode", func() {
 			invalidFIPSMode := v1beta1.FIPSMode("123")
diff --git a/pkg/apis/v1beta1/zz_generated.deepcopy.go b/pkg/apis/v1beta1/zz_generated.deepcopy.go
index e0a673ed1..4e0bcadda 100644
--- a/pkg/apis/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/v1beta1/zz_generated.deepcopy.go
@@ -93,6 +93,11 @@ func (in *AKSNodeClassSpec) DeepCopyInto(out *AKSNodeClassSpec) {
 		*out = new(string)
 		**out = **in
 	}
+	if in.OSDiskType != nil {
+		in, out := &in.OSDiskType, &out.OSDiskType
+		*out = new(string)
+		**out = **in
+	}
 	if in.OSDiskSizeGB != nil {
 		in, out := &in.OSDiskSizeGB, &out.OSDiskSizeGB
 		*out = new(int32)
diff --git a/pkg/providers/imagefamily/resolver.go b/pkg/providers/imagefamily/resolver.go
index a26afda0f..ae9cfc268 100644
--- a/pkg/providers/imagefamily/resolver.go
+++ b/pkg/providers/imagefamily/resolver.go
@@ -203,12 +203,11 @@ func (r *defaultResolver) getStorageProfile(ctx context.Context, instanceType *c
 		return "", nil, err
 	}
 
-	_, placement = instancetype.FindMaxEphemeralSizeGBAndPlacement(sku)
-
 	if instancetype.UseEphemeralDisk(sku, nodeClass) {
+		_, placement = instancetype.FindMaxEphemeralSizeGBAndPlacement(sku)
 		return consts.StorageProfileEphemeral, placement, nil
 	}
-	return consts.StorageProfileManagedDisks, placement, nil
+	return consts.StorageProfileManagedDisks, nil, nil
 }
diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go
index 6fca93aad..29f6ee0e2 100644
--- a/pkg/providers/instance/suite_test.go
+++ b/pkg/providers/instance/suite_test.go
@@ -29,6 +29,7 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/samber/lo"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/tools/record"
 	clock "k8s.io/utils/clock/testing"
@@ -672,6 +673,98 @@ var _ = Describe("VMInstanceProvider", func() {
 		Expect(lo.FromPtr(nic.Properties.NetworkSecurityGroup.ID)).To(Equal(expectedNSGID))
 	})
 
+	It("should create VM with managed OS disk type", func() {
+		nodeClass.Spec.OSDiskType = lo.ToPtr("Managed")
+
+		ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+
+		pod := coretest.UnschedulablePod(coretest.PodOptions{})
+		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod)
+		ExpectScheduled(ctx, env.Client, pod)
+		ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+
+		Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1))
+		vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM
+
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).ToNot(BeNil())
+	})
+
+	It("should fall back to managed disk if instance type doesn't have enough ephemeral disk space for the OS disk", func() {
+		// Standard_D2s_v3 has 53GB of CacheDisk space and 16GB temp disk, so with default 128GB OS disk it falls back to managed
+		nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, karpv1.NodeSelectorRequirementWithMinValues{
+			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+				Key:      corev1.LabelInstanceTypeStable,
+				Operator: corev1.NodeSelectorOpIn,
+				Values:   []string{"Standard_D2s_v3"},
+			},
+		})
+
+		ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+
+		pod := coretest.UnschedulablePod(coretest.PodOptions{})
+		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod)
+		ExpectScheduled(ctx, env.Client, pod)
+
+		Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1))
+		vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM
+
+		// Should use managed disk since ephemeral disk is too small
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).To(BeNil())
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).ToNot(BeNil())
+	})
+
+	It("should create VM with ephemeral disk when instance type has enough space", func() {
+		// Standard_D64s_v3 has 1600GB of CacheDisk space, plenty for the default 128GB OS disk
+		nodeClass.Spec.OSDiskType = lo.ToPtr("EphemeralWithFallbackToManaged")
+		nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, karpv1.NodeSelectorRequirementWithMinValues{
+			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+				Key:      corev1.LabelInstanceTypeStable,
+				Operator: corev1.NodeSelectorOpIn,
+				Values:   []string{"Standard_D64s_v3"},
+			},
+		})
+
+		ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+
+		pod := coretest.UnschedulablePod(coretest.PodOptions{})
+		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod)
+		ExpectScheduled(ctx, env.Client, pod)
+
+		Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1))
+		vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM
+
+		// Should use ephemeral disk
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).ToNot(BeNil())
+		Expect(lo.FromPtr(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Option)).To(Equal(armcompute.DiffDiskOptionsLocal))
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).To(BeNil())
+	})
+
+	It("should choose ephemeral disk by default when not specified and instance type supports it", func() {
+		// Don't set OSDiskType - it should default to ephemeral when available
+		// Standard_D64s_v3 has 1600GB of CacheDisk space
+		nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, karpv1.NodeSelectorRequirementWithMinValues{
+			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+				Key:      corev1.LabelInstanceTypeStable,
+				Operator: corev1.NodeSelectorOpIn,
+				Values:   []string{"Standard_D64s_v3"},
+			},
+		})
+
+		ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+
+		pod := coretest.UnschedulablePod(coretest.PodOptions{})
+		ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod)
+		ExpectScheduled(ctx, env.Client, pod)
+
+		Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1))
+		vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM
+
+		// Default behavior should prefer ephemeral disk when instance type supports it
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).ToNot(BeNil())
+		Expect(lo.FromPtr(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Option)).To(Equal(armcompute.DiffDiskOptionsLocal))
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).To(BeNil())
+	})
+
 	Context("Update", func() {
 		It("should update only VM when no tags are included", func() {
 			// Ensure that the VM already exists in the fake environment
diff --git a/pkg/providers/instance/vminstance.go b/pkg/providers/instance/vminstance.go
index 173cd83ba..bb5da18ed 100644
--- a/pkg/providers/instance/vminstance.go
+++ b/pkg/providers/instance/vminstance.go
@@ -592,6 +592,8 @@ func setVMPropertiesOSDiskType(vmProperties *armcompute.VirtualMachineProperties
 			Placement: lo.ToPtr(placement),
 		}
 		vmProperties.StorageProfile.OSDisk.Caching = lo.ToPtr(armcompute.CachingTypesReadOnly)
+	} else {
+		vmProperties.StorageProfile.OSDisk.ManagedDisk = &armcompute.ManagedDiskParameters{}
 	}
 }
diff --git a/pkg/providers/instancetype/instancetypes.go b/pkg/providers/instancetype/instancetypes.go
index b6d84836f..6ac41575d 100644
--- a/pkg/providers/instancetype/instancetypes.go
+++ b/pkg/providers/instancetype/instancetypes.go
@@ -452,6 +452,9 @@ func supportsNVMeEphemeralOSDisk(sku *skewer.SKU) bool {
 }
 
 func UseEphemeralDisk(sku *skewer.SKU, nodeClass *v1beta1.AKSNodeClass) bool {
+	if nodeClass.Spec.OSDiskType != nil && *nodeClass.Spec.OSDiskType == "Managed" {
+		return false
+	}
 	sizeGB, _ := FindMaxEphemeralSizeGBAndPlacement(sku)
 	return int64(*nodeClass.Spec.OSDiskSizeGB) <= sizeGB // use ephemeral disk if it is large enough
 }
diff --git a/test/suites/nodeclaim/eph_osdisk_test.go b/test/suites/nodeclaim/eph_osdisk_test.go
index 9eb5ce7fd..d2dc30688 100644
--- a/test/suites/nodeclaim/eph_osdisk_test.go
+++ b/test/suites/nodeclaim/eph_osdisk_test.go
@@ -51,6 +51,7 @@ var _ = Describe("Ephemeral OS Disk", func() {
 		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Placement).ToNot(BeNil())
 		Expect(string(lo.FromPtr(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Option))).To(Equal("Local"))
 	})
+
 	It("should provision VM with SKU that does not support ephemeral OS disk", func() {
 		test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{
 			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
@@ -66,6 +67,7 @@ var _ = Describe("Ephemeral OS Disk", func() {
 		Expect(vm.Properties.StorageProfile.OSDisk).ToNot(BeNil())
 		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).To(BeNil())
 	})
+
 	It("should provision VM with SKU that does not support ephemeral OS disk, even if OS disk fits on cache disk", func() {
 		test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{
@@ -91,4 +93,95 @@ var _ = Describe("Ephemeral OS Disk", func() {
 		Expect(vm.Properties.StorageProfile.OSDisk).ToNot(BeNil())
 		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).To(BeNil())
 	})
+
+	It("should use managed disk when OSDiskType is explicitly set to Managed", func() {
+		// Select a SKU that supports ephemeral OS disk and Premium storage to prove Managed overrides it
+		test.ReplaceRequirements(nodePool,
+			karpv1.NodeSelectorRequirementWithMinValues{
+				NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+					Key:      v1beta1.LabelSKUStorageEphemeralOSMaxSize,
+					Operator: corev1.NodeSelectorOpGt,
+					Values:   []string{"50"},
+				}},
+			karpv1.NodeSelectorRequirementWithMinValues{
+				NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+					Key:      v1beta1.LabelSKUStoragePremiumCapable,
+					Operator: corev1.NodeSelectorOpIn,
+					Values:   []string{"true"},
+				}},
+		)
+
+		nodeClass.Spec.OSDiskType = lo.ToPtr("Managed")
+		nodeClass.Spec.OSDiskSizeGB = lo.ToPtr[int32](50)
+
+		pod := test.Pod()
+		env.ExpectCreated(nodeClass, nodePool, pod)
+		env.EventuallyExpectHealthy(pod)
+		env.ExpectCreatedNodeCount("==", 1)
+
+		vm := env.GetVM(pod.Spec.NodeName)
+		Expect(vm.Properties.StorageProfile.OSDisk).ToNot(BeNil())
+		// Should use managed disk even though ephemeral is supported
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).To(BeNil())
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).ToNot(BeNil())
+	})
+
+	It("should fall back to managed disk when EphemeralWithFallbackToManaged is set but ephemeral disk is too small", func() {
+		// Select a SKU with small ephemeral disk capacity (Standard_D2s_v3 has 53GB cache disk)
+		// The "s" in the name indicates Premium storage support
+		test.ReplaceRequirements(nodePool,
+			karpv1.NodeSelectorRequirementWithMinValues{
+				NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+					Key:      corev1.LabelInstanceTypeStable,
+					Operator: corev1.NodeSelectorOpIn,
+					Values:   []string{"Standard_D2s_v3"},
+				}},
+			karpv1.NodeSelectorRequirementWithMinValues{
+				NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+					Key:      v1beta1.LabelSKUStoragePremiumCapable,
+					Operator: corev1.NodeSelectorOpIn,
+					Values:   []string{"true"},
+				}},
+		)
+
+		nodeClass.Spec.OSDiskType = lo.ToPtr("EphemeralWithFallbackToManaged")
+		nodeClass.Spec.OSDiskSizeGB = lo.ToPtr[int32](128) // Larger than 53GB cache disk
+
+		pod := test.Pod()
+		env.ExpectCreated(nodeClass, nodePool, pod)
+		env.EventuallyExpectHealthy(pod)
+		env.ExpectCreatedNodeCount("==", 1)
+
+		vm := env.GetVM(pod.Spec.NodeName)
+		Expect(vm.Properties.StorageProfile.OSDisk).ToNot(BeNil())
+		// Should fall back to managed disk since ephemeral disk is too small
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).To(BeNil())
+		Expect(vm.Properties.StorageProfile.OSDisk.ManagedDisk).ToNot(BeNil())
+	})
+
+	It("should use an ephemeral disk when EphemeralWithFallbackToManaged is set and ephemeral disk is large enough", func() {
+		test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{
+			NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+				Key:      v1beta1.LabelSKUStorageEphemeralOSMaxSize,
+				Operator: corev1.NodeSelectorOpGt,
+				// NOTE: this is the size of our nodeclass OSDiskSizeGB.
+				// If the size of the ephemeral disk requested is lower than the AKSNodeClass OSDiskSizeGB
+				// we fall back to managed disks, honoring OSDiskSizeGB
+				Values: []string{"50"},
+			}})
+
+		pod := test.Pod()
+		nodeClass.Spec.OSDiskType = lo.ToPtr("EphemeralWithFallbackToManaged")
+		nodeClass.Spec.OSDiskSizeGB = lo.ToPtr[int32](50)
+		env.ExpectCreated(nodeClass, nodePool, pod)
+		env.EventuallyExpectHealthy(pod)
+		env.ExpectCreatedNodeCount("==", 1)
+
+		vm := env.GetVM(pod.Spec.NodeName)
+		Expect(vm.Properties.StorageProfile.OSDisk).ToNot(BeNil())
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings).ToNot(BeNil())
+		// We should be specifying os disk placement now
+		Expect(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Placement).ToNot(BeNil())
+		Expect(string(lo.FromPtr(vm.Properties.StorageProfile.OSDisk.DiffDiskSettings.Option))).To(Equal("Local"))
+	})
 })
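
For reference, a minimal AKSNodeClass using the new field might look like the sketch below. This is illustrative only and not part of the patch; osDiskType and osDiskSizeGB come from this change, while the other fields (e.g. imageFamily) are assumed to follow the existing v1beta1 API.

    apiVersion: karpenter.azure.com/v1beta1
    kind: AKSNodeClass
    metadata:
      name: default
    spec:
      imageFamily: Ubuntu2204
      osDiskSizeGB: 128
      # EphemeralWithFallbackToManaged (the default) uses an ephemeral OS disk when the
      # selected VM size can host one of at least osDiskSizeGB, and otherwise falls back
      # to a managed OS disk; "Managed" always uses a managed OS disk.
      osDiskType: EphemeralWithFallbackToManaged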