Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"syscall"
"time"

"github.com/Azure/ARO-HCP/backend/pkg/controllers/clusterprovisioningcontrollers"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
Expand Down Expand Up @@ -366,6 +367,9 @@ func Run(cmd *cobra.Command, args []string) error {
cosmosMatchingClusterController = controllerutils.NewClusterWatchingController(
"CosmosMatchingClusters", dbClient, subscriptionLister, 60*time.Minute,
mismatchcontrollers.NewCosmosClusterMatchingController(utilsclock.RealClock{}, dbClient, clusterServiceClient))
dnsReservationController = controllerutils.NewClusterWatchingController(
"DNSReservation", dbClient, subscriptionLister, 1*time.Minute,
clusterprovisioningcontrollers.NewDNSReservationController(dbClient))
)

le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{
Expand All @@ -388,6 +392,7 @@ func Run(cmd *cobra.Command, args []string) error {
go cosmosMatchingNodePoolController.Run(ctx, 20)
go cosmosMatchingExternalAuthController.Run(ctx, 20)
go cosmosMatchingClusterController.Run(ctx, 20)
go dnsReservationController.Run(ctx, 20)
},
OnStoppedLeading: func() {
operationsScanner.LeaderGauge.Set(0)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2025 Microsoft Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clusterprovisioningcontrollers

import (
"context"
"fmt"
"net/http"
"time"

"github.com/Azure/ARO-HCP/internal/api"
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/lru"

"github.com/Azure/ARO-HCP/backend/pkg/controllers/controllerutils"
"github.com/Azure/ARO-HCP/internal/database"
"github.com/Azure/ARO-HCP/internal/serverutils"
"github.com/Azure/ARO-HCP/internal/utils"
)

// dnsReservationController carries the database client that SyncOnce uses to
// look up clusters and to create and record DNS reservations.
type dnsReservationController struct {
// cosmosClient is the database client used for every read and write in SyncOnce.
cosmosClient database.DBClient
}

// NewDNSReservationController returns a ClusterSyncer that makes sure a
// cluster's ServiceProviderCluster document has a kube-apiserver DNS
// reservation recorded on it, creating the reservation when it is missing.
//
// cosmosClient is the database client used to read clusters and to create and
// update the service provider cluster and DNS reservation documents.
func NewDNSReservationController(cosmosClient database.DBClient) controllerutils.ClusterSyncer {
	return &dnsReservationController{
		cosmosClient: cosmosClient,
	}
}

func (c *dnsReservationController) SyncOnce(ctx context.Context, key controllerutils.HCPClusterKey) error {
logger := utils.LoggerFromContext(ctx)

customerDesiredCluster, err := c.cosmosClient.HCPClusters(key.SubscriptionID, key.ResourceGroupName).Get(ctx, key.HCPClusterName)
if database.IsResponseError(err, http.StatusNotFound) {
return nil // no work to do
}
if err != nil {
return utils.TrackError(fmt.Errorf("failed to get HCP cluster: %w", err))
}

serviceProviderCluster, err := c.cosmosClient.ServiceProviderClusters(key.SubscriptionID, key.ResourceGroupName, key.HCPClusterName).Get(ctx, "default")
if database.IsResponseError(err, http.StatusNotFound) {
// create it
serviceProviderCluster, err = c.cosmosClient.ServiceProviderClusters(key.SubscriptionID, key.ResourceGroupName, key.HCPClusterName).Create(
ctx,
&api.ServiceProviderCluster{
CosmosMetadata: api.CosmosMetadata{},
ResourceID: azcorearm.ResourceID{},
LoadBalancerResourceID: nil,
KubeAPIServerDNSReservation: nil,
},
nil)
}
if err != nil {
return utils.TrackError(fmt.Errorf("failed to get or create service provider cluster: %w", err))
}

if serviceProviderCluster.KubeAPIServerDNSReservation != nil {
// no work to do
return nil
}

// if we're here, we need to reserve a DNS name. Just create a random one. if it succeeds, the name is free and use it.
// if it fails, just return the error and the auto-retry will trigger us again soon. That handles both the conflict case
// and a general "it's down" case and we get free reporting.
dnsReservation, err := c.cosmosClient.DNSReservations(key.SubscriptionID).Create(
ctx,
&api.DNSReservation{
CosmosMetadata: api.CosmosMetadata{},
ResourceID: nil,
MustBindByTime: metav1.Time{},
OwningCluster: nil,
},
nil)
if err != nil {
return utils.TrackError(fmt.Errorf("failed to reserve DNS name: %w", err)
}
logger.Info("reserved DNS name", "kubeAPIServerDNSName", dnsReservation.ResourceID)

serviceProviderCluster.KubeAPIServerDNSReservation = dnsReservation.ResourceID
_, err = c.cosmosClient.ServiceProviderClusters(key.SubscriptionID, key.ResourceGroupName, key.HCPClusterName).Replace(ctx, serviceProviderCluster, nil)
if err != nil {
return utils.TrackError(fmt.Errorf("failed to update service provider cluster: %w", err))
}

// from here we get choices about granularity. I'd be fine to see this controller go on and create azure stuff.
// I'd also be find to see another controller create the azure stuff.

return nil
}
1 change: 1 addition & 0 deletions internal/api/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ const (
)

var (
DNSReservationResourceType = azcorearm.NewResourceType(ProviderNamespace, "dnsReservations")
OperationStatusResourceType = azcorearm.NewResourceType(ProviderNamespace, OperationStatusResourceTypeName)
ClusterResourceType = azcorearm.NewResourceType(ProviderNamespace, ClusterResourceTypeName)
ServiceProviderClusterResourceType = azcorearm.NewResourceType(ProviderNamespace, ClusterResourceTypeName+"/serviceProviderCluster")
Expand Down
43 changes: 43 additions & 0 deletions internal/api/types_dnsreservation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2025 Microsoft Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package api

import (
azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DNSReservation is a logical (not real) resource that exists at a subscription level to provide a simple means of
// reserving a DNS name.
// NOTE(review): the original comment ended with the unfinished sentence "It logically belongs"; presumably the
// reservation logically belongs to the cluster named by OwningCluster — confirm and complete.
type DNSReservation struct {
// CosmosMetadata is embedded and serialized under the "cosmosMetadata" key.
// NOTE(review): the earlier wording here said the ResourceID is nested under the cluster, with the
// ServiceProviderCluster type and the name default. That conflicts with the subscription-level description on this
// type and looks copied from another type — confirm the intended nesting.
CosmosMetadata `json:"cosmosMetadata"`

// ResourceID is the resource ID of this reservation, serialized as "resourceId" and omitted when nil.
// this matches the resourcedocument and standard storage schema.
// NOTE(review): the remaining remarks talk about previously stored items and conversion trickery, which reads like
// it was written for an existing type rather than this new one — confirm they apply here:
// we already store this field, but it's currently done in conversion trickery. Update to directly serialize it.
// all items previously stored will read out and have this filled in.
// we need to be sure that all new records have it too.
ResourceID *azcorearm.ResourceID `json:"resourceId,omitempty"`

// MustBindByTime is the time by which a ServiceProviderClusterStatus must have claimed this DNSReservation.
// If a cleanup thread finds a DNSReservation that is not listed in a ServiceProviderClusterStatus after this time,
// then the DNSReservation will be deleted.
// NOTE(review): presumably "ServiceProviderClusterStatus" refers to the type holding the
// KubeAPIServerDNSReservation field — confirm the name.
MustBindByTime metav1.Time `json:"mustBindByTime"`

// OwningCluster is the resource ID of the cluster that this reservation is for. This allows for easy cleanup after
// MustBindByTime has expired. It is omitted from the JSON when nil.
OwningCluster *azcorearm.ResourceID `json:"owningCluster,omitempty"`
}
2 changes: 2 additions & 0 deletions internal/api/types_serviceprovider_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ type ServiceProviderCluster struct {
ResourceID azcorearm.ResourceID `json:"resourceId"`

LoadBalancerResourceID *azcorearm.ResourceID `json:"loadBalancerResourceID,omitempty"`

KubeAPIServerDNSReservation *azcorearm.ResourceID `json:"kubeAPIServerDNSReservation,omitempty"`
}
10 changes: 10 additions & 0 deletions internal/database/crud_hcpcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ type OperationCRUD interface {
ListActiveOperations(options *DBClientListActiveOperationDocsOptions) DBClientIterator[api.Operation]
}

// NewDNSReservationCRUD returns a ResourceCRUD for api.DNSReservation documents
// parented directly under the given subscription.
//
// containerClient is the Cosmos container the CRUD operates on. subscriptionID
// is lower-cased before it is used to build the parent resource ID
// "/subscriptions/<id>". The parse result is passed through api.Must
// (presumably panics on a parse error — confirm).
func NewDNSReservationCRUD(containerClient *azcosmos.ContainerClient, subscriptionID string) ResourceCRUD[api.DNSReservation] {
	parentResourceID := api.Must(azcorearm.ParseResourceID(path.Join("/subscriptions", strings.ToLower(subscriptionID))))

	// DNS reservations get their own resource type; api.OperationStatusResourceType would file them
	// under the operation-status type.
	return NewCosmosResourceCRUD[api.DNSReservation, GenericDocument[api.DNSReservation]](containerClient, parentResourceID, api.DNSReservationResourceType)
}

// operationCRUD is built on the generic nestedCosmosResourceCRUD, specialized for api.Operation with Operation as
// its second type argument (presumably the stored document type — confirm).
type operationCRUD struct {
*nestedCosmosResourceCRUD[api.Operation, Operation]
}
Expand Down
6 changes: 6 additions & 0 deletions internal/database/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ type DBClient interface {
// to end users via ARM. They must also survive the thing they are deleting, so they live under a subscription directly.
Operations(subscriptionID string) OperationCRUD

DNSReservations(subscriptionID string) ResourceCRUD[api.DNSReservation]

Subscriptions() SubscriptionCRUD

ServiceProviderClusters(subscriptionID, resourceGroupName, clusterName string) ServiceProviderClusterCRUD
Expand Down Expand Up @@ -279,6 +281,10 @@ func (d *cosmosDBClient) ServiceProviderClusters(subscriptionID, resourceGroupNa
d.resources, clusterResourceID, api.ServiceProviderClusterResourceType)
}

// DNSReservations returns the CRUD accessor for the DNS reservations of the
// given subscription, backed by this client's resources container.
func (d *cosmosDBClient) DNSReservations(subscriptionID string) ResourceCRUD[api.DNSReservation] {
	reservations := NewDNSReservationCRUD(d.resources, subscriptionID)
	return reservations
}

// UntypedCRUD builds an untyped CRUD accessor rooted at parentResourceID on
// this client's resources container. The returned error is always nil.
func (d *cosmosDBClient) UntypedCRUD(parentResourceID azcorearm.ResourceID) (UntypedResourceCRUD, error) {
	untyped := NewUntypedCRUD(d.resources, parentResourceID)
	return untyped, nil
}
Expand Down