Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions .github/actions/e2e/create-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ inputs:
# required: false
# default: "1.27"
client-id:
description:
description:
required: true
tenant-id:
description:
description:
required: true
subscription-id:
description:
description:
required: true
resource_group:
description: "Name of the resource group to create the cluster within"
Expand All @@ -29,6 +29,9 @@ inputs:
location:
description: "The azure location to run the e2e test in"
default: "eastus"
provisionmode:
description: "The karpenter provisioning mode to run the e2e test in"
default: "aksscriptless"
runs:
using: "composite"
steps:
Expand All @@ -55,7 +58,7 @@ runs:
env:
AZURE_CLUSTER_NAME: ${{ inputs.cluster_name }}
AZURE_RESOURCE_GROUP: ${{ inputs.resource_group }}
AZURE_ACR_NAME: ${{ inputs.acr_name }}
AZURE_ACR_NAME: ${{ inputs.acr_name }}
AZURE_LOCATION: ${{ inputs.location }}
run: make az-mkaks-cilium
- name: az login 2
Expand All @@ -78,5 +81,9 @@ runs:
AZURE_LOCATION: ${{ inputs.location }}
AZURE_ACR_NAME: ${{ inputs.acr_name }}
run: |
make az-perm
if [ "${{ inputs.provisionmode }}" = "aksmachineapi" ]; then
make az-perm-aksmachine
else
make az-perm
fi
make az-perm-acr
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ jobs:
acr_name: ${{ env.ACR_NAME }}
git_ref: ${{ inputs.git_ref }}
location: ${{ inputs.location }}
provisionmode: ${{ inputs.provisionmode }}
- name: build and publish karpenter
shell: bash
run: AZURE_ACR_NAME=${{ env.ACR_NAME }} make az-build
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ e2etests: ## Run the e2e suite against your local cluster
# -count 1: prevents caching
# -timeout: If a test binary runs longer than TEST_TIMEOUT, panic
# -v: verbose output
cd test && AZURE_CLUSTER_NAME=${AZURE_CLUSTER_NAME} AZURE_ACR_NAME=${AZURE_ACR_NAME} AZURE_RESOURCE_GROUP=${AZURE_RESOURCE_GROUP} AZURE_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} AZURE_LOCATION=${AZURE_LOCATION} go test \
cd test && AZURE_CLUSTER_NAME=${AZURE_CLUSTER_NAME} AZURE_ACR_NAME=${AZURE_ACR_NAME} AZURE_RESOURCE_GROUP=${AZURE_RESOURCE_GROUP} AZURE_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} AZURE_LOCATION=${AZURE_LOCATION} PROVISION_MODE=${PROVISION_MODE} go test \
-p 1 \
-count 1 \
-timeout ${TEST_TIMEOUT} \
Expand Down
15 changes: 13 additions & 2 deletions Makefile-az.mk
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ else
endif

AZURE_ACR_SUFFIX ?= azurecr.io
AZURE_SIG_SUBSCRIPTION_ID ?= $(AZURE_SUBSCRIPTION_ID)
AZURE_SIG_SUBSCRIPTION_ID ?= 10945678-1234-1234-1234-123456789012
AZURE_CLUSTER_NAME ?= $(COMMON_NAME)
AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)

Expand All @@ -22,6 +22,8 @@ KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID
CUSTOM_VNET_NAME ?= $(AZURE_CLUSTER_NAME)-vnet
CUSTOM_SUBNET_NAME ?= nodesubnet

PROVISION_MODE ?= aksscriptless

.DEFAULT_GOAL := help # make without arguments will show help

az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload
Expand Down Expand Up @@ -139,7 +141,7 @@ az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use
az group delete --name $(AZURE_RESOURCE_GROUP)

az-configure-values: ## Generate cluster-related values for Karpenter Helm chart
hack/deploy/configure-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(KARPENTER_SERVICE_ACCOUNT_NAME) $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME)
hack/deploy/configure-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(KARPENTER_SERVICE_ACCOUNT_NAME) $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME) $(PROVISION_MODE)

az-mkvmssflex: ## Create VMSS Flex (optional, only if creating VMs referencing this VMSS)
az vmss create --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --location $(AZURE_LOCATION) \
Expand All @@ -156,6 +158,15 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator"
@echo Consider "make az-configure-values"!

# az-perm-aksmachine: role assignments used when provisioning through the AKS machine API.
#   1. Looks up the principalId of the Karpenter user-assigned identity. Note that the
#      variable is named KARPENTER_USER_ASSIGNED_CLIENT_ID but holds the principalId
#      (it is passed to --assignee-object-id below).
#   2. Grants that identity "Azure Kubernetes Service Contributor Role" on the cluster
#      resource group and "Network Contributor" on the MC_ (node) resource group.
#   3. Looks up the cluster's own identity (identity.principalId from `az aks show`) and
#      grants it "Virtual Machine Contributor", "Network Contributor" and
#      "Managed Identity Operator" on the MC_ resource group.
az-perm-aksmachine: ## Create role assignments for AKS machine API operations
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Azure Kubernetes Service Contributor Role"
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor"
$(eval CLUSTER_IDENTITY=$(shell az aks show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_CLUSTER_NAME}" --query 'identity.principalId' -otsv))
az role assignment create --assignee-object-id $(CLUSTER_IDENTITY) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor"
az role assignment create --assignee-object-id $(CLUSTER_IDENTITY) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor"
az role assignment create --assignee-object-id $(CLUSTER_IDENTITY) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator"

az-perm-sig: ## Create role assignments when testing with SIG images
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --role "Reader" --scope /subscriptions/$(AZURE_SIG_SUBSCRIPTION_ID)/resourceGroups/AKS-Ubuntu/providers/Microsoft.Compute/galleries/AKSUbuntu
Expand Down
14 changes: 11 additions & 3 deletions hack/deploy/configure-values.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ set -euo pipefail
# This script interrogates the AKS cluster and Azure resources to generate
# the karpenter-values.yaml file using the karpenter-values-template.yaml file as a template.

if [ "$#" -ne 4 ]; then
echo "Usage: $0 <cluster-name> <resource-group> <karpenter-service-account-name> <karpenter-user-assigned-identity-name>"
if [ "$#" -ne 5 ]; then
echo "Usage: $0 <cluster-name> <resource-group> <karpenter-service-account-name> <karpenter-user-assigned-identity-name> <provision-mode>"
exit 1
fi

Expand All @@ -14,6 +14,7 @@ CLUSTER_NAME=$1
AZURE_RESOURCE_GROUP=$2
KARPENTER_SERVICE_ACCOUNT_NAME=$3
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME=$4
PROVISION_MODE=$5

# Optional values through env vars:
LOG_LEVEL=${LOG_LEVEL:-"info"}
Expand Down Expand Up @@ -68,9 +69,16 @@ NODE_IDENTITIES=$(jq -r ".identityProfile.kubeletidentity.resourceId" <<< "$AKS_
KARPENTER_USER_ASSIGNED_CLIENT_ID=$(az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)
KUBELET_IDENTITY_CLIENT_ID=$(jq -r ".identityProfile.kubeletidentity.clientId // empty" <<< "$AKS_JSON")

# For Machine API mode
USE_SIG="false"
AZURE_SIG_SUBSCRIPTION_ID=109a5e88-712a-48ae-9078-9ca8b3c81345
if [[ "${PROVISION_MODE:-}" == "aksmachineapi" ]]; then
USE_SIG="true"
fi

export CLUSTER_NAME AZURE_LOCATION AZURE_RESOURCE_GROUP_MC KARPENTER_SERVICE_ACCOUNT_NAME \
CLUSTER_ENDPOINT BOOTSTRAP_TOKEN SSH_PUBLIC_KEY VNET_SUBNET_ID KARPENTER_USER_ASSIGNED_CLIENT_ID NODE_IDENTITIES AZURE_SUBSCRIPTION_ID NETWORK_PLUGIN NETWORK_PLUGIN_MODE NETWORK_POLICY \
LOG_LEVEL VNET_GUID KUBELET_IDENTITY_CLIENT_ID
LOG_LEVEL VNET_GUID KUBELET_IDENTITY_CLIENT_ID PROVISION_MODE USE_SIG AZURE_SIG_SUBSCRIPTION_ID

# get karpenter-values-template.yaml, if not already present (e.g. outside of repo context)
if [ ! -f karpenter-values-template.yaml ]; then
Expand Down
8 changes: 6 additions & 2 deletions karpenter-values-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,17 @@ controller:

# managed karpenter settings
- name: USE_SIG
value: "false"
value: "${USE_SIG}"
- name: SIG_ACCESS_TOKEN_SERVER_URL
value: ""
- name: SIG_ACCESS_TOKEN_SCOPE
value: ""
- name: SIG_SUBSCRIPTION_ID
value: ""
value: "${AZURE_SIG_SUBSCRIPTION_ID}"
- name: PROVISION_MODE
value: ${PROVISION_MODE}
- name: ARM_RESOURCE_GROUP
value: ${AZURE_RESOURCE_GROUP}
serviceAccount:
name: ${KARPENTER_SERVICE_ACCOUNT_NAME}
annotations:
Expand Down
1 change: 1 addition & 0 deletions pkg/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ const (

ProvisionModeAKSScriptless = "aksscriptless"
ProvisionModeBootstrappingClient = "bootstrappingclient"
ProvisionModeAKSMachineAPI = "aksmachineapi"
)
38 changes: 31 additions & 7 deletions test/pkg/environment/azure/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
"github.com/Azure/karpenter-provider-azure/pkg/apis/v1beta1"
"github.com/Azure/karpenter-provider-azure/pkg/consts"
"github.com/Azure/karpenter-provider-azure/pkg/test"
"github.com/Azure/karpenter-provider-azure/pkg/test/azure"
"github.com/Azure/karpenter-provider-azure/test/pkg/environment/common"
Expand All @@ -61,7 +62,9 @@ type Environment struct {
VNETResourceGroup string
ACRName string
ClusterName string
MachineAgentPoolName string
ClusterResourceGroup string
ProvisionMode string

tracker *azure.Tracker

Expand All @@ -85,24 +88,30 @@ type Environment struct {
RBACManager *RBACManager
}

func readEnv(name string) string {
func readEnv(name string, required bool) string {
value, exists := os.LookupEnv(name)
if !exists {
panic(fmt.Sprintf("Environment variable %s is not set", name))
if required {
panic(fmt.Sprintf("Environment variable %s is not set", name))
}
return ""
}
if value == "" {
panic(fmt.Sprintf("Environment variable %s is set to an empty string", name))
if required {
panic(fmt.Sprintf("Environment variable %s is set to an empty string", name))
}
}
return value
}

func NewEnvironment(t *testing.T) *Environment {
azureEnv := &Environment{
Environment: common.NewEnvironment(t),
SubscriptionID: readEnv("AZURE_SUBSCRIPTION_ID"),
ClusterName: readEnv("AZURE_CLUSTER_NAME"),
ClusterResourceGroup: readEnv("AZURE_RESOURCE_GROUP"),
ACRName: readEnv("AZURE_ACR_NAME"),
SubscriptionID: readEnv("AZURE_SUBSCRIPTION_ID", true),
ClusterName: readEnv("AZURE_CLUSTER_NAME", true),
ClusterResourceGroup: readEnv("AZURE_RESOURCE_GROUP", true),
ACRName: readEnv("AZURE_ACR_NAME", true),
ProvisionMode: readEnv("PROVISION_MODE", false),
Region: lo.Ternary(os.Getenv("AZURE_LOCATION") == "", "westus2", os.Getenv("AZURE_LOCATION")),
tracker: azure.NewTracker(),
}
Expand All @@ -124,6 +133,17 @@ func NewEnvironment(t *testing.T) *Environment {
azureEnv.KeyVaultClient = lo.Must(armkeyvault.NewVaultsClient(azureEnv.SubscriptionID, cred, byokRetryOptions))
azureEnv.DiskEncryptionSetClient = lo.Must(armcompute.NewDiskEncryptionSetsClient(azureEnv.SubscriptionID, cred, byokRetryOptions))
azureEnv.RBACManager = lo.Must(NewRBACManager(azureEnv.SubscriptionID, cred))
// Default to reserved managed machine agentpool name for NAP
azureEnv.MachineAgentPoolName = "aksmanagedap"
if azureEnv.Environment.InClusterController {
azureEnv.MachineAgentPoolName = "testmpool"
}
// Create our BYO testing Machine Pool, if running self-hosted, with machine mode specified
// > Note: this only has to occur once per test, since it's just a container for the machines,
// > meaning that there is no risk of the tests modifying the Machine Pool itself.
if azureEnv.InClusterController && azureEnv.IsMachineMode() {
azureEnv.ExpectRunInClusterControllerWithMachineMode()
}
return azureEnv
}

Expand All @@ -148,6 +168,10 @@ func (env *Environment) ClientOptionsForRBACPropagation() *arm.ClientOptions {
}
}

// IsMachineMode reports whether the environment's ProvisionMode is the AKS
// machine API mode (consts.ProvisionModeAKSMachineAPI).
func (env *Environment) IsMachineMode() bool {
	mode := env.ProvisionMode
	return mode == consts.ProvisionModeAKSMachineAPI
}

func (env *Environment) DefaultAKSNodeClass() *v1beta1.AKSNodeClass {
nodeClass := test.AKSNodeClass()
return nodeClass
Expand Down
21 changes: 21 additions & 0 deletions test/pkg/environment/azure/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
Expand All @@ -34,6 +35,26 @@ import (
"github.com/Azure/karpenter-provider-azure/pkg/providers/instance"
)

// ExpectRunInClusterControllerWithMachineMode creates the BYO machine agent
// pool and overrides the in-cluster controller's settings so that it
// provisions through the AKS machine API against that pool. It returns the
// created agent pool.
//
// It should only need to be called once, and only for InClusterController
// mode, because:
//   - when running in NAP mode, the machines agent pool is created for us
//   - the machine agent pool is just a container, so there is no risk of
//     tests modifying the agent pool itself.
func (env *Environment) ExpectRunInClusterControllerWithMachineMode() containerservice.AgentPool {
	GinkgoHelper()
	Expect(env.InClusterController).To(BeTrue(), "Should only create a byo Machine Pool when running as an InClusterController")
	By("Setup BYO Machine AgentPool for self-hosted testing")
	byoMachineAP := env.ExpectCreatedMachineAgentPool()
	// Fail with a readable assertion rather than a nil-pointer panic if the
	// returned agent pool carries no name.
	Expect(byoMachineAP.Name).ToNot(BeNil(), "Created machine agent pool has no name")
	env.ExpectSettingsOverridden([]v1.EnvVar{
		{Name: "PROVISION_MODE", Value: "aksmachineapi"},
		{Name: "MANAGE_EXISTING_AKS_MACHINES", Value: "true"},
		{Name: "AKS_MACHINES_POOL_NAME", Value: *byoMachineAP.Name},
		{Name: "USE_SIG", Value: "true"},
		// The controller setting is named SIG_SUBSCRIPTION_ID (see the
		// controller env list in karpenter-values-template.yaml);
		// AZURE_SIG_SUBSCRIPTION_ID is only the shell variable substituted
		// into that entry, so it is not a name the controller env uses.
		{Name: "SIG_SUBSCRIPTION_ID", Value: "109a5e88-712a-48ae-9078-9ca8b3c81345"},
	}...)
	return byoMachineAP
}

func (env *Environment) EventuallyExpectKarpenterNicsToBeDeleted() {
GinkgoHelper()
Eventually(func() bool {
Expand Down
39 changes: 39 additions & 0 deletions test/pkg/environment/azure/expectations_agentpools.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azure

import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"

containerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7"
)

// ExpectCreatedMachineAgentPool issues a create-or-update for the agent pool
// named env.MachineAgentPoolName on the test cluster with its mode set to
// Machines, waits for the long-running operation to finish, and returns the
// resulting agent pool. Any error fails the current spec.
func (env *Environment) ExpectCreatedMachineAgentPool() containerservice.AgentPool {
	GinkgoHelper()

	desiredProps := &containerservice.ManagedClusterAgentPoolProfileProperties{
		Mode: lo.ToPtr(containerservice.AgentPoolModeMachines),
	}
	desiredPool := containerservice.AgentPool{Properties: desiredProps}

	operation, err := env.agentpoolsClient.BeginCreateOrUpdate(
		env.Context,
		env.ClusterResourceGroup,
		env.ClusterName,
		env.MachineAgentPoolName,
		desiredPool,
		nil,
	)
	Expect(err).ToNot(HaveOccurred())

	result, err := operation.PollUntilDone(env.Context, nil)
	Expect(err).ToNot(HaveOccurred())

	return result.AgentPool
}
43 changes: 43 additions & 0 deletions test/pkg/environment/azure/expectations_machines.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azure

import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

containerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7"
)

// ExpectListMachines returns every machine reported for the agent pool named
// env.MachineAgentPoolName on the test cluster, walking all result pages.
// A nil pager or a page-fetch error fails the current spec.
func (env *Environment) ExpectListMachines() []*containerservice.Machine {
	GinkgoHelper()

	machinePager := env.machinesClient.NewListPager(
		env.ClusterResourceGroup,
		env.ClusterName,
		env.MachineAgentPoolName,
		nil,
	)
	Expect(machinePager).ToNot(BeNil())

	var collected []*containerservice.Machine
	for machinePager.More() {
		resp, err := machinePager.NextPage(env.Context)
		Expect(err).ToNot(HaveOccurred())
		collected = append(collected, resp.Value...)
	}
	return collected
}

// ExpectNoMachines asserts that listing the machines of the agent pool named
// env.MachineAgentPoolName yields no entries.
func (env *Environment) ExpectNoMachines() {
	GinkgoHelper()
	// Asserting on the slice itself (rather than on len(...) == 0) makes a
	// failure print the leftover machines instead of just a count.
	Expect(env.ExpectListMachines()).To(BeEmpty(), "expected no machines to remain in the machine agent pool")
}
3 changes: 3 additions & 0 deletions test/pkg/environment/azure/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ func (env *Environment) BeforeEach() {
func (env *Environment) Cleanup() {
env.Environment.Cleanup()
env.Environment.CleanupObjects(CleanableObjects...)
// > Note: under current usage no machines should exist here,
// > as scaledown should ensure the machines are deleted
env.ExpectNoMachines()

err := env.tracker.Cleanup()
Expect(err).ToNot(HaveOccurred(), "Failed to clean up Azure resources")
Expand Down
Loading
Loading