Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
1591111
feat: further AKS feature/quality/perf parity, through machine API in…
comtalyst Oct 1, 2025
79f231e
docs: update TODOs
comtalyst Oct 1, 2025
7e67087
feat: not logging error on create cleanup failure if not found
comtalyst Oct 7, 2025
beab993
fix: remove unnecessary perm assignments
comtalyst Oct 7, 2025
b225078
refactor: resolve changes from offerings refactor revamp
comtalyst Oct 7, 2025
f80c93f
fix: FIPS enablement logic
comtalyst Oct 9, 2025
9b13b25
feat: disable Artifact Streaming
comtalyst Oct 10, 2025
c71c516
test: update tests per new FIPS logic + stop depending on architecture
comtalyst Oct 16, 2025
76594df
feat: rework AKS machine/VM name handling
comtalyst Oct 17, 2025
113ffca
fix: missing parameter for CCP mode
comtalyst Oct 30, 2025
f7ee83b
feat: AKS machine visibility lock
comtalyst Nov 5, 2025
d44fbf7
feat: add PROVISION_MODE validation in configure-values.sh
comtalyst Dec 4, 2025
edeaa5b
style: new line on parameters
comtalyst Dec 4, 2025
61b0219
docs: update small comment
comtalyst Dec 4, 2025
9dc48d8
docs: add a comment on cluster config drift
comtalyst Dec 4, 2025
cc7d90c
refactor: rename to isMachineDrifted to match others
comtalyst Dec 4, 2025
9d8e943
docs: add a clarification comment
comtalyst Dec 4, 2025
070afe1
refactor: various inplaceupdate updates
comtalyst Dec 4, 2025
6fbf32b
chore: cleanup options
comtalyst Dec 4, 2025
1e059aa
chore: misc improvements in instance provider
comtalyst Dec 4, 2025
02cda6c
chore: misc improvements in inplaceupdate
comtalyst Dec 4, 2025
086faec
refactor: rework AKS machine patching flow
comtalyst Dec 5, 2025
c64112c
refactor: share ExpectLaunched() between VM and machine instances
comtalyst Dec 5, 2025
c1edb32
chore: conflicts resolution
comtalyst Dec 5, 2025
ccb60d2
chore: linter fixes
comtalyst Dec 5, 2025
5ebafe9
test: exclude label test for now
comtalyst Dec 5, 2025
793ce71
fix: use init taints instead of taints for Machine API
comtalyst Dec 5, 2025
cf367c8
chore: remove a dev script for now
comtalyst Dec 5, 2025
e542bd0
ci: fix actions for E2E run
comtalyst Dec 6, 2025
fec6c18
fix: again
comtalyst Dec 6, 2025
39977a1
refactor: catch panic within AKSMachinePromise.WaitFunc
comtalyst Dec 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/actions/e2e/install-karpenter/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ runs:
AZURE_CLUSTER_NAME: ${{ inputs.cluster_name }}
AZURE_LOCATION: ${{ inputs.location }}
PROVISION_MODE: ${{ inputs.provisionmode }}
run: make az-configure-values
run: |
if [ "$PROVISION_MODE" = "aksmachineapi" ]; then
make az-configure-values-aksmachine
else
make az-configure-values
fi
- name: deploy karpenter to cluster
shell: bash
env:
Expand Down
16 changes: 15 additions & 1 deletion Makefile-az.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ else
endif

AZURE_ACR_SUFFIX ?= azurecr.io
AZURE_SIG_SUBSCRIPTION_ID ?= $(AZURE_SUBSCRIPTION_ID)
# Subscription hosting the shared image gallery (SIG) that az-perm-sig grants Reader on.
# Keep in sync with the SIG subscription hack/deploy/configure-values.sh sets for aksmachineapi mode.
AZURE_SIG_SUBSCRIPTION_ID ?= 109a5e88-712a-48ae-9078-9ca8b3c81345
AZURE_CLUSTER_NAME ?= $(COMMON_NAME)
AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)

Expand All @@ -23,6 +23,8 @@ KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID
CUSTOM_VNET_NAME ?= $(AZURE_CLUSTER_NAME)-vnet
CUSTOM_SUBNET_NAME ?= nodesubnet

AKS_MACHINES_POOL_NAME ?= testmpool

.DEFAULT_GOAL := help # make without arguments will show help

az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload
Expand All @@ -31,6 +33,8 @@ az-all-cniv1: az-login az-create-workload-msi az-mkaks-cniv1 az-cre

az-all-cni-overlay: az-login az-create-workload-msi az-mkaks-overlay az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload

az-all-aksmachine: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-perm-aksmachine az-add-aksmachinespool az-configure-values-aksmachine az-build az-run az-run-sample ## Provision the infra (ACR,AKS) plus an AKS machines pool; build and deploy Karpenter in AKS machine API mode; deploy sample Provisioner and workload

az-all-perftest: az-login az-create-workload-msi az-mkaks-perftest az-create-federated-cred az-perm az-perm-acr az-configure-values
$(MAKE) az-mon-deploy
$(MAKE) az-pprof-enable
Expand Down Expand Up @@ -177,12 +181,18 @@ az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR
az aks get-credentials --resource-group $(AZURE_RESOURCE_GROUP) --name $(AZURE_CLUSTER_NAME) --overwrite-existing
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter

az-add-aksmachinespool: ## Add the AKS machines pool to the cluster (runs hack/deploy/add-aks-machines-pool.sh)
hack/deploy/add-aks-machines-pool.sh $(AZURE_SUBSCRIPTION_ID) $(AZURE_RESOURCE_GROUP) $(AZURE_CLUSTER_NAME) $(AKS_MACHINES_POOL_NAME)

az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use with care!)
az group delete --name $(AZURE_RESOURCE_GROUP)

# LOG_LEVEL=debug is handed to the script through its environment; configure-values.sh
# falls back to "info" when LOG_LEVEL is not set.
az-configure-values: ## Generate cluster-related values for Karpenter Helm chart and set middleware logging flag
LOG_LEVEL=debug hack/deploy/configure-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(KARPENTER_SERVICE_ACCOUNT_NAME) $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME) $(ENABLE_AZURE_SDK_LOGGING)

# Passes the two optional trailing arguments of configure-values.sh: the provision mode
# ("aksmachineapi") and the AKS machines pool name. Unlike az-configure-values, LOG_LEVEL is not overridden here.
az-configure-values-aksmachine: ## Generate cluster-related values for Karpenter Helm chart (AKS machine API provision mode)
hack/deploy/configure-values.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) $(KARPENTER_SERVICE_ACCOUNT_NAME) $(AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME) $(ENABLE_AZURE_SDK_LOGGING) aksmachineapi $(AKS_MACHINES_POOL_NAME)

az-mkvmssflex: ## Create VMSS Flex (optional, only if creating VMs referencing this VMSS)
az vmss create --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --location $(AZURE_LOCATION) \
--instance-count 0 --orchestration-mode Flexible --platform-fault-domain-count 1 --zones 1 2 3
Expand All @@ -198,6 +208,10 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator"
@echo Consider "make az-configure-values"!

# NOTE: despite its name, KARPENTER_USER_ASSIGNED_CLIENT_ID is set to the identity's principalId
# (see --query 'principalId') and is passed as --assignee-object-id.
# The role is scoped to the cluster's own resource group (AZURE_RESOURCE_GROUP), not the MC_ resource group.
az-perm-aksmachine: ## Create role assignments for AKS machine API operations
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Azure Kubernetes Service Contributor Role"

az-perm-sig: ## Create role assignments when testing with SIG images
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee-object-id $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --assignee-principal-type "ServicePrincipal" --role "Reader" --scope /subscriptions/$(AZURE_SIG_SUBSCRIPTION_ID)/resourceGroups/AKS-Ubuntu/providers/Microsoft.Compute/galleries/AKSUbuntu
Expand Down
2 changes: 2 additions & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func main() {
aksCloudProvider := cloudprovider.New(
op.InstanceTypesProvider,
op.VMInstanceProvider,
op.AKSMachineProvider,
op.EventRecorder,
op.GetClient(),
op.ImageProvider,
Expand Down Expand Up @@ -80,6 +81,7 @@ func main() {
op.EventRecorder,
aksCloudProvider,
op.VMInstanceProvider,
op.AKSMachineProvider,
// TODO: still need to refactor ImageProvider side of things.
op.KubernetesVersionProvider,
op.ImageProvider,
Expand Down
2 changes: 2 additions & 0 deletions cmd/controller/main_ccp.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func main() {
aksCloudProvider := cloudprovider.New(
op.InstanceTypesProvider,
op.VMInstanceProvider,
op.AKSMachineProvider,
op.EventRecorder,
op.GetClient(),
op.ImageProvider,
Expand Down Expand Up @@ -80,6 +81,7 @@ func main() {
op.EventRecorder,
aksCloudProvider,
op.VMInstanceProvider,
op.AKSMachineProvider,
// TODO: still need to refactor ImageProvider side of things.
op.KubernetesVersionProvider,
op.ImageProvider,
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2 v2.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7 v7.2.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7 v7.3.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.2.0-beta.1
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.5.0
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontai
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5 v5.0.0/go.mod h1:HcZY0PHPo/7d75p99lB6lK0qYOP4vLRJUBpiehYXtLQ=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6 v6.6.0 h1:xkWEcbsnJWid3rOf/S/LOHy1I55JA+4kw/f8Tnm+Onc=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6 v6.6.0/go.mod h1:OWKfCmX4X3Vp2w7GSx1LZn8566tOHJBA6K0IAUVNYx0=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7 v7.3.0 h1:owjZtM7eVTSYIh4XAdUvWig9rV+BEra4bEnOnpXOAco=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v7 v7.3.0/go.mod h1:bzstes8qsGAonl7WqKwIvWhGlfMCywgk1nons7nuNmw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.2.0-beta.1 h1:UQJfF+S5EmIkmzUOnBJEZBWX+zKEyeKbuwB9aU8wl+E=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.2.0-beta.1/go.mod h1:th/SdaKxIrBPp0AgLOb3wS3sZ39rirYadqU/JL29rLg=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.0.0 h1:lMW1lD/17LUA5z1XTURo7LcVG2ICBPlyMHjIUrcFZNQ=
Expand Down
24 changes: 20 additions & 4 deletions hack/deploy/configure-values.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ set -euo pipefail
# This script interrogates the AKS cluster and Azure resources to generate
# the karpenter-values.yaml file using the karpenter-values-template.yaml file as a template.

if [ "$#" -ne 5 ]; then
echo "Usage: $0 <cluster-name> <resource-group> <karpenter-service-account-name> <karpenter-user-assigned-identity-name> <enable-azure-sdk-logging>"
if [ "$#" -lt 5 ] || [ "$#" -gt 7 ]; then
echo "Usage: $0 <cluster-name> <resource-group> <karpenter-service-account-name> <karpenter-user-assigned-identity-name> <enable-azure-sdk-logging> [provision-mode] [aks-machines-pool-name]"
exit 1
fi

Expand All @@ -15,6 +15,14 @@ AZURE_RESOURCE_GROUP=$2
KARPENTER_SERVICE_ACCOUNT_NAME=$3
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME=$4
ENABLE_AZURE_SDK_LOGGING=$5
PROVISION_MODE=${6:-}
AKS_MACHINES_POOL_NAME=${7:-testmpool}

# Accept an empty provision mode or one of the known modes; reject anything else.
case "$PROVISION_MODE" in
    "" | aksmachineapi | bootstrappingclient | aksscriptless)
        ;;
    *)
        echo "Error: Invalid provision-mode '$PROVISION_MODE'. Must be one of: aksmachineapi, bootstrappingclient, aksscriptless, or empty"
        exit 1
        ;;
esac

# Optional values through env vars:
LOG_LEVEL=${LOG_LEVEL:-"info"}
Expand Down Expand Up @@ -69,9 +77,17 @@ NODE_IDENTITIES=$(jq -r ".identityProfile.kubeletidentity.resourceId" <<< "$AKS_
KARPENTER_USER_ASSIGNED_CLIENT_ID=$(az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)
KUBELET_IDENTITY_CLIENT_ID=$(jq -r ".identityProfile.kubeletidentity.clientId // empty" <<< "$AKS_JSON")

export CLUSTER_NAME AZURE_LOCATION AZURE_RESOURCE_GROUP_MC KARPENTER_SERVICE_ACCOUNT_NAME \
AZURE_SIG_SUBSCRIPTION_ID=""
USE_SIG="false"
# For Machine API mode
if [[ "${PROVISION_MODE:-}" == "aksmachineapi" ]]; then
USE_SIG="true"
AZURE_SIG_SUBSCRIPTION_ID=109a5e88-712a-48ae-9078-9ca8b3c81345
fi

export CLUSTER_NAME AZURE_LOCATION AZURE_RESOURCE_GROUP AZURE_RESOURCE_GROUP_MC KARPENTER_SERVICE_ACCOUNT_NAME \
CLUSTER_ENDPOINT BOOTSTRAP_TOKEN SSH_PUBLIC_KEY VNET_SUBNET_ID KARPENTER_USER_ASSIGNED_CLIENT_ID NODE_IDENTITIES AZURE_SUBSCRIPTION_ID NETWORK_PLUGIN NETWORK_PLUGIN_MODE NETWORK_POLICY \
LOG_LEVEL VNET_GUID KUBELET_IDENTITY_CLIENT_ID ENABLE_AZURE_SDK_LOGGING
LOG_LEVEL VNET_GUID KUBELET_IDENTITY_CLIENT_ID ENABLE_AZURE_SDK_LOGGING PROVISION_MODE USE_SIG AZURE_SIG_SUBSCRIPTION_ID AKS_MACHINES_POOL_NAME

# get karpenter-values-template.yaml, if not already present (e.g. outside of repo context)
if [ ! -f karpenter-values-template.yaml ]; then
Expand Down
10 changes: 8 additions & 2 deletions karpenter-values-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,19 @@ controller:

# managed karpenter settings
- name: USE_SIG
value: "false"
value: "${USE_SIG}"
- name: SIG_ACCESS_TOKEN_SERVER_URL
value: ""
- name: SIG_ACCESS_TOKEN_SCOPE
value: ""
- name: SIG_SUBSCRIPTION_ID
value: ""
value: "${AZURE_SIG_SUBSCRIPTION_ID}"
- name: PROVISION_MODE
  # Quoted so an empty PROVISION_MODE (the default when no mode is passed) renders as "" instead of a YAML null.
  value: "${PROVISION_MODE}"
- name: AKS_MACHINES_POOL_NAME
  value: "${AKS_MACHINES_POOL_NAME}"
- name: ARM_RESOURCE_GROUP
  # Quoted so the substituted value is always a YAML string.
  value: "${AZURE_RESOURCE_GROUP}"
serviceAccount:
name: ${KARPENTER_SERVICE_ACCOUNT_NAME}
annotations:
Expand Down
191 changes: 0 additions & 191 deletions merge-branches-v2.sh

This file was deleted.

1 change: 1 addition & 0 deletions pkg/apis/v1beta1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ var (

AnnotationAKSNodeClassHash = apis.Group + "/aksnodeclass-hash"
AnnotationAKSNodeClassHashVersion = apis.Group + "/aksnodeclass-hash-version"
AnnotationAKSMachineResourceID = apis.Group + "/aks-machine-resource-id" // resource ID of the associated AKS machine
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least at times in the past, we've duplicated some of this between v1alpha1 and v1beta1. I'm not sure whether we should keep doing it, but it may be worth doing, at least for now, until we drop v1alpha1 entirely?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a v1alpha1 to add this to anymore. I think we dropped it?
v1alpha2 is still there, but it has no label duplication.
By the way, what is the expected impact of this?

@tallaxes, thoughts?

)

const (
Expand Down
Loading
Loading