Commit aa46b59

chore: Standardized logging with consistent keys and WithValues usage (#968)
* chore: convert fmt.Sprintf logging to structured logging WithValues
* resolved tallaxes comments
* resolved tallaxes comments
* fix: single-use WithValues() changed to direct key value pairs, pricing.go added region WithValues
* fix: WARN messages edited to be V(0) logs with more descriptive messages
* fix: WARN messages have V(-1) level
* fix: WithValues logging removed
* fix: resolving tallaxes comments
* chore: camelcase for key values for logs
* chore: resourceName key edited to be more specific, Kubernetes resources capitalized, k8s expanded out
* docs: logging guidelines
* docs: more descriptive between V(-1) and Error + formatting
* fix: drop V(-1) for fallback and change to kebab case for instanceType and capacityType
* fix: drop all V(-1) to Info

---------

Co-authored-by: Alex Leites <[email protected]>
1 parent 0faf842 commit aa46b59
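
The thrust of the change is to replace fmt.Sprintf-formatted messages with a constant message plus key/value pairs that log backends can index. A minimal standalone sketch of the before/after pattern (the helper function names are illustrative, not taken from the repository):

package example

import (
	"context"
	"fmt"

	"sigs.k8s.io/controller-runtime/pkg/log"
)

// Before: the variable data is baked into the message string, so every log line
// has a different message and the VM name cannot be queried as a field.
func logDeleteFailureOld(ctx context.Context, vmName string, err error) {
	log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete VM %s", vmName))
}

// After: the message is constant and the variable data rides along as key/value pairs.
func logDeleteFailureNew(ctx context.Context, vmName string, err error) {
	log.FromContext(ctx).Error(err, "failed to delete VM", "vmName", vmName)
}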

File tree

19 files changed (+110, -84 lines)


pkg/cache/unavailableofferings.go

Lines changed: 2 additions & 2 deletions
@@ -75,12 +75,12 @@ func (u *UnavailableOfferings) MarkSpotUnavailableWithTTL(ctx context.Context, t
 // MarkUnavailableWithTTL allows us to mark an offering unavailable with a custom TTL
 func (u *UnavailableOfferings) MarkUnavailableWithTTL(ctx context.Context, unavailableReason, instanceType, zone, capacityType string, ttl time.Duration) {
 	// even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL
-	log.FromContext(ctx).WithValues(
+	log.FromContext(ctx).V(1).Info("removing offering from offerings",
 		"unavailable", unavailableReason,
 		"instance-type", instanceType,
 		"zone", zone,
 		"capacity-type", capacityType,
-		"ttl", ttl).V(1).Info("removing offering from offerings")
+		"ttl", ttl)
 	u.cache.Set(key(instanceType, zone, capacityType), struct{}{}, ttl)
 	atomic.AddUint64(&u.SeqNum, 1)
 }

pkg/cloudprovider/cloudprovider.go

Lines changed: 5 additions & 6 deletions
@@ -190,13 +190,13 @@ func (c *CloudProvider) waitOnPromise(ctx context.Context, promise *instance.Vir
 	vmName := lo.FromPtr(promise.VM.Name)
 	err = c.instanceProvider.Delete(ctx, vmName)
 	if cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
-		log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete VM %s", vmName))
+		log.FromContext(ctx).Error(err, "failed to delete VM", "vmName", vmName)
 	}

 	if err = c.kubeClient.Delete(ctx, nodeClaim); err != nil {
 		err = client.IgnoreNotFound(err)
 		if err != nil {
-			log.FromContext(ctx).Error(err, "failed to delete nodeclaim %s, will wait for liveness TTL", nodeClaim.Name)
+			log.FromContext(ctx).Error(err, "failed to delete nodeclaim, will wait for liveness TTL", "NodeClaim", nodeClaim.Name)
 		}
 	}
 	metrics.NodeClaimsDisruptedTotal.Inc(map[string]string{
@@ -259,7 +259,7 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*karpv1.Nod
 	if err != nil {
 		return nil, fmt.Errorf("getting vm name, %w", err)
 	}
-	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", vmName))
+	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("vmName", vmName))
 	instance, err := c.instanceProvider.Get(ctx, vmName)
 	if err != nil {
 		return nil, fmt.Errorf("getting instance, %w", err)
@@ -295,8 +295,7 @@ func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *karpv1.N
 }

 func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *karpv1.NodeClaim) error {
-	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("nodeclaim", nodeClaim.Name))
-
+	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("NodeClaim", nodeClaim.Name))
 	vmName, err := utils.GetVMName(nodeClaim.Status.ProviderID)
 	if err != nil {
 		return fmt.Errorf("getting VM name, %w", err)
@@ -446,7 +445,7 @@ func (c *CloudProvider) instanceToNodeClaim(ctx context.Context, vm *armcompute.
 	}

 	if zone, err := utils.GetZone(vm); err != nil {
-		log.FromContext(ctx).Info(fmt.Sprintf("WARN: Failed to get zone for VM %s, %v", *vm.Name, err))
+		log.FromContext(ctx).Info("failed to get zone for VM, zone label will be empty", "vmName", *vm.Name, "error", err)
 	} else {
 		labels[corev1.LabelTopologyZone] = zone
 	}
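
This file also illustrates the rule from the commit message that single-use WithValues() calls become direct key/value arguments, while WithValues is kept where the enriched logger is pushed back into the context for downstream calls. A rough sketch of the two cases (helper names here are hypothetical):

package example

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/log"
)

// Single-use: pass the pair directly to the call instead of chaining WithValues.
func logGarbageCollectedNIC(ctx context.Context, nicName string) {
	log.FromContext(ctx).Info("garbage collected NIC", "nicName", nicName)
}

// Multi-use: attach the key once and store the logger in the context, so every
// later log line in the same operation carries "vmName" automatically.
func withVMName(ctx context.Context, vmName string) context.Context {
	return log.IntoContext(ctx, log.FromContext(ctx).WithValues("vmName", vmName))
}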

pkg/cloudprovider/drift.go

Lines changed: 18 additions & 12 deletions
@@ -88,7 +88,10 @@ func (c *CloudProvider) areStaticFieldsDrifted(ctx context.Context, nodeClaim *k
 	}

 	if nodeClassHash != nodeClaimHash {
-		logger.V(1).Info(fmt.Sprintf("drift triggered for %s, as nodeClassHash (%s) != nodeClaimHash (%s)", NodeClassDrift, nodeClassHash, nodeClaimHash))
+		logger.V(1).Info("drift triggered as nodeClassHash != nodeClaimHash",
+			"driftType", NodeClassDrift,
+			"nodeClassHash", nodeClassHash,
+			"nodeClaimHash", nodeClaimHash)
 		return NodeClassDrift, nil
 	}

@@ -104,7 +107,7 @@ func (c *CloudProvider) isK8sVersionDrifted(ctx context.Context, nodeClaim *karp
 		// Note: we don't consider this a hard failure for drift if the KubernetesVersion is invalid/not ready to use, so we ignore returning the error here.
 		// We simply ensure the stored version is valid and ready to use, if we are to calculate potential Drift based on it.
 		// TODO (charliedmcb): I'm wondering if we actually want to have these soft-error cases switch to return an error if no-drift condition was found across all of IsDrifted.
-		logger.Info(fmt.Sprintf("WARN: Kubernetes version readiness invalid when checking drift: %s", err))
+		logger.Info("kubernetes version not ready, skipping drift check", "error", err)
 		return "", nil //nolint:nilerr
 	}

@@ -115,7 +118,10 @@ func (c *CloudProvider) isK8sVersionDrifted(ctx context.Context, nodeClaim *karp

 	nodeK8sVersion := strings.TrimPrefix(node.Status.NodeInfo.KubeletVersion, "v")
 	if nodeK8sVersion != k8sVersion {
-		logger.V(1).Info(fmt.Sprintf("drift triggered for %s, with expected k8s version %s, and actual k8s version %s", K8sVersionDrift, k8sVersion, nodeK8sVersion))
+		logger.V(1).Info("drift triggered due to k8s version mismatch",
+			"driftType", K8sVersionDrift,
+			"expectedKubernetesVersion", k8sVersion,
+			"actualKubernetesVersion", nodeK8sVersion)
 		return K8sVersionDrift, nil
 	}
 	return "", nil
@@ -170,7 +176,7 @@ func (c *CloudProvider) isImageVersionDrifted(
 		// Note: we don't consider this a hard failure for drift if the Images are not ready to use, so we ignore returning the error here.
 		// The stored Images must be ready to use if we are to calculate potential Drift based on them.
 		// TODO (charliedmcb): I'm wondering if we actually want to have these soft-error cases switch to return an error if no-drift condition was found across all of IsDrifted.
-		logger.Info(fmt.Sprintf("WARN: NodeImage readiness invalid when checking drift: %s", err))
+		logger.Info("node image not ready, skipping drift check", "error", err)
 		return "", nil //nolint:nilerr
 	}
 	if len(nodeImages) == 0 {
@@ -185,7 +191,9 @@
 		}
 	}

-	logger.V(1).Info(fmt.Sprintf("drift triggered for %s, as actual image id %s was not found in the set of currently available node images", ImageDrift, vmImageID))
+	logger.V(1).Info("drift triggered as actual image id was not found in the set of currently available node images",
+		"driftType", ImageDrift,
+		"actualImageID", vmImageID)
 	return ImageDrift, nil
 }

@@ -233,12 +241,10 @@ func (c *CloudProvider) isKubeletIdentityDrifted(ctx context.Context, nodeClaim
 	}

 	if kubeletIdentityClientID != opts.KubeletIdentityClientID {
-		logger.V(1).Info(
-			fmt.Sprintf("drift triggered for %s, with expected kubelet identity client id %s, and actual kubelet identity client id %s",
-				KubeletIdentityDrift,
-				opts.KubeletIdentityClientID,
-				kubeletIdentityClientID),
-		)
+		logger.V(1).Info("drift triggered due to expected and actual kubelet identity client id mismatch",
+			"driftType", KubeletIdentityDrift,
+			"expectedKubeletIdentityClientID", opts.KubeletIdentityClientID,
+			"actualKubeletIdentityClientID", kubeletIdentityClientID)
 		return KubeletIdentityDrift, nil
 	}

@@ -257,7 +263,7 @@ func (c *CloudProvider) getNodeForDrift(ctx context.Context, nodeClaim *karpv1.N
 		return nil, nil
 	}
 	if nodeclaimutils.IsDuplicateNodeError(err) {
-		logger.V(1).Info("WARN: Duplicate node error, invariant violated.")
+		logger.Info("duplicate node error detected, invariant violated")
 	}
 	return nil, err
 }
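
The drift checks above also show the verbosity convention the commit appears to settle on: debug-level drift detail goes to V(1), soft-error conditions that merely skip a check stay at Info with an "error" key, and only genuine failures use Error with an error object. A rough sketch, with made-up example values:

package example

import (
	"context"
	"errors"

	"sigs.k8s.io/controller-runtime/pkg/log"
)

func verbosityExamples(ctx context.Context) {
	logger := log.FromContext(ctx)

	// V(1): debug detail only interesting when diagnosing drift decisions (values are illustrative).
	logger.V(1).Info("drift triggered as nodeClassHash != nodeClaimHash",
		"driftType", "NodeClassDrift",
		"nodeClassHash", "abc123",
		"nodeClaimHash", "def456")

	// Info: a notable but non-fatal condition; the error is attached as a value rather than passed to Error.
	logger.Info("kubernetes version not ready, skipping drift check", "error", errors.New("not ready"))

	// Error: an actual failure the caller could not recover from.
	logger.Error(errors.New("delete failed"), "failed to delete VM", "vmName", "example-vm")
}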

pkg/controllers/nodeclaim/garbagecollection/instance_garbagecollection.go

Lines changed: 2 additions & 2 deletions
@@ -94,7 +94,7 @@ func (c *VirtualMachine) Reconcile(ctx context.Context) (reconcile.Result, error
 }

 func (c *VirtualMachine) garbageCollect(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodeList *v1.NodeList) error {
-	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("provider-id", nodeClaim.Status.ProviderID))
+	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("providerID", nodeClaim.Status.ProviderID))
 	if err := c.cloudProvider.Delete(ctx, nodeClaim); err != nil {
 		return corecloudprovider.IgnoreNodeClaimNotFoundError(err)
 	}
@@ -107,7 +107,7 @@ func (c *VirtualMachine) garbageCollect(ctx context.Context, nodeClaim *karpv1.N
 		if err := c.kubeClient.Delete(ctx, &node); err != nil {
 			return client.IgnoreNotFound(err)
 		}
-		log.FromContext(ctx).WithValues("node", node.Name).V(1).Info("garbage collected node")
+		log.FromContext(ctx).V(1).Info("garbage collected node", "Node", node.Name)
 	}
 	return nil
 }

pkg/controllers/nodeclaim/garbagecollection/nic_garbagecollection.go

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ func (c *NetworkInterface) Reconcile(ctx context.Context) (reconcile.Result, err
 				return
 			}

-			log.FromContext(ctx).WithValues("nic", nicName).Info("garbage collected NIC")
+			log.FromContext(ctx).Info("garbage collected NIC", "nicName", nicName)
 		}
 	})
 	return reconcile.Result{

pkg/controllers/nodeclaim/inplaceupdate/controller.go

Lines changed: 3 additions & 2 deletions
@@ -84,7 +84,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClaim *karpv1.NodeClaim)
 	}
 	actualHash := nodeClaim.Annotations[v1beta1.AnnotationInPlaceUpdateHash]

-	log.FromContext(ctx).V(1).Info(fmt.Sprintf("goal hash is: %q, actual hash is: %q", goalHash, actualHash))
+	log.FromContext(ctx).V(1).Info("comparing hashes", "goalHash", goalHash, "actualHash", actualHash)

 	// If there's no difference from goal state, no need to do anything else
 	if goalHash == actualHash {
@@ -95,6 +95,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClaim *karpv1.NodeClaim)
 	if err != nil {
 		return reconcile.Result{}, err
 	}
+	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("vmName", vmName))

 	vm, err := c.instanceProvider.Get(ctx, vmName)
 	if err != nil {
@@ -184,6 +185,6 @@ func logVMPatch(ctx context.Context, update *armcompute.VirtualMachineUpdate) {
 		raw, _ := json.Marshal(update)
 		rawStr = string(raw)
 	}
-	log.FromContext(ctx).V(1).Info(fmt.Sprintf("applying patch to Azure VM: %s", rawStr))
+	log.FromContext(ctx).V(1).Info("applying patch to Azure VM", "vmPatch", rawStr)
 }
 }

pkg/controllers/nodeclass/status/images.go

Lines changed: 2 additions & 2 deletions
@@ -161,7 +161,7 @@ func (r *NodeImageReconciler) Reconcile(ctx context.Context, nodeClass *v1beta1.

 	// We care about the ordering of the slices here, as it translates to priority during selection, so not treating them as sets
 	if utils.HasChanged(nodeClass.Status.Images, goalImages, &hashstructure.HashOptions{SlicesAsSets: false}) {
-		logger.WithValues("existingImages", nodeClass.Status.Images).WithValues("newImages", goalImages).Info("new available images updated for nodeclass")
+		logger.Info("new available images updated for nodeclass", "existingImages", nodeClass.Status.Images, "newImages", goalImages)
 	}
 	nodeClass.Status.Images = goalImages
 	nodeClass.StatusConditions().SetTrue(v1beta1.ConditionTypeImagesReady)
@@ -203,7 +203,7 @@ func (r *NodeImageReconciler) isMaintenanceWindowOpen(ctx context.Context) (bool
 	// TODO: In the longer run, the maintenance window handling should be factored out into a sharable provider, rather than being contained
 	// within the image controller itself.
 	if r.cm.HasChanged("nodeclass-maintenancewindowdata", mwConfigMap.Data) {
-		logger.WithValues("maintenanceWindowData", mwConfigMap.Data).Info("new maintenance window data discovered")
+		logger.Info("new maintenance window data discovered", "maintenanceWindowData", mwConfigMap.Data)
 	}
 	if len(mwConfigMap.Data) == 0 {
 		// An empty configmap means there's no maintenance windows defined, and its up to us when to preform maintenance

pkg/controllers/nodeclass/status/kubernetesversion.go

Lines changed: 3 additions & 3 deletions
@@ -80,7 +80,7 @@ func (r *KubernetesVersionReconciler) Reconcile(ctx context.Context, nodeClass *

 	// Handles case 1: init, update kubernetes status to API server version found
 	if !nodeClass.StatusConditions().Get(v1beta1.ConditionTypeKubernetesVersionReady).IsTrue() || nodeClass.Status.KubernetesVersion == "" {
-		logger.V(1).Info(fmt.Sprintf("init kubernetes version: %s", goalK8sVersion))
+		logger.V(1).Info("init kubernetes version", "goalKubernetesVersion", goalK8sVersion)
 	} else {
 		// Check if there is an upgrade
 		newK8sVersion, err := semver.Parse(goalK8sVersion)
@@ -93,10 +93,10 @@ func (r *KubernetesVersionReconciler) Reconcile(ctx context.Context, nodeClass *
 		}
 		// Handles case 2: Upgrade kubernetes version [Note: we set node image to not ready, since we upgrade node image when there is a kubernetes upgrade]
 		if newK8sVersion.GT(currentK8sVersion) {
-			logger.V(1).Info(fmt.Sprintf("kubernetes upgrade detected: from %s (current), to %s (discovered)", currentK8sVersion.String(), newK8sVersion.String()))
+			logger.V(1).Info("kubernetes upgrade detected", "currentKubernetesVersion", currentK8sVersion.String(), "discoveredKubernetesVersion", newK8sVersion.String())
 			nodeClass.StatusConditions().SetFalse(v1beta1.ConditionTypeImagesReady, "KubernetesUpgrade", "Performing kubernetes upgrade, need to get latest images")
 		} else if newK8sVersion.LT(currentK8sVersion) {
-			logger.Info(fmt.Sprintf("WARN: detected potential kubernetes downgrade: from %s (current), to %s (discovered)", currentK8sVersion.String(), newK8sVersion.String()))
+			logger.Info("detected potential kubernetes downgrade, keeping current version", "currentKubernetesVersion", currentK8sVersion.String(), "discoveredKubernetesVersion", newK8sVersion.String())
 			// We do not currently support downgrading, so keep the kubernetes version the same
 			goalK8sVersion = nodeClass.Status.KubernetesVersion
 		}

pkg/operator/operator.go

Lines changed: 3 additions & 3 deletions
@@ -262,20 +262,20 @@ func WaitForCRDs(ctx context.Context, timeout time.Duration, config *rest.Config
 		return fmt.Errorf("creating dynamic rest mapper, %w", err)
 	}

-	log.WithValues("timeout", timeout).Info("waiting for required CRDs to be available")
+	log.Info("waiting for required CRDs to be available", "timeout", timeout)
 	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()

 	for _, gvk := range requiredGVKs {
 		err := wait.PollUntilContextCancel(ctx, 10*time.Second, true, func(ctx context.Context) (bool, error) {
 			if _, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version); err != nil {
 				if meta.IsNoMatchError(err) {
-					log.V(1).WithValues("gvk", gvk).Info("waiting for CRD to be available")
+					log.V(1).Info("waiting for CRD to be available", "gvk", gvk)
 					return false, nil
 				}
 				return false, err
 			}
-			log.V(1).WithValues("gvk", gvk).Info("CRD is available")
+			log.V(1).Info("CRD is available", "gvk", gvk)
 			return true, nil
 		})
 		if err != nil {

pkg/providers/imagefamily/resolver.go

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ func (r *defaultResolver) Resolve(
 		return nil, err
 	}

-	log.FromContext(ctx).Info(fmt.Sprintf("Resolved image %s for instance type %s", imageID, instanceType.Name))
+	log.FromContext(ctx).Info("resolved image", "imageID", imageID, "instance-type", instanceType.Name)

 	// TODO: as ProvisionModeBootstrappingClient path develops, we will eventually be able to drop the retrieval of imageDistro here.
 	imageDistro, err := mapToImageDistro(imageID, imageFamily)
