Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/daemon/openshift/openshift.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (s *Service) Start(ctx context.Context, a any) {
metricCollectionRoutine := &recovery.RecoverableRoutine{
Routine: runMetricCollection,
RoutineArg: runMetricCollectionArgs{s},
ErrorCode: usagemetrics.OpenShiftMetricCollectionFailure,
ErrorCode: usagemetrics.OpenShiftGenericMetricsFailure,
UsageLogger: *usagemetrics.UsageLogger,
ExpectedMinDuration: s.Config.GetOpenshiftConfiguration().GetCollectionFrequency().AsDuration(),
}
Expand Down Expand Up @@ -130,6 +130,7 @@ func collectMetrics(ctx context.Context, args runMetricCollectionArgs) {
if err := metricClient.SendMetricsToWLM(ctx, args.s.Config, metrics); err != nil {
// This fails silently so that the loop keeps running.
log.CtxLogger(ctx).Errorw("failed to write metrics to WLM", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftWLMRequestFailure)
return
}
log.CtxLogger(ctx).Debug("Metrics successfully sent to WLM")
Expand Down
19 changes: 15 additions & 4 deletions internal/openshiftmetrics/openshiftmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/client-go/rest"
"google.golang.org/protobuf/encoding/protojson"
"github.com/GoogleCloudPlatform/workloadagent/internal/openshiftmetrics/clients/openshift"
"github.com/GoogleCloudPlatform/workloadagent/internal/usagemetrics"
"github.com/GoogleCloudPlatform/workloadagent/internal/workloadmanager"
"github.com/GoogleCloudPlatform/workloadagentplatform/sharedlibraries/log"

Expand Down Expand Up @@ -113,44 +114,54 @@ func (o *OpenShiftMetrics) CollectMetrics(ctx context.Context, versionData Metri
// not fail the entire collection process.
if err := o.collectCusterVersionData(ctx, payload); err != nil {
logger.Warnw("Failed to collect cluster version data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

namespaces, err := o.collectNamespaceData(ctx, payload)
if err != nil {
logger.Warnw("Failed to collect namespace data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectDeploymentData(ctx, namespaces, payload); err != nil {
logger.Warnw("Failed to collect deployment data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectPersistentVolumeClaims(ctx, namespaces, payload); err != nil {
logger.Warnw("Failed to collect persistent volume claims data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectStorageClasses(ctx, payload); err != nil {
logger.Warnw("Failed to collect storage classes data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

// TODO: Clean this up once we have a better way to handle the config maps we need.
if err := o.collectConfigMaps(ctx, []string{WLMNamespace}, payload); err != nil {
logger.Warnw("Failed to collect config maps data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectCSIDrivers(ctx, payload); err != nil {
logger.Warnw("Failed to collect CSI drivers data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectCloudCredentialConfig(ctx, payload); err != nil {
logger.Warnw("Failed to collect cloud credential config data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectCustomResourceDefinitions(ctx, payload); err != nil {
logger.Warnw("Failed to collect custom resource definitions data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

if err := o.collectNodes(ctx, payload); err != nil {
logger.Warnw("Failed to collect nodes data", "error", err)
usagemetrics.Error(usagemetrics.OpenShiftMetricCollectionFailure)
}

logger.Debugw("Metric payload after collection", "payload", payload)
Expand Down Expand Up @@ -308,8 +319,8 @@ func (o *OpenShiftMetrics) collectDeploymentData(ctx context.Context, namespaces
})
}
containers = append(containers, &ompb.Container{
Env: env,
EnvFrom: envFrom,
Env: env,
EnvFrom: envFrom,
VolumeMounts: volumeMounts,
})
}
Expand Down Expand Up @@ -349,8 +360,8 @@ func (o *OpenShiftMetrics) collectDeploymentData(ctx context.Context, namespaces
})
}
initContainers = append(initContainers, &ompb.Container{
Env: env,
EnvFrom: envFrom,
Env: env,
EnvFrom: envFrom,
VolumeMounts: volumeMounts,
})
}
Expand Down
2 changes: 2 additions & 0 deletions internal/usagemetrics/usagemetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ const (
MongoDBDiscoveryFailure = 34
StartDaemonFailure = 35
GuestActionsFailure = 36
OpenShiftGenericMetricsFailure = 37
OpenShiftWLMRequestFailure = 38
)

// Agent wide action mappings.
Expand Down
Loading