NewCompare CPU & GPU pricing across AWS, Azure & GCP
Pulumi

Workload Policies

Configure workload rightsizing policies and attach them to clusters.

Workload Policies

WorkloadPolicy defines how workloads should be rightsized — CPU and memory vertical scaling, horizontal scaling, and the triggers that activate them. WorkloadPolicyTarget attaches a policy to one or more clusters with optional namespace and workload kind filters.

WorkloadPolicy

Example

import { resources, types } from "@devzero/pulumi-devzero";

// Rightsizing policy with CPU and memory vertical scaling enabled. Recommendations
// are applied both on detection events and on a 15-minute cron schedule.
const policy = new resources.WorkloadPolicy("cost-saving-policy", {
    name: "cost-saving-policy",                        // unique within team
    description: "Rightsize non-critical workloads",
    loopbackPeriodSeconds: 86400,                      // 1 day (shown as "Lookback Period" in the dashboard)
    cooldownMinutes: 300,                              // 5 hours between successive scale-down actions
    minDataPoints: 20,                                 // min samples before any recommendation
    actionTriggers: ["on_detection", "on_schedule"],   // when to apply recommendations
    cronSchedule: "*/15 * * * *",                      // every 15 min; required when "on_schedule" is set
    detectionTriggers: ["pod_creation", "pod_update"], // used when "on_detection" is set
    cpuVerticalScaling: {
        enabled: true,
        targetPercentile: 0.75,        // P75 of observed usage
        minRequest: 25,                // millicores; hard floor
        maxScaleUpPercent: 1000,       // % per step
        maxScaleDownPercent: 1,        // % per step
        minDataPoints: 20,             // min CPU samples
        adjustReqEvenIfNotSet: true,   // set requests even if workload has none
        limitsRemovalEnabled: true,    // strip CPU limits (cycles compress safely)
    },
    memoryVerticalScaling: {
        enabled: true,
        targetPercentile: 1,           // P100 — guard against OOMKills
        minRequest: 134217728,         // 128 MiB in bytes; hard floor
        maxScaleUpPercent: 1000,       // % per step
        maxScaleDownPercent: 1,        // % per step
        overheadMultiplier: 0.3,       // extra headroom over the recommendation
        limitsAdjustmentEnabled: true, // adjust limits alongside requests
        limitMultiplier: 1,            // limits = request × this
        minDataPoints: 20,             // min memory samples
        adjustReqEvenIfNotSet: true,   // set requests even if workload has none
    },
    enablePmaxProtection: true,                        // guard against spike-induced OOMKills
    pmaxRatioThreshold: 3,                             // raise requests 3× on an OOM event
    minChangePercent: 0.2,                             // apply only if change > 20%
});
from pulumi_devzero.resources import WorkloadPolicy, WorkloadPolicyArgs
from pulumi_devzero.resources.types import VerticalScalingArgs

# Rightsizing policy with CPU and memory vertical scaling enabled. Recommendations
# are applied both on detection events and on a 15-minute cron schedule.
policy = WorkloadPolicy("cost-saving-policy", args=WorkloadPolicyArgs(
    name="cost-saving-policy",  # unique policy name within team
    description="Rightsize non-critical workloads",
    loopback_period_seconds=86400,  # seconds (86400 seconds = 1 day) ('Lookback Period' on the dashboard)
    cooldown_minutes=300,           # minutes (300 minutes = 5 hours); wait time between successive scale-down recommendations
    min_data_points=20,             # minimum data points before any recommendations are generated
    action_triggers=["on_detection", "on_schedule"],  # controls how/when recommendations are applied
    cron_schedule="*/15 * * * *",   # every 15 min
    detection_triggers=["pod_creation", "pod_update"],  # only applies for 'on_detection'
    cpu_vertical_scaling=VerticalScalingArgs(
        enabled=True,
        target_percentile=0.75,     # P75 (percentile of observed usage to target)
        min_request=25,             # millicores (hard floor for CPU requests)
        max_scale_up_percent=1000,  # max scale-up per step (% of current)
        max_scale_down_percent=1,   # max scale-down per step (% of current)
        min_data_points=20,         # minimum data points for CPU utilization before any recommendations are generated
        adjust_req_even_if_not_set=True,  # recommended CPU requests will be set even if the workload currently has no CPU requests defined
        limits_removal_enabled=True,      # remove CPU limits since CPU cycles can be compressed
    ),
    memory_vertical_scaling=VerticalScalingArgs(
        enabled=True,
        target_percentile=1,        # P100 (max observed usage, to prevent OOMKills)
        min_request=134217728,      # bytes (134217728 bytes = 128 MiB) (hard floor for memory requests)
        max_scale_up_percent=1000,  # max scale-up per step (% of current)
        max_scale_down_percent=1,   # max scale-down per step (% of current)
        overhead_multiplier=0.3,    # extra headroom on top of whatever the request would otherwise be set to
        limits_adjustment_enabled=True,  # adjust limits along with requests
        limit_multiplier=1,              # limits = request × limit_multiplier
        min_data_points=20,         # minimum data points for memory utilization before any recommendations are generated
        adjust_req_even_if_not_set=True,  # recommended memory requests will be set even if the workload currently has no memory requests defined
    ),
    enable_pmax_protection=True,    # protect against spike-induced OOMKills
    pmax_ratio_threshold=3,         # multiplies existing memory requests by 3x on OOM event
    min_change_percent=0.2,         # recommendations are applied if change exceeds 20%
))
package main

import (
    "github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
    "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that declares the "cost-saving-policy" WorkloadPolicy.
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        // CPU axis of the policy.
        cpuScaling := resources.VerticalScalingArgsArgs{
            Enabled:               pulumi.BoolPtr(true),
            TargetPercentile:      pulumi.Float64Ptr(0.75), // target P75 of observed usage
            MinRequest:            pulumi.IntPtr(25),       // hard floor for CPU requests, in millicores
            MaxScaleUpPercent:     pulumi.Float64Ptr(1000), // largest scale-up per step (% of current)
            MaxScaleDownPercent:   pulumi.Float64Ptr(1),    // largest scale-down per step (% of current)
            MinDataPoints:         pulumi.IntPtr(20),       // CPU utilization samples required before recommending
            AdjustReqEvenIfNotSet: pulumi.BoolPtr(true),    // set CPU requests even when the workload defines none
            LimitsRemovalEnabled:  pulumi.BoolPtr(true),    // drop CPU limits, since CPU cycles can be compressed
        }.ToVerticalScalingArgsPtrOutput()

        // Memory axis of the policy.
        memoryScaling := resources.VerticalScalingArgsArgs{
            Enabled:                 pulumi.BoolPtr(true),
            TargetPercentile:        pulumi.Float64Ptr(1),     // P100: max observed usage, to prevent OOMKills
            MinRequest:              pulumi.IntPtr(134217728), // hard floor for memory requests, in bytes (128 MiB)
            MaxScaleUpPercent:       pulumi.Float64Ptr(1000),  // largest scale-up per step (% of current)
            MaxScaleDownPercent:     pulumi.Float64Ptr(1),     // largest scale-down per step (% of current)
            OverheadMultiplier:      pulumi.Float64Ptr(0.3),   // extra headroom on top of the computed request
            LimitsAdjustmentEnabled: pulumi.BoolPtr(true),     // adjust limits along with requests
            LimitMultiplier:         pulumi.Float64Ptr(1),     // limits = request × LimitMultiplier
            MinDataPoints:           pulumi.IntPtr(20),        // memory utilization samples required before recommending
            AdjustReqEvenIfNotSet:   pulumi.BoolPtr(true),     // set memory requests even when the workload defines none
        }.ToVerticalScalingArgsPtrOutput()

        policyArgs := &resources.WorkloadPolicyArgs{
            Name:                  pulumi.String("cost-saving-policy"), // must be unique within the team
            Description:           pulumi.StringPtr("Rightsize non-critical workloads"),
            LoopbackPeriodSeconds: pulumi.IntPtr(86400), // 1 day, in seconds ('Lookback Period' on the dashboard)
            CooldownMinutes:       pulumi.IntPtr(300),   // 5 hours, in minutes, between successive scale-down recommendations
            MinDataPoints:         pulumi.IntPtr(20),    // samples required before any recommendation is generated
            // Controls how/when recommendations are applied.
            ActionTriggers: pulumi.StringArray{
                pulumi.String("on_detection"),
                pulumi.String("on_schedule"),
            },
            CronSchedule: pulumi.StringPtr("*/15 * * * *"), // every 15 minutes
            // Only applies for the 'on_detection' action trigger.
            DetectionTriggers: pulumi.StringArray{
                pulumi.String("pod_creation"),
                pulumi.String("pod_update"),
            },
            CpuVerticalScaling:    cpuScaling,
            MemoryVerticalScaling: memoryScaling,
            EnablePmaxProtection:  pulumi.BoolPtr(true),   // protect against spike-induced OOMKills
            PmaxRatioThreshold:    pulumi.Float64Ptr(3),   // multiplies existing memory requests by 3x on OOM event
            MinChangePercent:      pulumi.Float64Ptr(0.2), // apply recommendations only when the change exceeds 20%
        }

        // Registering the resource is the only step that can fail; surface its error as-is.
        _, err := resources.NewWorkloadPolicy(ctx, "cost-saving-policy", policyArgs)
        return err
    })
}

Arguments

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | Yes | Unique name for the policy |
| description | string | No | Human-readable description |
| cpuVerticalScaling | VerticalScalingArgs | No | CPU vertical scaling configuration |
| memoryVerticalScaling | VerticalScalingArgs | No | Memory vertical scaling configuration |
| horizontalScaling | HorizontalScalingArgs | No | Horizontal scaling configuration |
| actionTriggers | string[] | No | When to act: "on_detection", "on_schedule". Both can be used together. |
| cronSchedule | string | No | 5-field UTC cron expression for scheduled application. Required when actionTriggers includes "on_schedule". Example: "0 2 * * *" |
| detectionTriggers | string[] | No | What triggers detection: "pod_creation", "pod_update", "pod_reschedule" |
| enablePmaxProtection | bool | No | Raise requests to cover peak usage when max/recommendation ratio exceeds pmaxRatioThreshold. Default: false |
| pmaxRatioThreshold | float | No | Max-to-recommendation ratio that triggers pmax protection. Default: 3.0 |
| loopbackPeriodSeconds | int | No | Period in seconds to look back for usage data. Default: 86400 (24 h) |
| startupPeriodSeconds | int | No | Seconds after workload start to exclude from usage data (avoids cold-start spikes). Example: 300 |
| minDataPoints | int | No | Global minimum data points required before a recommendation is emitted. Default: 15 |
| minChangePercent | float | No | Global minimum relative change (0–1) required before applying a recommendation. Default: 0.2 (20%) |
| stabilityCvMax | float | No | Maximum coefficient of variation (stddev/mean) for a workload to be considered stable enough for VPA. Example: 0.3 |
| hysteresisVsTarget | float | No | Dead-band ratio around the HPA target to suppress VPA/HPA oscillation. Example: 0.1 |
| driftDeltaPercent | float | No | Percentage change from baseline recommendation that triggers a VPA refresh. Example: 20.0 |
| minVpaWindowDataPoints | int | No | Minimum data points in the VPA analysis window. Default: 30 |
| cooldownMinutes | int | No | Minutes to wait between applying recommendations. Default: 300 (5h) |
| gpuVerticalScaling | VerticalScalingArgs | No | GPU core vertical scaling configuration (units: GPU millicores) |
| gpuVramVerticalScaling | VerticalScalingArgs | No | GPU VRAM vertical scaling configuration (units: bytes) |
| liveMigrationEnabled | bool | No | Allow live pod migration when applying recommendations without restart. Default: false |
| schedulerPlugins | string[] | No | Kubernetes scheduler plugins to activate. Example: ["binpacking"] |
| defragmentationSchedule | string | No | Cron expression for background node defragmentation. Example: "0 3 * * 0" |

Python uses snake_case: cpu_vertical_scaling, memory_vertical_scaling, gpu_vertical_scaling, gpu_vram_vertical_scaling, horizontal_scaling, action_triggers, cron_schedule, detection_triggers, enable_pmax_protection, pmax_ratio_threshold, loopback_period_seconds, startup_period_seconds, min_data_points, min_change_percent, stability_cv_max, hysteresis_vs_target, drift_delta_percent, min_vpa_window_data_points, cooldown_minutes, live_migration_enabled, scheduler_plugins, defragmentation_schedule. Go uses PascalCase equivalents.

VerticalScalingArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| enabled | bool | Yes | Whether vertical scaling is active |
| targetPercentile | float | No | Usage percentile to target (e.g. 0.95) |
| minRequest | int | No | Minimum request value (millicores for CPU, bytes for memory — e.g. 134217728 = 128 MiB) |
| maxRequest | int | No | Maximum request value |
| maxScaleUpPercent | float | No | Maximum upward scaling percentage per step (e.g. 100 = 100%). Default: 1000 |
| maxScaleDownPercent | float | No | Maximum downward scaling percentage per step (e.g. 25 = 25%). Default: 25 |
| overheadMultiplier | float | No | Safety margin multiplier applied to recommendations |
| limitsAdjustmentEnabled | bool | No | Whether to adjust limits alongside requests |
| limitMultiplier | float | No | Limits are set to request * limitMultiplier |
| minDataPoints | int | No | Minimum data points required before a recommendation is emitted. Default: 20 |
| adjustReqEvenIfNotSet | bool | No | Recommend requests even when the workload has no existing requests set. Default: false |
| limitsRemovalEnabled | bool | No | Actively remove limits from workloads (CPU axis only — memory limits removal is not supported). Takes precedence over limitsAdjustmentEnabled. Default: false |

Python uses snake_case: target_percentile, min_request, max_request, max_scale_up_percent, max_scale_down_percent, overhead_multiplier, limits_adjustment_enabled, limit_multiplier, min_data_points, adjust_req_even_if_not_set, limits_removal_enabled. Go uses PascalCase: TargetPercentile, MinRequest, MaxRequest, etc.

HorizontalScalingArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| enabled | bool | Yes | Enable horizontal (replica) scaling |
| minReplicas | int | No | Minimum number of replicas to maintain |
| maxReplicas | int | No | Maximum number of replicas to scale to |
| targetUtilization | float | No | Target utilization ratio (0–1) for the primary metric. Example: 0.7 |
| primaryMetric | string | No | Metric driving HPA: cpu, memory, gpu, network_ingress, or network_egress |
| minDataPoints | int | No | Minimum data points before a recommendation is emitted |
| maxReplicaChangePercent | float | No | Maximum % change in replica count per cycle. Example: 50.0 |

Python: min_replicas, max_replicas, target_utilization, primary_metric, min_data_points, max_replica_change_percent. Go: MinReplicas, MaxReplicas, TargetUtilization, PrimaryMetric, etc.

WorkloadPolicyTarget

WorkloadPolicyTarget attaches a WorkloadPolicy to one or more clusters. You can optionally filter by workload kind and namespace.

Example

        import { resources } from "@devzero/pulumi-devzero";

        // Attach the policy to a cluster, limited to Deployments and StatefulSets in
        // namespaces matching the pattern. `policy` and `cluster` are not declared in
        // this snippet; they must already exist in the surrounding program.
        const target = new resources.WorkloadPolicyTarget("production-target", {
            name: "production-target",
            policyId: policy.id,
            clusterIds: [cluster.id],
            kindFilter: ["Deployment", "StatefulSet"],
            // Match namespaces by name pattern — useful when namespaces follow a naming
            // convention but aren't consistently labeled (e.g. team-*, prod-*).
            namespacePattern: {
                pattern: "^prod-",
                flags: "i", // case-insensitive
            },
            enabled: true,
        });
        from pulumi_devzero.resources import WorkloadPolicyTarget, WorkloadPolicyTargetArgs, NamePatternArgs

        # Attach the policy to a cluster, limited to Deployments and StatefulSets in
        # namespaces matching the pattern. `policy` and `cluster` are not declared in
        # this snippet; they must already exist in the surrounding program.
        target = WorkloadPolicyTarget("production-target", args=WorkloadPolicyTargetArgs(
            name="production-target",
            policy_id=policy.id,
            cluster_ids=[cluster.id],
            kind_filter=["Deployment", "StatefulSet"],
            # Match namespaces by name pattern — useful when namespaces follow a naming
            # convention but aren't consistently labeled (e.g. team-*, prod-*).
            namespace_pattern=NamePatternArgs(pattern="^prod-", flags="i"),  # case-insensitive
            enabled=True,
        ))
        // Attach the policy to a cluster, limited to Deployments and StatefulSets in
        // namespaces matching the pattern.
        // NOTE: this is a fragment. It assigns with `=` and uses ctx, policy and cluster
        // without declaring them, so ctx, err, policy and cluster must already be in
        // scope in the enclosing function (presumably the pulumi.Run callback — keep
        // the WorkloadPolicy result in a variable rather than discarding it with `_`).
        _, err = resources.NewWorkloadPolicyTarget(ctx, "production-target", &resources.WorkloadPolicyTargetArgs{
            Name:       pulumi.String("production-target"),
            PolicyId:   policy.ID(),
            ClusterIds: pulumi.StringArray{cluster.ID()},
            KindFilter: pulumi.StringArray{
                pulumi.String("Deployment"),
                pulumi.String("StatefulSet"),
            },
            // Match namespaces by name pattern — useful when namespaces follow a naming
            // convention but aren't consistently labeled (e.g. team-*, prod-*).
            NamespacePattern: &resources.NamePatternArgsArgs{
                Pattern: pulumi.String("^prod-"),
                Flags:   pulumi.StringPtr("i"), // case-insensitive
            },
            Enabled: pulumi.BoolPtr(true),
        })
        if err != nil {
            return err
        }

Arguments

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | Yes | Unique name for the target |
| policyId | string | Yes | ID of the WorkloadPolicy to attach |
| clusterIds | string[] | Yes | List of cluster IDs to apply the policy to |
| description | string | No | Human-readable description |
| priority | int | No | Evaluation priority; higher value wins when targets overlap |
| kindFilter | string[] | No | Workload kinds to include (see below) |
| workloadNames | string[] | No | Explicit list of workload names to include |
| nodeGroupNames | string[] | No | Restrict matching to specific node groups by name |
| namePattern | NamePatternArgs | No | Regex pattern to match workload names |
| namespacePattern | NamePatternArgs | No | Regex pattern to match namespace names. Use this when namespaces follow a naming convention but aren't consistently labeled (e.g. ^prod-, ^team-). Combined with other filters using AND logic. |
| namespaceSelector | LabelSelectorArgs | No | Select namespaces by name using include/exclude expressions (matchExpressions) |
| workloadSelector | LabelSelectorArgs | No | Select workloads by labels |
| enabled | bool | No | Whether the target is active (default: true) |

Python uses snake_case: policy_id, cluster_ids, kind_filter, workload_names, node_group_names, name_pattern, namespace_pattern, namespace_selector, workload_selector. Go uses PascalCase: PolicyId, ClusterIds, KindFilter, NamespacePattern, NamespaceSelector, etc.

Supported kind filter values: Pod, Deployment, StatefulSet, DaemonSet, Job, CronJob, ReplicaSet, ReplicationController, Rollout

NamePatternArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| pattern | string | Yes | Regular expression to match against the name |
| flags | string | No | Optional regex flags. Use "i" for case-insensitive matching. Can also be embedded inline with (?i). |

Python: pattern, flags. Go: Pattern, Flags.

On this page