Workload Policies

WorkloadPolicy defines how workloads should be rightsized -- CPU and memory vertical scaling, horizontal scaling, and the triggers that activate them. WorkloadPolicyTarget attaches a policy to one or more clusters with optional namespace and workload kind filters.

WorkloadPolicy

Example

import { resources, types } from "@devzero/pulumi-devzero";

// Define a WorkloadPolicy that rightsizes CPU and memory requests and applies
// recommendations both on detection events and on a 15-minute cron schedule.
const policy = new resources.WorkloadPolicy("cost-saving-policy", {
    name: "cost-saving-policy",                        // unique within team
    description: "Rightsize non-critical workloads",
    loopbackPeriodSeconds: 86400,                      // 1 day (shown as "Lookback Period" in the dashboard)
    cooldownMinutes: 300,                              // 5 hours between successive scale-down actions
    minDataPoints: 20,                                 // min samples before any recommendation
    actionTriggers: ["on_detection", "on_schedule"],   // when to apply recommendations
    cronSchedule: "*/15 * * * *",                      // every 15 min; required when "on_schedule" is set
    detectionTriggers: ["pod_creation", "pod_update"], // used when "on_detection" is set
    cpuVerticalScaling: {
        enabled: true,
        targetPercentile: 0.75,        // P75 of observed usage
        minRequest: 25,                // millicores; hard floor
        maxScaleUpPercent: 1000,       // % per step
        maxScaleDownPercent: 1,        // % per step
        minDataPoints: 20,             // min CPU samples
        adjustReqEvenIfNotSet: true,   // set requests even if workload has none
        limitsRemovalEnabled: true,    // strip CPU limits (cycles compress safely)
    },
    memoryVerticalScaling: {
        enabled: true,
        targetPercentile: 1,           // P100 — guard against OOMKills
        minRequest: 134217728,         // 128 MiB in bytes; hard floor
        maxScaleUpPercent: 1000,       // % per step
        maxScaleDownPercent: 1,        // % per step
        overheadMultiplier: 0.3,       // extra headroom over the recommendation
        limitsAdjustmentEnabled: true, // adjust limits alongside requests
        limitMultiplier: 1,            // limits = request × this
        minDataPoints: 20,             // min memory samples
        adjustReqEvenIfNotSet: true,   // set requests even if workload has none
    },
    enablePmaxProtection: true,                        // guard against spike-induced OOMKills
    pmaxRatioThreshold: 3,                             // pmax protection triggers when the max/recommendation ratio exceeds 3
    minChangePercent: 0.2,                             // apply only if change > 20%
});

from pulumi_devzero.resources import WorkloadPolicy, WorkloadPolicyArgs
from pulumi_devzero.resources.types import VerticalScalingArgs

# Define a WorkloadPolicy that rightsizes CPU and memory requests and applies
# recommendations both on detection events and on a 15-minute cron schedule.
policy = WorkloadPolicy("cost-saving-policy", args=WorkloadPolicyArgs(
    name="cost-saving-policy",  # unique policy name within team
    description="Rightsize non-critical workloads",
    loopback_period_seconds=86400,  # seconds (86400 seconds = 1 day) ('Lookback Period' on the dashboard)
    cooldown_minutes=300,           # minutes (300 minutes = 5 hours); time between successive scale-down recommendations
    min_data_points=20,             # minimum data points before any recommendations are generated
    action_triggers=["on_detection", "on_schedule"],  # controls how/when recommendations are applied
    cron_schedule="*/15 * * * *",   # every 15 min; required when 'on_schedule' is set
    detection_triggers=["pod_creation", "pod_update"],  # only applies for 'on_detection'
    cpu_vertical_scaling=VerticalScalingArgs(
        enabled=True,
        target_percentile=0.75,     # P75 (percentile of observed usage to target)
        min_request=25,             # millicores (hard floor for CPU requests)
        max_scale_up_percent=1000,  # max scale-up per step (% of current)
        max_scale_down_percent=1,   # max scale-down per step (% of current)
        min_data_points=20,         # minimum data points for CPU utilization before any recommendations are generated
        adjust_req_even_if_not_set=True,  # recommended CPU requests will be set even if the workload currently has no CPU requests defined
        limits_removal_enabled=True,      # remove CPU limits since CPU cycles can be compressed
    ),
    memory_vertical_scaling=VerticalScalingArgs(
        enabled=True,
        target_percentile=1,        # P100 (max observed, to prevent OOMKills)
        min_request=134217728,      # bytes (134217728 bytes = 128 MiB) (hard floor for memory requests)
        max_scale_up_percent=1000,  # max scale-up per step (% of current)
        max_scale_down_percent=1,   # max scale-down per step (% of current)
        overhead_multiplier=0.3,    # extra headroom on top of whatever the request would otherwise be set to
        limits_adjustment_enabled=True,  # adjust limits along with requests
        limit_multiplier=1,              # limits = request × limit_multiplier
        min_data_points=20,         # minimum data points for memory utilization before any recommendations are generated
        adjust_req_even_if_not_set=True,  # recommended memory requests will be set even if the workload currently has no memory requests defined
    ),
    enable_pmax_protection=True,    # protect against spike-induced OOMKills
    pmax_ratio_threshold=3,         # pmax protection triggers when the max/recommendation ratio exceeds 3
    min_change_percent=0.2,         # recommendations are applied if change exceeds 20%
))

package main

import (
    "github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
    "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that registers the "cost-saving-policy"
// WorkloadPolicy. The created resource is discarded here; only the error is
// propagated back to pulumi.Run.
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        _, err := resources.NewWorkloadPolicy(ctx, "cost-saving-policy", &resources.WorkloadPolicyArgs{
            Name:                  pulumi.String("cost-saving-policy"),          // unique policy name within team
            Description:           pulumi.StringPtr("Rightsize non-critical workloads"),
            LoopbackPeriodSeconds: pulumi.IntPtr(86400),                         // seconds (86400 seconds = 1 day) ('Lookback Period' on the dashboard)
            CooldownMinutes:       pulumi.IntPtr(300),                           // minutes (300 minutes = 5 hours); time between successive scale-down recommendations
            MinDataPoints:         pulumi.IntPtr(20),                            // minimum data points before any recommendations are generated
            ActionTriggers: pulumi.StringArray{
                pulumi.String("on_detection"),
                pulumi.String("on_schedule"),
            }, // controls how/when recommendations are applied
            CronSchedule: pulumi.StringPtr("*/15 * * * *"),                      // every 15 min; required when "on_schedule" is set
            DetectionTriggers: pulumi.StringArray{
                pulumi.String("pod_creation"),
                pulumi.String("pod_update"),
            }, // only applies for 'on_detection'
            CpuVerticalScaling: resources.VerticalScalingArgsArgs{
                Enabled:               pulumi.BoolPtr(true),
                TargetPercentile:      pulumi.Float64Ptr(0.75),                  // P75 (percentile of observed usage to target)
                MinRequest:            pulumi.IntPtr(25),                        // millicores (hard floor for CPU requests)
                MaxScaleUpPercent:     pulumi.Float64Ptr(1000),                  // max scale-up per step (% of current)
                MaxScaleDownPercent:   pulumi.Float64Ptr(1),                     // max scale-down per step (% of current)
                MinDataPoints:         pulumi.IntPtr(20),                        // minimum data points for CPU utilization before any recommendations are generated
                AdjustReqEvenIfNotSet: pulumi.BoolPtr(true),                    // recommended CPU requests will be set even if the workload currently has no CPU requests defined
                LimitsRemovalEnabled:  pulumi.BoolPtr(true),                    // remove CPU limits since CPU cycles can be compressed
            }.ToVerticalScalingArgsPtrOutput(),
            MemoryVerticalScaling: resources.VerticalScalingArgsArgs{
                Enabled:                 pulumi.BoolPtr(true),
                TargetPercentile:        pulumi.Float64Ptr(1),                   // P100 (max observed, to prevent OOMKills)
                MinRequest:              pulumi.IntPtr(134217728),               // bytes (134217728 bytes = 128 MiB) (hard floor for memory requests)
                MaxScaleUpPercent:       pulumi.Float64Ptr(1000),                // max scale-up per step (% of current)
                MaxScaleDownPercent:     pulumi.Float64Ptr(1),                   // max scale-down per step (% of current)
                OverheadMultiplier:      pulumi.Float64Ptr(0.3),                 // extra headroom on top of whatever the request would otherwise be set to
                LimitsAdjustmentEnabled: pulumi.BoolPtr(true),                  // adjust limits along with requests
                LimitMultiplier:         pulumi.Float64Ptr(1),                   // limits = request × LimitMultiplier
                MinDataPoints:           pulumi.IntPtr(20),                      // minimum data points for memory utilization before any recommendations are generated
                AdjustReqEvenIfNotSet:   pulumi.BoolPtr(true),                  // recommended memory requests will be set even if the workload currently has no memory requests defined
            }.ToVerticalScalingArgsPtrOutput(),
            EnablePmaxProtection: pulumi.BoolPtr(true),                         // protect against spike-induced OOMKills
            PmaxRatioThreshold:   pulumi.Float64Ptr(3),                         // pmax protection triggers when the max/recommendation ratio exceeds 3
            MinChangePercent:     pulumi.Float64Ptr(0.2),                       // recommendations are applied if change exceeds 20%
        })
        if err != nil {
            return err
        }

        return nil
    })
}

Arguments

Parameter	Type	Required	Description
`name`	string	Yes	Unique name for the policy
`description`	string	No	Human-readable description
`cpuVerticalScaling`	VerticalScalingArgs	No	CPU vertical scaling configuration
`memoryVerticalScaling`	VerticalScalingArgs	No	Memory vertical scaling configuration
`horizontalScaling`	HorizontalScalingArgs	No	Horizontal scaling configuration
`actionTriggers`	string[]	No	When to act: `"on_detection"`, `"on_schedule"`. Both can be used together.
`cronSchedule`	string	No	5-field UTC cron expression for scheduled application. Required when `actionTriggers` includes `"on_schedule"`. Example: `"0 2 * * *"`
`detectionTriggers`	string[]	No	What triggers detection: `"pod_creation"`, `"pod_update"`, `"pod_reschedule"`
`enablePmaxProtection`	bool	No	Raise requests to cover peak usage when max/recommendation ratio exceeds `pmaxRatioThreshold`. Default: `false`
`pmaxRatioThreshold`	float	No	Max-to-recommendation ratio that triggers pmax protection. Default: `3.0`
`loopbackPeriodSeconds`	int	No	Period in seconds to look back for usage data (shown as "Lookback Period" in the dashboard). Default: `86400` (24 h)
`startupPeriodSeconds`	int	No	Seconds after workload start to exclude from usage data (avoids cold-start spikes). Example: `300`
`minDataPoints`	int	No	Global minimum data points required before a recommendation is emitted. Default: `15`
`minChangePercent`	float	No	Global minimum relative change (0–1) required before applying a recommendation. Default: `0.2` (20%)
`stabilityCvMax`	float	No	Maximum coefficient of variation (stddev/mean) for a workload to be considered stable enough for VPA. Example: `0.3`
`hysteresisVsTarget`	float	No	Dead-band ratio around the HPA target to suppress VPA/HPA oscillation. Example: `0.1`
`driftDeltaPercent`	float	No	Percentage change from baseline recommendation that triggers a VPA refresh. Example: `20.0`
`minVpaWindowDataPoints`	int	No	Minimum data points in the VPA analysis window. Default: `30`
`cooldownMinutes`	int	No	Minutes to wait between applying recommendations. Default: `300` (5h)
`gpuVerticalScaling`	VerticalScalingArgs	No	GPU core vertical scaling configuration (units: GPU millicores)
`gpuVramVerticalScaling`	VerticalScalingArgs	No	GPU VRAM vertical scaling configuration (units: bytes)
`liveMigrationEnabled`	bool	No	Allow live pod migration when applying recommendations without restart. Default: `false`
`schedulerPlugins`	string[]	No	Kubernetes scheduler plugins to activate. Example: `["binpacking"]`
`defragmentationSchedule`	string	No	Cron expression for background node defragmentation. Example: `"0 3 * * 0"`

Python uses snake_case: cpu_vertical_scaling, memory_vertical_scaling, gpu_vertical_scaling, gpu_vram_vertical_scaling, horizontal_scaling, action_triggers, cron_schedule, detection_triggers, enable_pmax_protection, pmax_ratio_threshold, loopback_period_seconds, startup_period_seconds, min_data_points, min_change_percent, stability_cv_max, hysteresis_vs_target, drift_delta_percent, min_vpa_window_data_points, cooldown_minutes, live_migration_enabled, scheduler_plugins, defragmentation_schedule. Go uses PascalCase equivalents.

VerticalScalingArgs

Parameter	Type	Required	Description
`enabled`	bool	Yes	Whether vertical scaling is active
`targetPercentile`	float	No	Usage percentile to target (e.g. `0.95`)
`minRequest`	int	No	Minimum request value (millicores for CPU, bytes for memory; e.g. `134217728` = 128 MiB)
`maxRequest`	int	No	Maximum request value
`maxScaleUpPercent`	float	No	Maximum upward scaling percentage per step (e.g. `100` = 100%). Default: `1000`
`maxScaleDownPercent`	float	No	Maximum downward scaling percentage per step (e.g. `25` = 25%). Default: `25`
`overheadMultiplier`	float	No	Safety margin multiplier applied to recommendations
`limitsAdjustmentEnabled`	bool	No	Whether to adjust limits alongside requests
`limitMultiplier`	float	No	Limits are set to `request * limitMultiplier`
`minDataPoints`	int	No	Minimum data points required before a recommendation is emitted. Default: `20`
`adjustReqEvenIfNotSet`	bool	No	Recommend requests even when the workload has no existing requests set. Default: `false`
`limitsRemovalEnabled`	bool	No	Actively remove limits from workloads (CPU axis only — memory limits removal is not supported). Takes precedence over `limitsAdjustmentEnabled`. Default: `false`

Python uses snake_case: target_percentile, min_request, max_request, max_scale_up_percent, max_scale_down_percent, overhead_multiplier, limits_adjustment_enabled, limit_multiplier, min_data_points, adjust_req_even_if_not_set, limits_removal_enabled. Go uses PascalCase: TargetPercentile, MinRequest, MaxRequest, etc.

HorizontalScalingArgs

Parameter	Type	Required	Description
`enabled`	bool	Yes	Enable horizontal (replica) scaling
`minReplicas`	int	No	Minimum number of replicas to maintain
`maxReplicas`	int	No	Maximum number of replicas to scale to
`targetUtilization`	float	No	Target utilization ratio (0–1) for the primary metric. Example: `0.7`
`primaryMetric`	string	No	Metric driving HPA: one of `cpu`, `memory`, `gpu`, `network_ingress`, or `network_egress`
`minDataPoints`	int	No	Minimum data points before a recommendation is emitted
`maxReplicaChangePercent`	float	No	Maximum % change in replica count per cycle. Example: `50.0`

Python: min_replicas, max_replicas, target_utilization, primary_metric, min_data_points, max_replica_change_percent. Go: MinReplicas, MaxReplicas, TargetUtilization, PrimaryMetric, etc.

WorkloadPolicyTarget

WorkloadPolicyTarget attaches a WorkloadPolicy to one or more clusters. You can optionally filter by workload kind and namespace.

Example

        import { resources } from "@devzero/pulumi-devzero";

        // Attach the policy to a cluster, limited to Deployments and StatefulSets in
        // namespaces matching the pattern. `policy` and `cluster` must already be
        // defined in scope.
        const target = new resources.WorkloadPolicyTarget("production-target", {
            name: "production-target",
            policyId: policy.id,
            clusterIds: [cluster.id],
            kindFilter: ["Deployment", "StatefulSet"],
            // Match namespaces by name pattern — useful when namespaces follow a naming
            // convention but aren't consistently labeled (e.g. team-*, prod-*).
            namespacePattern: {
                pattern: "^prod-",
                flags: "i", // case-insensitive
            },
            enabled: true,
        });

        from pulumi_devzero.resources import WorkloadPolicyTarget, WorkloadPolicyTargetArgs, NamePatternArgs

        # Attach the policy to a cluster, limited to Deployments and StatefulSets in
        # namespaces matching the pattern. `policy` and `cluster` must already be
        # defined in scope.
        target = WorkloadPolicyTarget("production-target", args=WorkloadPolicyTargetArgs(
            name="production-target",
            policy_id=policy.id,
            cluster_ids=[cluster.id],
            kind_filter=["Deployment", "StatefulSet"],
            # Match namespaces by name pattern — useful when namespaces follow a naming
            # convention but aren't consistently labeled (e.g. team-*, prod-*).
            namespace_pattern=NamePatternArgs(pattern="^prod-", flags="i"),  # case-insensitive
            enabled=True,
        ))

        // Fragment: place inside the pulumi.Run callback. It assumes ctx, err, and
        // the policy and cluster resources are already declared in the enclosing
        // scope (err is assigned with "=", not ":="). To use policy.ID(), capture
        // the value returned by NewWorkloadPolicy instead of discarding it with "_".
        _, err = resources.NewWorkloadPolicyTarget(ctx, "production-target", &resources.WorkloadPolicyTargetArgs{
            Name:       pulumi.String("production-target"),
            PolicyId:   policy.ID(),
            ClusterIds: pulumi.StringArray{cluster.ID()},
            KindFilter: pulumi.StringArray{
                pulumi.String("Deployment"),
                pulumi.String("StatefulSet"),
            },
            // Match namespaces by name pattern — useful when namespaces follow a naming
            // convention but aren't consistently labeled (e.g. team-*, prod-*).
            NamespacePattern: &resources.NamePatternArgsArgs{
                Pattern: pulumi.String("^prod-"),
                Flags:   pulumi.StringPtr("i"), // case-insensitive
            },
            Enabled: pulumi.BoolPtr(true),
        })
        if err != nil {
            return err
        }

Arguments

Parameter	Type	Required	Description
`name`	string	Yes	Unique name for the target
`policyId`	string	Yes	ID of the WorkloadPolicy to attach
`clusterIds`	string[]	Yes	List of cluster IDs to apply the policy to
`description`	string	No	Human-readable description
`priority`	int	No	Evaluation priority; higher value wins when targets overlap
`kindFilter`	string[]	No	Workload kinds to include (see below)
`workloadNames`	string[]	No	Explicit list of workload names to include
`nodeGroupNames`	string[]	No	Restrict matching to specific node groups by name
`namePattern`	NamePatternArgs	No	Regex pattern to match workload names
`namespacePattern`	NamePatternArgs	No	Regex pattern to match namespace names. Use this when namespaces follow a naming convention but aren't consistently labeled (e.g. `^prod-`, `^team-`). Combined with other filters using AND logic.
`namespaceSelector`	LabelSelectorArgs	No	Select namespaces by name using include/exclude expressions (`matchExpressions`)
`workloadSelector`	LabelSelectorArgs	No	Select workloads by labels
`enabled`	bool	No	Whether the target is active (default: `true`)

Python uses snake_case: policy_id, cluster_ids, kind_filter, workload_names, node_group_names, name_pattern, namespace_pattern, namespace_selector, workload_selector. Go uses PascalCase: PolicyId, ClusterIds, KindFilter, NamespacePattern, NamespaceSelector, etc.

Supported kind filter values: Pod, Deployment, StatefulSet, DaemonSet, Job, CronJob, ReplicaSet, ReplicationController, Rollout

NamePatternArgs

Parameter	Type	Required	Description
`pattern`	string	Yes	Regular expression to match against the name
`flags`	string	No	Optional regex flags. Use `"i"` for case-insensitive matching. Can also be embedded inline with `(?i)`.

Python: pattern, flags. Go: Pattern, Flags.

Workload Policies

On this page