Workload Policies
Configure workload rightsizing policies and attach them to clusters.
Workload Policies
WorkloadPolicy defines how workloads should be rightsized -- CPU and memory vertical scaling, horizontal scaling, and the triggers that activate them. WorkloadPolicyTarget attaches a policy to one or more clusters with optional namespace and workload kind filters.
WorkloadPolicy
Example
import { resources, types } from "@devzero/pulumi-devzero";
const policy = new resources.WorkloadPolicy("cost-saving-policy", {
name: "cost-saving-policy", // unique within team
description: "Rightsize non-critical workloads",
loopbackPeriodSeconds: 86400, // 1 day (shown as "Lookback Period" in the dashboard)
cooldownMinutes: 300, // 5 hours between successive scale-down actions
minDataPoints: 20, // min samples before any recommendation
actionTriggers: ["on_detection", "on_schedule"], // when to apply recommendations
cronSchedule: "*/15 * * * *", // every 15 min; required when "on_schedule" is set
detectionTriggers: ["pod_creation", "pod_update"], // used when "on_detection" is set
cpuVerticalScaling: {
enabled: true,
targetPercentile: 0.75, // P75 of observed usage
minRequest: 25, // millicores; hard floor
maxScaleUpPercent: 1000, // % per step
maxScaleDownPercent: 1, // % per step
minDataPoints: 20, // min CPU samples
adjustReqEvenIfNotSet: true, // set requests even if workload has none
limitsRemovalEnabled: true, // strip CPU limits (cycles compress safely)
},
memoryVerticalScaling: {
enabled: true,
targetPercentile: 1, // P100 — guard against OOMKills
minRequest: 134217728, // 128 MiB in bytes; hard floor
maxScaleUpPercent: 1000, // % per step
maxScaleDownPercent: 1, // % per step
overheadMultiplier: 0.3, // extra headroom over the recommendation
limitsAdjustmentEnabled: true, // adjust limits alongside requests
limitMultiplier: 1, // limits = request × this
minDataPoints: 20, // min memory samples
adjustReqEvenIfNotSet: true, // set requests even if workload has none
},
enablePmaxProtection: true, // guard against spike-induced OOMKills
pmaxRatioThreshold: 3, // raise requests 3× on an OOM event
minChangePercent: 0.2, // apply only if change > 20%
});
from pulumi_devzero.resources import WorkloadPolicy, WorkloadPolicyArgs
from pulumi_devzero.resources.types import VerticalScalingArgs
policy = WorkloadPolicy("cost-saving-policy", args=WorkloadPolicyArgs(
name="cost-saving-policy", # unique policy name within team
description="Rightsize non-critical workloads",
loopback_period_seconds=86400, # seconds (86400 seconds = 1 day) ('Lookback Period' on the dashboard)
cooldown_minutes=300, # minutes (300 minutes = 5 hours) (wait time between successive scale-down recommendations)
min_data_points=20, # minimum data points before any recommendations are generated
action_triggers=["on_detection", "on_schedule"], # controls how/when recommendations are applied
cron_schedule="*/15 * * * *", # every 15 min
detection_triggers=["pod_creation", "pod_update"], # only applies for 'on_detection'
cpu_vertical_scaling=VerticalScalingArgs(
enabled=True,
target_percentile=0.75, # P75 (percentile of observed usage to target)
min_request=25, # millicores (hard floor for CPU requests)
max_scale_up_percent=1000, # max scale-up per step (% of current)
max_scale_down_percent=1, # max scale-down per step (% of current)
min_data_points=20, # minimum data points for CPU utilization before any recommendations are generated
adjust_req_even_if_not_set=True, # recommended CPU requests will be set even if the workload currently has no CPU requests defined.
limits_removal_enabled=True, # remove CPU limits since CPU cycles can be compressed
),
memory_vertical_scaling=VerticalScalingArgs(
enabled=True,
target_percentile=1, # P100 (max observed to prevent OOMkills)
min_request=134217728, # bytes (134217728 bytes = 128 MiB) (hard floor for memory requests)
max_scale_up_percent=1000, # max scale-up per step (% of current)
max_scale_down_percent=1, # max scale-down per step (% of current)
overhead_multiplier=0.3, # extra headroom on top of whatever the request would otherwise be set to
limits_adjustment_enabled=True, # adjust limits along with requests
limit_multiplier=1, # limits = request × limitMultiplier
min_data_points=20, # minimum data points for memory utilization before any recommendations are generated
adjust_req_even_if_not_set=True, # recommended memory requests will be set even if the workload currently has no memory requests defined.
),
enable_pmax_protection=True, # protect against spike-induced OOMKills
pmax_ratio_threshold=3, # multiplies existing memory requests by 3x on OOM event
min_change_percent=0.2, # recommendations are applied if change exceeds 20%
))
package main
import (
"github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := resources.NewWorkloadPolicy(ctx, "cost-saving-policy", &resources.WorkloadPolicyArgs{
Name: pulumi.String("cost-saving-policy"), // unique policy name within team
Description: pulumi.StringPtr("Rightsize non-critical workloads"),
LoopbackPeriodSeconds: pulumi.IntPtr(86400), // seconds (86400 seconds = 1 day) ('Lookback Period' on the dashboard)
CooldownMinutes: pulumi.IntPtr(300), // minutes (300 minutes = 5 hours) (wait time between successive scale-down recommendations)
MinDataPoints: pulumi.IntPtr(20), // minimum data points before any recommendations are generated
ActionTriggers: pulumi.StringArray{
pulumi.String("on_detection"),
pulumi.String("on_schedule"),
}, // controls how/when recommendations are applied
CronSchedule: pulumi.StringPtr("*/15 * * * *"), // every 15 min
DetectionTriggers: pulumi.StringArray{
pulumi.String("pod_creation"),
pulumi.String("pod_update"),
}, // only applies for 'on_detection'
CpuVerticalScaling: resources.VerticalScalingArgsArgs{
Enabled: pulumi.BoolPtr(true),
TargetPercentile: pulumi.Float64Ptr(0.75), // P75 (percentile of observed usage to target)
MinRequest: pulumi.IntPtr(25), // millicores (hard floor for CPU requests)
MaxScaleUpPercent: pulumi.Float64Ptr(1000), // max scale-up per step (% of current)
MaxScaleDownPercent: pulumi.Float64Ptr(1), // max scale-down per step (% of current)
MinDataPoints: pulumi.IntPtr(20), // minimum data points for CPU utilization before any recommendations are generated
AdjustReqEvenIfNotSet: pulumi.BoolPtr(true), // recommended CPU requests will be set even if the workload currently has no CPU requests defined.
LimitsRemovalEnabled: pulumi.BoolPtr(true), // remove CPU limits since CPU cycles can be compressed
}.ToVerticalScalingArgsPtrOutput(),
MemoryVerticalScaling: resources.VerticalScalingArgsArgs{
Enabled: pulumi.BoolPtr(true),
TargetPercentile: pulumi.Float64Ptr(1), // P100 (max observed to prevent OOMkills)
MinRequest: pulumi.IntPtr(134217728), // bytes (134217728 bytes = 128 MiB) (hard floor for memory requests)
MaxScaleUpPercent: pulumi.Float64Ptr(1000), // max scale-up per step (% of current)
MaxScaleDownPercent: pulumi.Float64Ptr(1), // max scale-down per step (% of current)
OverheadMultiplier: pulumi.Float64Ptr(0.3), // extra headroom on top of whatever the request would otherwise be set to
LimitsAdjustmentEnabled: pulumi.BoolPtr(true), // adjust limits along with requests
LimitMultiplier: pulumi.Float64Ptr(1), // limits = request × limitMultiplier
MinDataPoints: pulumi.IntPtr(20), // minimum data points for memory utilization before any recommendations are generated
AdjustReqEvenIfNotSet: pulumi.BoolPtr(true), // recommended memory requests will be set even if the workload currently has no memory requests defined.
}.ToVerticalScalingArgsPtrOutput(),
EnablePmaxProtection: pulumi.BoolPtr(true), // protect against spike-induced OOMKills
PmaxRatioThreshold: pulumi.Float64Ptr(3), // multiplies existing memory requests by 3x on OOM event
MinChangePercent: pulumi.Float64Ptr(0.2), // recommendations are applied if change exceeds 20%
})
if err != nil {
return err
}
return nil
})
}
Arguments
| Parameter | Type | Required | Description |
|---|---|---|---|
name | string | Yes | Unique name for the policy |
description | string | No | Human-readable description |
cpuVerticalScaling | VerticalScalingArgs | No | CPU vertical scaling configuration |
memoryVerticalScaling | VerticalScalingArgs | No | Memory vertical scaling configuration |
horizontalScaling | HorizontalScalingArgs | No | Horizontal scaling configuration |
actionTriggers | string[] | No | When to act: "on_detection", "on_schedule". Both can be used together. |
cronSchedule | string | No | 5-field UTC cron expression for scheduled application. Required when actionTriggers includes "on_schedule". Example: "0 2 * * *" |
detectionTriggers | string[] | No | What triggers detection: "pod_creation", "pod_update", "pod_reschedule" |
enablePmaxProtection | bool | No | Raise requests to cover peak usage when max/recommendation ratio exceeds pmaxRatioThreshold. Default: false |
pmaxRatioThreshold | float | No | Max-to-recommendation ratio that triggers pmax protection. Default: 3.0 |
loopbackPeriodSeconds | int | No | Period in seconds to look back for usage data. Default: 86400 (24 h) |
startupPeriodSeconds | int | No | Seconds after workload start to exclude from usage data (avoids cold-start spikes). Example: 300 |
minDataPoints | int | No | Global minimum data points required before a recommendation is emitted. Default: 15 |
minChangePercent | float | No | Global minimum relative change (0–1) required before applying a recommendation. Default: 0.2 (20%) |
stabilityCvMax | float | No | Maximum coefficient of variation (stddev/mean) for a workload to be considered stable enough for VPA. Example: 0.3 |
hysteresisVsTarget | float | No | Dead-band ratio around the HPA target to suppress VPA/HPA oscillation. Example: 0.1 |
driftDeltaPercent | float | No | Percentage change from baseline recommendation that triggers a VPA refresh. Example: 20.0 |
minVpaWindowDataPoints | int | No | Minimum data points in the VPA analysis window. Default: 30 |
cooldownMinutes | int | No | Minutes to wait between applying recommendations. Default: 300 (5h) |
gpuVerticalScaling | VerticalScalingArgs | No | GPU core vertical scaling configuration (units: GPU millicores) |
gpuVramVerticalScaling | VerticalScalingArgs | No | GPU VRAM vertical scaling configuration (units: bytes) |
liveMigrationEnabled | bool | No | Allow live pod migration when applying recommendations without restart. Default: false |
schedulerPlugins | string[] | No | Kubernetes scheduler plugins to activate. Example: ["binpacking"] |
defragmentationSchedule | string | No | Cron expression for background node defragmentation. Example: "0 3 * * 0" |
Python uses snake_case: cpu_vertical_scaling, memory_vertical_scaling, gpu_vertical_scaling, gpu_vram_vertical_scaling, horizontal_scaling, action_triggers, cron_schedule, detection_triggers, enable_pmax_protection, pmax_ratio_threshold, loopback_period_seconds, startup_period_seconds, min_data_points, min_change_percent, stability_cv_max, hysteresis_vs_target, drift_delta_percent, min_vpa_window_data_points, cooldown_minutes, live_migration_enabled, scheduler_plugins, defragmentation_schedule. Go uses PascalCase equivalents.
VerticalScalingArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
enabled | bool | Yes | Whether vertical scaling is active |
targetPercentile | float | No | Usage percentile to target (e.g. 0.95) |
minRequest | int | No | Minimum request value (millicores for CPU, bytes for memory) |
maxRequest | int | No | Maximum request value |
maxScaleUpPercent | float | No | Maximum upward scaling percentage per step (e.g. 100 = 100%). Default: 1000 |
maxScaleDownPercent | float | No | Maximum downward scaling percentage per step (e.g. 25 = 25%). Default: 25 |
overheadMultiplier | float | No | Safety margin multiplier applied to recommendations |
limitsAdjustmentEnabled | bool | No | Whether to adjust limits alongside requests |
limitMultiplier | float | No | Limits are set to request * limitMultiplier |
minDataPoints | int | No | Minimum data points required before a recommendation is emitted. Default: 20 |
adjustReqEvenIfNotSet | bool | No | Recommend requests even when the workload has no existing requests set. Default: false |
limitsRemovalEnabled | bool | No | Actively remove limits from workloads (CPU axis only — memory limits removal is not supported). Takes precedence over limitsAdjustmentEnabled. Default: false |
Python uses snake_case: target_percentile, min_request, max_request, max_scale_up_percent, max_scale_down_percent, overhead_multiplier, limits_adjustment_enabled, limit_multiplier, min_data_points, adjust_req_even_if_not_set, limits_removal_enabled. Go uses PascalCase: TargetPercentile, MinRequest, MaxRequest, etc.
HorizontalScalingArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
enabled | bool | Yes | Enable horizontal (replica) scaling |
minReplicas | int | No | Minimum number of replicas to maintain |
maxReplicas | int | No | Maximum number of replicas to scale to |
targetUtilization | float | No | Target utilization ratio (0–1) for the primary metric. Example: 0.7 |
primaryMetric | string | No | Metric driving HPA: one of cpu, memory, gpu, network_ingress, network_egress |
minDataPoints | int | No | Minimum data points before a recommendation is emitted |
maxReplicaChangePercent | float | No | Maximum % change in replica count per cycle. Example: 50.0 |
Python: min_replicas, max_replicas, target_utilization, primary_metric, min_data_points, max_replica_change_percent. Go: MinReplicas, MaxReplicas, TargetUtilization, PrimaryMetric, etc.
WorkloadPolicyTarget
WorkloadPolicyTarget attaches a WorkloadPolicy to one or more clusters. You can optionally filter by workload kind and namespace.
Example
import { resources } from "@devzero/pulumi-devzero";
const target = new resources.WorkloadPolicyTarget("production-target", {
name: "production-target",
policyId: policy.id,
clusterIds: [cluster.id],
kindFilter: ["Deployment", "StatefulSet"],
// Match namespaces by name pattern — useful when namespaces follow a naming
// convention but aren't consistently labeled (e.g. team-*, prod-*).
namespacePattern: {
pattern: "^prod-",
flags: "i", // case-insensitive
},
enabled: true,
});
from pulumi_devzero.resources import WorkloadPolicyTarget, WorkloadPolicyTargetArgs, NamePatternArgs
target = WorkloadPolicyTarget("production-target", args=WorkloadPolicyTargetArgs(
name="production-target",
policy_id=policy.id,
cluster_ids=[cluster.id],
kind_filter=["Deployment", "StatefulSet"],
# Match namespaces by name pattern — useful when namespaces follow a naming
# convention but aren't consistently labeled (e.g. team-*, prod-*).
namespace_pattern=NamePatternArgs(pattern="^prod-", flags="i"), # case-insensitive
enabled=True,
))
_, err = resources.NewWorkloadPolicyTarget(ctx, "production-target", &resources.WorkloadPolicyTargetArgs{
Name: pulumi.String("production-target"),
PolicyId: policy.ID(),
ClusterIds: pulumi.StringArray{cluster.ID()},
KindFilter: pulumi.StringArray{
pulumi.String("Deployment"),
pulumi.String("StatefulSet"),
},
// Match namespaces by name pattern — useful when namespaces follow a naming
// convention but aren't consistently labeled (e.g. team-*, prod-*).
NamespacePattern: &resources.NamePatternArgsArgs{
Pattern: pulumi.String("^prod-"),
Flags: pulumi.StringPtr("i"), // case-insensitive
},
Enabled: pulumi.BoolPtr(true),
})
if err != nil {
return err
}
Arguments
| Parameter | Type | Required | Description |
|---|---|---|---|
name | string | Yes | Unique name for the target |
policyId | string | Yes | ID of the WorkloadPolicy to attach |
clusterIds | string[] | Yes | List of cluster IDs to apply the policy to |
description | string | No | Human-readable description |
priority | int | No | Evaluation priority; higher value wins when targets overlap |
kindFilter | string[] | No | Workload kinds to include (see below) |
workloadNames | string[] | No | Explicit list of workload names to include |
nodeGroupNames | string[] | No | Restrict matching to specific node groups by name |
namePattern | NamePatternArgs | No | Regex pattern to match workload names |
namespacePattern | NamePatternArgs | No | Regex pattern to match namespace names. Use this when namespaces follow a naming convention but aren't consistently labeled (e.g. ^prod-, ^team-). Combined with other filters using AND logic. |
namespaceSelector | LabelSelectorArgs | No | Select namespaces by name using include/exclude expressions (matchExpressions) |
workloadSelector | LabelSelectorArgs | No | Select workloads by labels |
enabled | bool | No | Whether the target is active (default: true) |
Python uses snake_case: policy_id, cluster_ids, kind_filter, workload_names, node_group_names, name_pattern, namespace_pattern, namespace_selector, workload_selector. Go uses PascalCase: PolicyId, ClusterIds, KindFilter, NamespacePattern, NamespaceSelector, etc.
Supported kind filter values: Pod, Deployment, StatefulSet, DaemonSet, Job, CronJob, ReplicaSet, ReplicationController, Rollout
NamePatternArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
pattern | string | Yes | Regular expression to match against the name |
flags | string | No | Optional regex flags. Use "i" for case-insensitive matching. Can also be embedded inline with (?i). |
Python: pattern, flags. Go: Pattern, Flags.