New: Compare CPU & GPU pricing across AWS, Azure & GCP
Pulumi

Workload Rules

Pin explicit resource rules directly to a single Kubernetes workload.

Workload Rules

WorkloadRule pins explicit resource rules directly to a single workload (a specific kind/namespace/name on a cluster). Unlike WorkloadPolicy, which applies a shared policy to many workloads via a WorkloadPolicyTarget, a WorkloadRule targets one workload and lets you override CPU, memory, GPU, and HPA settings with precise values.

Set autoGenerate: true to have the engine automatically compute all rule fields from observed usage. Omit it (or set it to false) to provide your own values via cpuRule, memoryRule, hpaRule, etc.

WorkloadRule

Example

import * as pulumi from "@pulumi/pulumi";
import { resources } from "@devzero/pulumi-devzero";

// CPU vertical scaling settings; request bounds are expressed in millicores.
const cpuRule = {
    enabled: true,                 // turn on CPU vertical scaling
    minRequest: 10,                // lower bound for CPU requests (millicores)
    maxRequest: 32000,             // upper bound for CPU requests (millicores)
    targetPercentile: 0.95,        // size requests to the P95 of observed CPU usage
    limitsAdjustmentEnabled: true, // move CPU limits together with requests
    limitMultiplier: 1.0,          // limits = request × 1.0
};

// Memory vertical scaling settings; request bounds are expressed in bytes.
const memoryRule = {
    enabled: true,                 // turn on memory vertical scaling for this workload
    minRequest: 67108864,          // lower bound for memory requests: 64 MiB in bytes
    maxRequest: 68719476736,       // upper bound for memory requests: 64 GiB in bytes
    targetPercentile: 0.95,        // size requests to the P95 of observed memory usage
    limitsAdjustmentEnabled: true, // move memory limits together with requests
};

// Emergency reactions to OOM kills and CPU throttling.
const emergencyResponse = {
    oomEnabled: true,              // raise memory requests after an OOMKill
    oomMemoryMultiplier: 1.5,      // each OOM event multiplies the memory request by 1.5×
    cpuThrottlingEnabled: true,    // raise CPU requests when throttling is detected
    cpuThrottlingThreshold: 0.20,  // react once the throttle ratio exceeds 20%
    cpuThrottlingMultiplier: 1.25, // each throttle reaction multiplies the CPU request by 1.25×
};

// Pin the rules above to a single workload, identified by cluster, namespace, kind and name.
const rule = new resources.WorkloadRule("my-app-rule", {
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",

    cpuRule,
    memoryRule,
    emergencyResponse,

    actionTriggers: ["on_schedule", "on_detection"],
    cronSchedule: "0 2 * * *", // required because actionTriggers includes on_schedule
    detectionTriggers: ["pod_creation", "pod_update"],
    liveMigrationEnabled: false,
});

export const ruleId = rule.id;

Auto-generate:

// Only the workload identity and autoGenerate are supplied; no manual rule fields are set.
const autoGeneratedRuleArgs = {
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",
    autoGenerate: true, // the engine computes all rule fields from observed usage
};

const rule = new resources.WorkloadRule("my-app-rule", autoGeneratedRuleArgs);
import pulumi
from pulumi_devzero.resources import (
    WorkloadRule, WorkloadRuleArgs,
    ResourceRuleConfigArgsArgs,
    EmergencyResponseConfigArgsArgs,
)

# CPU vertical scaling settings; request bounds are expressed in millicores.
cpu_rule = ResourceRuleConfigArgsArgs(
    enabled=True,                    # turn on CPU vertical scaling
    min_request=10,                  # lower bound for CPU requests (millicores)
    max_request=32000,               # upper bound for CPU requests (millicores)
    target_percentile=0.95,          # size requests to the P95 of observed CPU usage
    limits_adjustment_enabled=True,  # move CPU limits together with requests
    limit_multiplier=1.0,            # limits = request × 1.0
)

# Memory vertical scaling settings; request bounds are expressed in bytes.
memory_rule = ResourceRuleConfigArgsArgs(
    enabled=True,                    # turn on memory vertical scaling for this workload
    min_request=67108864,            # lower bound for memory requests: 64 MiB in bytes
    max_request=68719476736,         # upper bound for memory requests: 64 GiB in bytes
    target_percentile=0.95,          # size requests to the P95 of observed memory usage
    limits_adjustment_enabled=True,  # move memory limits together with requests
)

# Emergency reactions to OOM kills and CPU throttling.
emergency_response = EmergencyResponseConfigArgsArgs(
    oom_enabled=True,                # raise memory requests after an OOMKill
    oom_memory_multiplier=1.5,       # each OOM event multiplies the memory request by 1.5×
    cpu_throttling_enabled=True,     # raise CPU requests when throttling is detected
    cpu_throttling_threshold=0.20,   # react once the throttle ratio exceeds 20%
    cpu_throttling_multiplier=1.25,  # each throttle reaction multiplies the CPU request by 1.25×
)

# Pin the rules above to a single workload, identified by cluster, namespace, kind and name.
rule = WorkloadRule(
    "my-app-rule",
    args=WorkloadRuleArgs(
        cluster_id="cluster-abc123",
        namespace="production",
        kind="Deployment",
        name="my-api",
        cpu_rule=cpu_rule,
        memory_rule=memory_rule,
        emergency_response=emergency_response,
        action_triggers=["on_schedule", "on_detection"],
        cron_schedule="0 2 * * *",  # required because action_triggers includes on_schedule
        detection_triggers=["pod_creation", "pod_update"],
        live_migration_enabled=False,
    ),
)

pulumi.export("rule_id", rule.id)

Auto-generate:

# Only the workload identity and auto_generate are supplied; no manual rule fields are set.
auto_generated_args = WorkloadRuleArgs(
    cluster_id="cluster-abc123",
    namespace="production",
    kind="Deployment",
    name="my-api",
    auto_generate=True,  # the engine computes all rule fields from observed usage
)

rule = WorkloadRule("my-app-rule", args=auto_generated_args)
package main

import (
    "github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
    "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main registers a WorkloadRule for one Deployment and exports the rule's ID.
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        // CPU vertical scaling settings; request bounds are expressed in millicores.
        cpuRule := resources.ResourceRuleConfigArgsArgs{
            Enabled:                 pulumi.BoolPtr(true),    // turn on CPU vertical scaling
            MinRequest:              pulumi.IntPtr(10),       // lower bound for CPU requests (millicores)
            MaxRequest:              pulumi.IntPtr(32000),    // upper bound for CPU requests (millicores)
            TargetPercentile:        pulumi.Float64Ptr(0.95), // size requests to the P95 of observed CPU usage
            LimitsAdjustmentEnabled: pulumi.BoolPtr(true),    // move CPU limits together with requests
            LimitMultiplier:         pulumi.Float64Ptr(1.0),  // limits = request × 1.0
        }.ToResourceRuleConfigArgsPtrOutput()

        // Memory vertical scaling settings; request bounds are expressed in bytes.
        memoryRule := resources.ResourceRuleConfigArgsArgs{
            Enabled:                 pulumi.BoolPtr(true),       // turn on memory vertical scaling for this workload
            MinRequest:              pulumi.IntPtr(67108864),    // lower bound for memory requests: 64 MiB in bytes
            MaxRequest:              pulumi.IntPtr(68719476736), // upper bound for memory requests: 64 GiB in bytes
            TargetPercentile:        pulumi.Float64Ptr(0.95),    // size requests to the P95 of observed memory usage
            LimitsAdjustmentEnabled: pulumi.BoolPtr(true),       // move memory limits together with requests
        }.ToResourceRuleConfigArgsPtrOutput()

        // Emergency reactions to OOM kills and CPU throttling.
        emergencyResponse := resources.EmergencyResponseConfigArgsArgs{
            OomEnabled:              pulumi.BoolPtr(true),    // raise memory requests after an OOMKill
            OomMemoryMultiplier:     pulumi.Float64Ptr(1.5),  // each OOM event multiplies the memory request by 1.5×
            CpuThrottlingEnabled:    pulumi.BoolPtr(true),    // raise CPU requests when throttling is detected
            CpuThrottlingThreshold:  pulumi.Float64Ptr(0.20), // react once the throttle ratio exceeds 20%
            CpuThrottlingMultiplier: pulumi.Float64Ptr(1.25), // each throttle reaction multiplies the CPU request by 1.25×
        }.ToEmergencyResponseConfigArgsPtrOutput()

        // Pin the rules above to a single workload, identified by cluster, namespace, kind and name.
        ruleArgs := &resources.WorkloadRuleArgs{
            ClusterId: pulumi.String("cluster-abc123"),
            Namespace: pulumi.String("production"),
            Kind:      pulumi.String("Deployment"),
            Name:      pulumi.String("my-api"),

            CpuRule:           cpuRule,
            MemoryRule:        memoryRule,
            EmergencyResponse: emergencyResponse,

            ActionTriggers: pulumi.StringArray{
                pulumi.String("on_schedule"),
                pulumi.String("on_detection"),
            },
            CronSchedule: pulumi.StringPtr("0 2 * * *"), // required because ActionTriggers includes on_schedule
            DetectionTriggers: pulumi.StringArray{
                pulumi.String("pod_creation"),
                pulumi.String("pod_update"),
            },
            LiveMigrationEnabled: pulumi.BoolPtr(false),
        }

        rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", ruleArgs)
        if err != nil {
            return err
        }

        ctx.Export("ruleId", rule.ID())
        return nil
    })
}

Auto-generate:

rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
    ClusterId:    pulumi.String("cluster-abc123"),
    Namespace:    pulumi.String("production"),
    Kind:         pulumi.String("Deployment"),
    Name:         pulumi.String("my-api"),
    AutoGenerate: pulumi.BoolPtr(true),
})

Arguments

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| clusterId | string | Yes | ID of the cluster the workload lives in |
| namespace | string | Yes | Kubernetes namespace of the workload |
| kind | string | Yes | Workload kind: Deployment \| StatefulSet \| DaemonSet \| CronJob \| Job |
| name | string | Yes | Name of the Kubernetes workload |
| autoGenerate | bool | No | When true, the engine fills all rule fields from observed usage; manual fields are ignored |
| cpuRule | ResourceRuleConfigArgs | No | CPU vertical scaling rule |
| memoryRule | ResourceRuleConfigArgs | No | Memory vertical scaling rule |
| gpuRule | ResourceRuleConfigArgs | No | GPU vertical scaling rule (units: GPU millicores) |
| hpaRule | HPARuleConfigArgs | No | Horizontal (replica) scaling rule |
| emergencyResponse | EmergencyResponseConfigArgs | No | OOM and CPU-throttle emergency reactions |
| actionTriggers | string[] | No | When to apply: on_detection \| on_schedule |
| cronSchedule | string | No | Cron expression for scheduled application (5-field UTC). Required when actionTriggers includes on_schedule |
| detectionTriggers | string[] | No | Events that trigger a recommendation: pod_creation \| pod_update \| pod_reschedule |
| schedulerPlugins | string[] | No | Kubernetes scheduler plugins to activate. Example: ["binpacking"] |
| defragmentationSchedule | string | No | Cron expression for node defragmentation |
| liveMigrationEnabled | bool | No | Allow live pod migration when applying recommendations without restart |
| useInPlaceVerticalScaling | bool | No | Use in-place pod vertical scaling instead of pod restarts |
| containers | ContainerResourceRuleConfigArgs[] | No | Per-container resource overrides. When empty, workload-level rules apply to all containers |

Python uses snake_case: cluster_id, auto_generate, cpu_rule, memory_rule, gpu_rule, hpa_rule, emergency_response, action_triggers, cron_schedule, detection_triggers, scheduler_plugins, defragmentation_schedule, live_migration_enabled, use_in_place_vertical_scaling. Go uses PascalCase equivalents.

ResourceRuleConfigArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| enabled | bool | Yes | Enable this resource axis rule |
| minRequest | int | No | Minimum resource request (millicores for CPU, bytes for memory/GPU) |
| maxRequest | int | No | Maximum resource request |
| targetPercentile | float | No | Percentile of observed usage to target (0–1). Example: 0.95 |
| maxScaleUpPercent | float | No | Maximum percentage to scale up in one step (workload-level only) |
| maxScaleDownPercent | float | No | Maximum percentage to scale down in one step (workload-level only) |
| limitsAdjustmentEnabled | bool | No | Whether to also adjust resource limits |
| limitMultiplier | float | No | Limits = request × limitMultiplier |
| limitsRemovalEnabled | bool | No | Actively remove limits from workloads (CPU only) |

Python: min_request, max_request, target_percentile, max_scale_up_percent, max_scale_down_percent, limits_adjustment_enabled, limit_multiplier, limits_removal_enabled. Go: PascalCase equivalents.

HPARuleConfigArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| enabled | bool | Yes | Enable horizontal (replica) scaling |
| minReplicas | int | No | Minimum number of replicas |
| maxReplicas | int | No | Maximum number of replicas |
| targetUtilization | float | No | Target CPU utilization ratio (0–1). Example: 0.8 |
| targetMemoryUtilization | float | No | Target memory utilization ratio (0–1), tuned independently of CPU. Example: 0.65 |
| primaryMetric | string | No | Primary metric driving HPA (used when metrics is empty): cpu \| memory \| gpu \| network_ingress \| network_egress. Example: "memory" |
| maxReplicaChangePercent | float | No | Maximum fraction of current replicas that can change in one scale event (0–1). 0.25 means at most 25% added or removed at once. Example: 0.25 |
| metrics | HPAMetricTriggerArgs[] | No | External metric triggers only (e.g. Prometheus, queue depth). Use when primaryMetric alone is not sufficient. CPU/Memory/Network are auto-generated by the engine and are silently dropped if redeclared here |
| compositeFormula | string | No | Expression combining multiple metric ratios into one scaling signal. Example: "0.6*cpu + 0.4*memory" |
| behavior | HPABehaviorArgs | No | Fine-grained scale-up and scale-down behavior policies |
| fallback | HPAFallbackArgs | No | Replica fallback when metrics become unavailable |

Python uses snake_case (e.g. target_memory_utilization, composite_formula). Go uses PascalCase equivalents.

HPAMetricTriggerArgs

When to use metrics[]: Only add entries here when you need to scale on external metrics (e.g. a Prometheus query, request queue depth, or custom business metric). CPU, Memory, and Network triggers are auto-generated by the engine from primaryMetric + targetUtilization — redeclaring them in metrics[] has no effect; the engine silently drops them and regenerates its own triggers.

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| type | string | Yes | Metric source type. Built-in: CPU, Memory, NetworkIngress, NetworkEgress. External: prometheus |
| targetUtilization | string | No | Target utilization as a decimal string (resource metrics). Example: "0.70" |
| targetValue | string | No | Absolute target value as a string (external/object metrics). Example: "50" |
| weight | string | No | Weight for composite formula scaling (decimal string). Example: "0.5" |
| metadata | map[string]string | No | Free-form key-value pairs passed to the external scaler |
| serverAddress | string | No | Prometheus server URL — packed into metadata by the service layer. Example: "http://prometheus:9090" |
| query | string | No | PromQL query string — packed into metadata by the service layer. Example: "sum(rate(http_requests_total[2m]))" |

Python: target_utilization, target_value, server_address. Go uses PascalCase equivalents.

Example — CPU & Memory utilization HPA

// HPA example: horizontal (replica) scaling driven by memory utilization, plus one
// external Prometheus trigger, a metrics-unavailable fallback, and rate-limited
// scale-up/scale-down behavior.
const rule = new resources.WorkloadRule("my-app-rule", {
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",
    hpaRule: {
        enabled: true,                                         // activate horizontal (replica) scaling
        minReplicas: 1,
        maxReplicas: 8,
        primaryMetric: "memory",                               // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        targetUtilization: 0.8,                                // target 80% utilization for the primary metric
        targetMemoryUtilization: 0.65,                         // target 65% memory utilization, tuned independently
        maxReplicaChangePercent: 0.25,                         // cap scale events at ±25% of current replicas per cycle
        metrics: [
            {
                type: "prometheus",                            // external Prometheus metric
                targetValue: "50",                             // absolute target value (e.g. 50 req/s)
                serverAddress: "http://prometheus:9090",       // Prometheus server URL
                query: "sum(rate(http_requests_total[2m]))",   // PromQL query
            },
        ],
        fallback: {
            replicas: 1,                                       // hold at 1 replica when metrics are unavailable
            behavior: "currentReplicas",                       // use the current live replica count as the fallback value
            failureThreshold: 3,                               // activate fallback after 3 consecutive metric failures
        },
        behavior: {
            scaleDown: {
                selectPolicy: "Min",   // apply the most conservative (smallest) scale-down step
                policies: [
                    // periodSeconds is a required policy field: the time window the limit applies to
                    { type: "Percent", value: 10, periodSeconds: 60 }, // remove at most 10% of replicas per 60-second window
                ],
            },
            scaleUp: {
                selectPolicy: "Max",   // apply the most aggressive (largest) scale-up step
                policies: [
                    // periodSeconds is a required policy field: the time window the limit applies to
                    { type: "Percent", value: 100, periodSeconds: 60 }, // allow up to 100% more replicas per 60-second window
                ],
            },
        },
    },
});
from pulumi_devzero.resources import (
    WorkloadRule, WorkloadRuleArgs,
    HPARuleConfigArgsArgs,
    HPAMetricTriggerArgsArgs,
    HPAFallbackArgsArgs,
    HPABehaviorArgsArgs,
    HPAScalingRulesArgsArgs,
    HPAScalingPolicyArgsArgs,
)

# HPA example: horizontal (replica) scaling driven by memory utilization, plus one
# external Prometheus trigger, a metrics-unavailable fallback, and rate-limited
# scale-up/scale-down behavior.
rule = WorkloadRule("my-app-rule", args=WorkloadRuleArgs(
    cluster_id="cluster-abc123",
    namespace="production",
    kind="Deployment",
    name="my-api",
    hpa_rule=HPARuleConfigArgsArgs(
        enabled=True,                                          # activate horizontal (replica) scaling
        min_replicas=1,
        max_replicas=8,
        primary_metric="memory",                               # primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        target_utilization=0.8,                                # target 80% utilization for the primary metric
        target_memory_utilization=0.65,                        # target 65% memory utilization, tuned independently
        max_replica_change_percent=0.25,                       # cap scale events at ±25% of current replicas per cycle
        metrics=[
            HPAMetricTriggerArgsArgs(
                type="prometheus",                             # external Prometheus metric
                target_value="50",                             # absolute target value (e.g. 50 req/s)
                server_address="http://prometheus:9090",       # Prometheus server URL
                query="sum(rate(http_requests_total[2m]))",    # PromQL query
            ),
        ],
        fallback=HPAFallbackArgsArgs(
            replicas=1,                     # hold at 1 replica when metrics are unavailable
            behavior="currentReplicas",     # use the current live replica count as the fallback value
            failure_threshold=3,            # activate fallback after 3 consecutive metric failures
        ),
        behavior=HPABehaviorArgsArgs(
            scale_down=HPAScalingRulesArgsArgs(
                select_policy="Min",  # apply the most conservative (smallest) scale-down step
                policies=[
                    # period_seconds is a required policy field: the time window the limit applies to
                    HPAScalingPolicyArgsArgs(type="Percent", value=10, period_seconds=60),  # remove at most 10% of replicas per 60-second window
                ],
            ),
            scale_up=HPAScalingRulesArgsArgs(
                select_policy="Max",  # apply the most aggressive (largest) scale-up step
                policies=[
                    # period_seconds is a required policy field: the time window the limit applies to
                    HPAScalingPolicyArgsArgs(type="Percent", value=100, period_seconds=60),  # allow up to 100% more replicas per 60-second window
                ],
            ),
        ),
    ),
))
// HPA example: horizontal (replica) scaling driven by memory utilization, plus one
// external Prometheus trigger, a metrics-unavailable fallback, and rate-limited
// scale-up/scale-down behavior.
rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
    ClusterId: pulumi.String("cluster-abc123"),
    Namespace: pulumi.String("production"),
    Kind:      pulumi.String("Deployment"),
    Name:      pulumi.String("my-api"),
    HpaRule: resources.HPARuleConfigArgsArgs{
        Enabled:                 pulumi.BoolPtr(true),                      // activate horizontal (replica) scaling
        MinReplicas:             pulumi.IntPtr(1),
        MaxReplicas:             pulumi.IntPtr(8),
        PrimaryMetric:           pulumi.StringPtr("memory"),                // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        TargetUtilization:       pulumi.Float64Ptr(0.8),                    // target 80% utilization for the primary metric
        TargetMemoryUtilization: pulumi.Float64Ptr(0.65),                   // target 65% memory utilization, tuned independently
        MaxReplicaChangePercent: pulumi.Float64Ptr(0.25),                   // cap scale events at ±25% of current replicas per cycle
        Metrics: resources.HPAMetricTriggerArgsArray{
            resources.HPAMetricTriggerArgsArgs{
                Type:          pulumi.String("prometheus"),                            // external Prometheus metric
                TargetValue:   pulumi.StringPtr("50"),                                 // absolute target value (e.g. 50 req/s)
                ServerAddress: pulumi.StringPtr("http://prometheus:9090"),             // Prometheus server URL
                Query:         pulumi.StringPtr("sum(rate(http_requests_total[2m]))"), // PromQL query
            },
        },
        Fallback: resources.HPAFallbackArgsArgs{
            Replicas:         pulumi.Int(1),                           // hold at 1 replica when metrics are unavailable
            Behavior:         pulumi.String("currentReplicas"),        // use the current live replica count as the fallback value
            FailureThreshold: pulumi.Int(3),                           // activate fallback after 3 consecutive metric failures
        }.ToHPAFallbackArgsPtrOutput(),
        Behavior: resources.HPABehaviorArgsArgs{
            ScaleDown: resources.HPAScalingRulesArgsArgs{
                SelectPolicy: pulumi.String("Min"), // apply the most conservative (smallest) scale-down step
                Policies: resources.HPAScalingPolicyArgsArray{
                    // remove at most 10% of replicas per 60-second window
                    resources.HPAScalingPolicyArgsArgs{
                        Type:          pulumi.String("Percent"),
                        Value:         pulumi.Int(10),
                        PeriodSeconds: pulumi.Int(60), // required policy field: the time window the limit applies to
                    },
                },
            }.ToHPAScalingRulesArgsPtrOutput(),
            ScaleUp: resources.HPAScalingRulesArgsArgs{
                SelectPolicy: pulumi.String("Max"), // apply the most aggressive (largest) scale-up step
                Policies: resources.HPAScalingPolicyArgsArray{
                    // allow up to 100% more replicas per 60-second window
                    resources.HPAScalingPolicyArgsArgs{
                        Type:          pulumi.String("Percent"),
                        Value:         pulumi.Int(100),
                        PeriodSeconds: pulumi.Int(60), // required policy field: the time window the limit applies to
                    },
                },
            }.ToHPAScalingRulesArgsPtrOutput(),
        }.ToHPABehaviorArgsPtrOutput(),
    }.ToHPARuleConfigArgsPtrOutput(),
})

HPAFallbackArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| replicas | int | Yes | Replica count to use when metrics are unavailable. Example: 1 |
| behavior | string | Yes | How to apply fallback replicas. One of: static, currentReplicas, currentReplicasIfHigher, currentReplicasIfLower. Example: "currentReplicas" |
| failureThreshold | int | Yes | Consecutive metric failures before fallback activates. Example: 3 |

Python: failure_threshold. Go: PascalCase equivalents.

HPABehaviorArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| scaleUp | HPAScalingRulesArgs | No | Scale-up rate limiting and stabilization |
| scaleDown | HPAScalingRulesArgs | No | Scale-down rate limiting and stabilization |

Python: scale_up, scale_down. Go: PascalCase equivalents.

HPAScalingRulesArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| stabilizationWindowSeconds | int | No | Seconds to look back when selecting replica count to avoid flapping. Default: 0 for scale-up, 300 for scale-down |
| selectPolicy | string | No | Which policy wins when multiple match: Max \| Min \| Disabled. Example: "Max" |
| policies | HPAScalingPolicyArgs[] | No | List of rate-limiting step policies |

Python: stabilization_window_seconds, select_policy. Go: PascalCase equivalents.

HPAScalingPolicyArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| type | string | Yes | Policy type: Pods (absolute count) \| Percent (% of current replicas). Example: "Percent" |
| value | int | Yes | Maximum change allowed per period. Example: 100 |
| periodSeconds | int | Yes | Time window for this policy in seconds. Example: 60 |

Python: period_seconds. Go: PascalCase equivalents.

EmergencyResponseConfigArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| oomEnabled | bool | No | React to OOM kills by increasing memory requests |
| oomMemoryMultiplier | float | No | Multiplier applied to memory on OOM. Example: 1.5 |
| cpuThrottlingEnabled | bool | No | React to CPU throttling by increasing CPU requests |
| cpuThrottlingThreshold | float | No | Throttle ratio (0–1) that triggers a reaction. Example: 0.1 |
| cpuThrottlingMultiplier | float | No | Multiplier applied to CPU request on throttle reaction. Example: 1.25 |

Python: oom_enabled, oom_memory_multiplier, oom_max_reactions, cpu_throttling_enabled, cpu_throttling_threshold, cpu_throttling_multiplier. Go: PascalCase equivalents.

ContainerResourceRuleConfigArgs

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| containerName | string | Yes | Name of the container this config applies to |
| cpuRule | ResourceRuleConfigArgs | No | CPU rule for this container |
| memoryRule | ResourceRuleConfigArgs | No | Memory rule for this container |
| gpuRule | ResourceRuleConfigArgs | No | GPU rule for this container |

Python: container_name, cpu_rule, memory_rule, gpu_rule. Go: PascalCase equivalents.

On this page