Workload Rules

WorkloadRule pins explicit resource rules directly to a single workload (a specific kind/namespace/name on a cluster). Unlike WorkloadPolicy, which applies a shared policy to many workloads via a WorkloadPolicyTarget, a WorkloadRule targets one workload and lets you override CPU, memory, GPU, and HPA settings with precise values.

Set autoGenerate: true to have the engine automatically compute all rule fields from observed usage. Omit it (or set it to false) to provide your own values via cpuRule, memoryRule, hpaRule, etc.

WorkloadRule

Example

import * as pulumi from "@pulumi/pulumi";
import { resources } from "@devzero/pulumi-devzero";

// CPU sizing. Request values are expressed in millicores.
const cpuRule = {
    enabled: true,                 // turn on vertical scaling for CPU
    minRequest: 10,                // lowest CPU request the engine may set
    maxRequest: 32000,             // highest CPU request the engine may set
    targetPercentile: 0.95,        // size against the 95th percentile of observed CPU usage
    limitsAdjustmentEnabled: true, // move CPU limits together with requests
    limitMultiplier: 1.0,          // limit = request × 1.0
};

// Memory sizing. Request values are expressed in bytes.
const memoryRule = {
    enabled: true,                 // turn on vertical scaling for memory
    minRequest: 67108864,          // 64 MiB floor
    maxRequest: 68719476736,       // 64 GiB ceiling
    targetPercentile: 0.95,        // size against the 95th percentile of observed memory usage
    limitsAdjustmentEnabled: true, // move memory limits together with requests
};

// Reactions to OOM kills and CPU throttling.
const emergencyResponse = {
    oomEnabled: true,              // raise memory requests after an OOMKill
    oomMemoryMultiplier: 1.5,      // each OOM event multiplies the memory request by 1.5
    cpuThrottlingEnabled: true,    // raise CPU requests when throttling is detected
    cpuThrottlingThreshold: 0.20,  // react once the throttle ratio exceeds 20%
    cpuThrottlingMultiplier: 1.25, // a throttle reaction multiplies the CPU request by 1.25
};

const rule = new resources.WorkloadRule("my-app-rule", {
    // Workload this rule is pinned to
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",

    cpuRule,
    memoryRule,
    emergencyResponse,

    actionTriggers: ["on_schedule", "on_detection"],   // apply on the cron schedule and on detection events
    cronSchedule: "0 2 * * *",                         // 5-field cron expression, evaluated in UTC
    detectionTriggers: ["pod_creation", "pod_update"], // events that trigger a recommendation
    liveMigrationEnabled: false,
});

export const ruleId = rule.id;

Auto-generate:

// Only the workload identity is supplied; autoGenerate asks the engine to
// compute the rule fields from observed usage.
const autoRuleArgs = {
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",
    autoGenerate: true,
};

const rule = new resources.WorkloadRule("my-app-rule", autoRuleArgs);

import pulumi
from pulumi_devzero.resources import (
    WorkloadRule, WorkloadRuleArgs,
    ResourceRuleConfigArgsArgs,
    EmergencyResponseConfigArgsArgs,
)

# CPU sizing. Request values are expressed in millicores.
cpu_rule = ResourceRuleConfigArgsArgs(
    enabled=True,                    # turn on vertical scaling for CPU
    min_request=10,                  # lowest CPU request the engine may set
    max_request=32000,               # highest CPU request the engine may set
    target_percentile=0.95,          # size against the 95th percentile of observed CPU usage
    limits_adjustment_enabled=True,  # move CPU limits together with requests
    limit_multiplier=1.0,            # limit = request × 1.0
)

# Memory sizing. Request values are expressed in bytes.
memory_rule = ResourceRuleConfigArgsArgs(
    enabled=True,                    # turn on vertical scaling for memory
    min_request=67108864,            # 64 MiB floor
    max_request=68719476736,         # 64 GiB ceiling
    target_percentile=0.95,          # size against the 95th percentile of observed memory usage
    limits_adjustment_enabled=True,  # move memory limits together with requests
)

# Reactions to OOM kills and CPU throttling.
emergency_response = EmergencyResponseConfigArgsArgs(
    oom_enabled=True,                # raise memory requests after an OOMKill
    oom_memory_multiplier=1.5,       # each OOM event multiplies the memory request by 1.5
    cpu_throttling_enabled=True,     # raise CPU requests when throttling is detected
    cpu_throttling_threshold=0.20,   # react once the throttle ratio exceeds 20%
    cpu_throttling_multiplier=1.25,  # a throttle reaction multiplies the CPU request by 1.25
)

rule = WorkloadRule(
    "my-app-rule",
    args=WorkloadRuleArgs(
        # Workload this rule is pinned to
        cluster_id="cluster-abc123",
        namespace="production",
        kind="Deployment",
        name="my-api",
        cpu_rule=cpu_rule,
        memory_rule=memory_rule,
        emergency_response=emergency_response,
        action_triggers=["on_schedule", "on_detection"],    # apply on the cron schedule and on detection events
        cron_schedule="0 2 * * *",                          # 5-field cron expression, evaluated in UTC
        detection_triggers=["pod_creation", "pod_update"],  # events that trigger a recommendation
        live_migration_enabled=False,
    ),
)

pulumi.export("rule_id", rule.id)

Auto-generate:

# Only the workload identity is supplied; auto_generate asks the engine to
# compute the rule fields from observed usage.
auto_args = WorkloadRuleArgs(
    cluster_id="cluster-abc123",
    namespace="production",
    kind="Deployment",
    name="my-api",
    auto_generate=True,
)

rule = WorkloadRule("my-app-rule", args=auto_args)

package main

import (
    "github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
    "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

// main runs a Pulumi program that creates one WorkloadRule with explicit CPU,
// memory, and emergency-response settings, then exports its ID as "ruleId".
func main() {
    pulumi.Run(func(ctx *pulumi.Context) error {
        // CPU sizing. Request values are expressed in millicores.
        cpuRule := resources.ResourceRuleConfigArgsArgs{
            Enabled:                 pulumi.BoolPtr(true),    // turn on vertical scaling for CPU
            MinRequest:              pulumi.IntPtr(10),       // lowest CPU request the engine may set
            MaxRequest:              pulumi.IntPtr(32000),    // highest CPU request the engine may set
            TargetPercentile:        pulumi.Float64Ptr(0.95), // size against the 95th percentile of observed CPU usage
            LimitsAdjustmentEnabled: pulumi.BoolPtr(true),    // move CPU limits together with requests
            LimitMultiplier:         pulumi.Float64Ptr(1.0),  // limit = request × 1.0
        }

        // Memory sizing. Request values are expressed in bytes.
        memoryRule := resources.ResourceRuleConfigArgsArgs{
            Enabled:                 pulumi.BoolPtr(true),       // turn on vertical scaling for memory
            MinRequest:              pulumi.IntPtr(67108864),    // 64 MiB floor
            MaxRequest:              pulumi.IntPtr(68719476736), // 64 GiB ceiling
            TargetPercentile:        pulumi.Float64Ptr(0.95),    // size against the 95th percentile of observed memory usage
            LimitsAdjustmentEnabled: pulumi.BoolPtr(true),       // move memory limits together with requests
        }

        // Reactions to OOM kills and CPU throttling.
        emergency := resources.EmergencyResponseConfigArgsArgs{
            OomEnabled:              pulumi.BoolPtr(true),    // raise memory requests after an OOMKill
            OomMemoryMultiplier:     pulumi.Float64Ptr(1.5),  // each OOM event multiplies the memory request by 1.5
            CpuThrottlingEnabled:    pulumi.BoolPtr(true),    // raise CPU requests when throttling is detected
            CpuThrottlingThreshold:  pulumi.Float64Ptr(0.20), // react once the throttle ratio exceeds 20%
            CpuThrottlingMultiplier: pulumi.Float64Ptr(1.25), // a throttle reaction multiplies the CPU request by 1.25
        }

        args := &resources.WorkloadRuleArgs{
            // Workload this rule is pinned to
            ClusterId: pulumi.String("cluster-abc123"),
            Namespace: pulumi.String("production"),
            Kind:      pulumi.String("Deployment"),
            Name:      pulumi.String("my-api"),

            CpuRule:           cpuRule.ToResourceRuleConfigArgsPtrOutput(),
            MemoryRule:        memoryRule.ToResourceRuleConfigArgsPtrOutput(),
            EmergencyResponse: emergency.ToEmergencyResponseConfigArgsPtrOutput(),

            // Apply on the cron schedule (5-field, UTC) and on detection events.
            ActionTriggers:       pulumi.StringArray{pulumi.String("on_schedule"), pulumi.String("on_detection")},
            CronSchedule:         pulumi.StringPtr("0 2 * * *"),
            DetectionTriggers:    pulumi.StringArray{pulumi.String("pod_creation"), pulumi.String("pod_update")},
            LiveMigrationEnabled: pulumi.BoolPtr(false),
        }

        rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", args)
        if err != nil {
            return err
        }

        ctx.Export("ruleId", rule.ID())
        return nil
    })
}

Auto-generate:

rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
    ClusterId:    pulumi.String("cluster-abc123"),
    Namespace:    pulumi.String("production"),
    Kind:         pulumi.String("Deployment"),
    Name:         pulumi.String("my-api"),
    AutoGenerate: pulumi.BoolPtr(true),
})

Arguments

Parameter	Type	Required	Description
`clusterId`	string	Yes	ID of the cluster the workload lives in
`namespace`	string	Yes	Kubernetes namespace of the workload
`kind`	string	Yes	Workload kind: `Deployment` \| `StatefulSet` \| `DaemonSet` \| `CronJob` \| `Job`
`name`	string	Yes	Name of the Kubernetes workload
`autoGenerate`	bool	No	When `true`, the engine fills all rule fields from observed usage; manual fields are ignored
`cpuRule`	ResourceRuleConfigArgs	No	CPU vertical scaling rule
`memoryRule`	ResourceRuleConfigArgs	No	Memory vertical scaling rule
`gpuRule`	ResourceRuleConfigArgs	No	GPU vertical scaling rule (units: GPU millicores)
`hpaRule`	HPARuleConfigArgs	No	Horizontal (replica) scaling rule
`emergencyResponse`	EmergencyResponseConfigArgs	No	OOM and CPU-throttle emergency reactions
`actionTriggers`	string[]	No	When to apply: `on_detection` \| `on_schedule`
`cronSchedule`	string	No	Cron expression for scheduled application (5-field UTC). Required when `actionTriggers` includes `on_schedule`
`detectionTriggers`	string[]	No	Events that trigger a recommendation: `pod_creation` \| `pod_update` \| `pod_reschedule`
`schedulerPlugins`	string[]	No	Kubernetes scheduler plugins to activate. Example: `["binpacking"]`
`defragmentationSchedule`	string	No	Cron expression for node defragmentation
`liveMigrationEnabled`	bool	No	Allow live pod migration so that recommendations can be applied without restarting pods
`useInPlaceVerticalScaling`	bool	No	Use in-place pod vertical scaling instead of pod restarts
`containers`	ContainerResourceRuleConfigArgs[]	No	Per-container resource overrides. When empty, workload-level rules apply to all containers

Python uses snake_case: cluster_id, auto_generate, cpu_rule, memory_rule, gpu_rule, hpa_rule, emergency_response, action_triggers, cron_schedule, detection_triggers, scheduler_plugins, defragmentation_schedule, live_migration_enabled, use_in_place_vertical_scaling. Go uses PascalCase equivalents.

ResourceRuleConfigArgs

Parameter	Type	Required	Description
`enabled`	bool	Yes	Enable this resource axis rule
`minRequest`	int	No	Minimum resource request, in the unit of the rule it belongs to (millicores for CPU, bytes for memory; see `gpuRule` for GPU units)
`maxRequest`	int	No	Maximum resource request (same unit as `minRequest`)
`targetPercentile`	float	No	Percentile of observed usage to target (0–1). Example: `0.95`
`maxScaleUpPercent`	float	No	Maximum percentage to scale up in one step (workload-level only)
`maxScaleDownPercent`	float	No	Maximum percentage to scale down in one step (workload-level only)
`limitsAdjustmentEnabled`	bool	No	Whether to also adjust resource limits
`limitMultiplier`	float	No	Limits = request × limitMultiplier
`limitsRemovalEnabled`	bool	No	Actively remove limits from workloads (CPU only)

Python: min_request, max_request, target_percentile, max_scale_up_percent, max_scale_down_percent, limits_adjustment_enabled, limit_multiplier, limits_removal_enabled. Go: PascalCase equivalents.

HPARuleConfigArgs

Parameter	Type	Required	Description
`enabled`	bool	Yes	Enable horizontal (replica) scaling
`minReplicas`	int	No	Minimum number of replicas
`maxReplicas`	int	No	Maximum number of replicas
`targetUtilization`	float	No	Target CPU utilization ratio (0–1). Example: `0.8`
`targetMemoryUtilization`	float	No	Target memory utilization ratio (0–1), tuned independently of CPU. Example: `0.65`
`primaryMetric`	string	No	Primary metric driving HPA (used when `metrics` is empty): `cpu` \| `memory` \| `gpu` \| `network_ingress` \| `network_egress`. Example: `"memory"`
`maxReplicaChangePercent`	float	No	Maximum fraction of current replicas that can change in one scale event (0–1). `0.25` means at most 25% added or removed at once. Example: `0.25`
`metrics`	HPAMetricTriggerArgs[]	No	External metric triggers only (e.g. Prometheus, queue depth). Use when `primaryMetric` alone is not sufficient. CPU/Memory/Network are auto-generated by the engine and are silently dropped if redeclared here
`compositeFormula`	string	No	Expression combining multiple metric ratios into one scaling signal. Example: `"0.6cpu + 0.4memory"`
`behavior`	HPABehaviorArgs	No	Fine-grained scale-up and scale-down behavior policies
`fallback`	HPAFallbackArgs	No	Replica fallback when metrics become unavailable

Python uses snake_case (e.g. target_memory_utilization, composite_formula). Go uses PascalCase equivalents.

HPAMetricTriggerArgs

When to use metrics[]: Only add entries here when you need to scale on external metrics (e.g. a Prometheus query, request queue depth, or custom business metric). CPU, Memory, and Network triggers are auto-generated by the engine from primaryMetric + targetUtilization — redeclaring them in metrics[] has no effect; the engine silently drops them and regenerates its own triggers.

Parameter	Type	Required	Description
`type`	string	Yes	Metric source type. Built-in: `CPU`, `Memory`, `NetworkIngress`, `NetworkEgress`. External: `prometheus`
`targetUtilization`	string	No	Target utilization as a decimal string (resource metrics). Example: `"0.70"`
`targetValue`	string	No	Absolute target value as a string (external/object metrics). Example: `"50"`
`weight`	string	No	Weight for composite formula scaling (decimal string). Example: `"0.5"`
`metadata`	map[string]string	No	Free-form key-value pairs passed to the external scaler
`serverAddress`	string	No	Prometheus server URL — packed into `metadata` by the service layer. Example: `"http://prometheus:9090"`
`query`	string	No	PromQL query string — packed into `metadata` by the service layer. Example: `"sum(rate(http_requests_total[2m]))"`

Python: target_utilization, target_value, server_address. Go uses PascalCase equivalents.

Example — CPU & Memory utilization HPA

// HPA example: memory as the primary metric, one external Prometheus trigger,
// a metrics-unavailable fallback, and rate-limited scale-up / scale-down.
const rule = new resources.WorkloadRule("my-app-rule", {
    clusterId: "cluster-abc123",
    namespace: "production",
    kind: "Deployment",
    name: "my-api",
    hpaRule: {
        enabled: true,                                         // activate horizontal (replica) scaling
        minReplicas: 1,
        maxReplicas: 8,
        primaryMetric: "memory",                               // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        targetUtilization: 0.8,                                // target 80% utilization for the primary metric
        targetMemoryUtilization: 0.65,                         // target 65% memory utilization, tuned independently
        maxReplicaChangePercent: 0.25,                         // cap scale events at ±25% of current replicas per cycle
        metrics: [
            {
                type: "prometheus",                            // external Prometheus metric
                targetValue: "50",                             // absolute target value (e.g. 50 req/s)
                serverAddress: "http://prometheus:9090",       // Prometheus server URL
                query: "sum(rate(http_requests_total[2m]))",   // PromQL query
            },
        ],
        fallback: {
            replicas: 1,                                       // fallback replica count (required); how it is applied depends on `behavior`
            behavior: "currentReplicas",                       // use the current live replica count as the fallback value
            failureThreshold: 3,                               // activate fallback after 3 consecutive metric failures
        },
        behavior: {
            scaleDown: {
                selectPolicy: "Min",   // apply the most conservative (smallest) scale-down step
                policies: [
                    // type, value and periodSeconds are all required on a scaling policy
                    { type: "Percent", value: 10, periodSeconds: 60 }, // remove at most 10% of replicas per 60-second period
                ],
            },
            scaleUp: {
                selectPolicy: "Max",   // apply the most aggressive (largest) scale-up step
                policies: [
                    { type: "Percent", value: 100, periodSeconds: 60 }, // allow up to 100% more replicas per 60-second period
                ],
            },
        },
    },
});

from pulumi_devzero.resources import (
    WorkloadRule, WorkloadRuleArgs,
    HPARuleConfigArgsArgs,
    HPAMetricTriggerArgsArgs,
    HPAFallbackArgsArgs,
    HPABehaviorArgsArgs,
    HPAScalingRulesArgsArgs,
    HPAScalingPolicyArgsArgs,
)

# HPA example: memory as the primary metric, one external Prometheus trigger,
# a metrics-unavailable fallback, and rate-limited scale-up / scale-down.
rule = WorkloadRule("my-app-rule", args=WorkloadRuleArgs(
    cluster_id="cluster-abc123",
    namespace="production",
    kind="Deployment",
    name="my-api",
    hpa_rule=HPARuleConfigArgsArgs(
        enabled=True,                                          # activate horizontal (replica) scaling
        min_replicas=1,
        max_replicas=8,
        primary_metric="memory",                               # primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        target_utilization=0.8,                                # target 80% utilization for the primary metric
        target_memory_utilization=0.65,                        # target 65% memory utilization, tuned independently
        max_replica_change_percent=0.25,                       # cap scale events at ±25% of current replicas per cycle
        metrics=[
            HPAMetricTriggerArgsArgs(
                type="prometheus",                             # external Prometheus metric
                target_value="50",                             # absolute target value (e.g. 50 req/s)
                server_address="http://prometheus:9090",       # Prometheus server URL
                query="sum(rate(http_requests_total[2m]))",    # PromQL query
            ),
        ],
        fallback=HPAFallbackArgsArgs(
            replicas=1,                     # fallback replica count (required); how it is applied depends on `behavior`
            behavior="currentReplicas",     # use the current live replica count as the fallback value
            failure_threshold=3,            # activate fallback after 3 consecutive metric failures
        ),
        behavior=HPABehaviorArgsArgs(
            scale_down=HPAScalingRulesArgsArgs(
                select_policy="Min",  # apply the most conservative (smallest) scale-down step
                policies=[
                    # type, value and period_seconds are all required on a scaling policy
                    HPAScalingPolicyArgsArgs(type="Percent", value=10, period_seconds=60),  # remove at most 10% of replicas per 60-second period
                ],
            ),
            scale_up=HPAScalingRulesArgsArgs(
                select_policy="Max",  # apply the most aggressive (largest) scale-up step
                policies=[
                    HPAScalingPolicyArgsArgs(type="Percent", value=100, period_seconds=60),  # allow up to 100% more replicas per 60-second period
                ],
            ),
        ),
    ),
))

// HPA example: memory as the primary metric, one external Prometheus trigger,
// a metrics-unavailable fallback, and rate-limited scale-up / scale-down.
rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
    ClusterId: pulumi.String("cluster-abc123"),
    Namespace: pulumi.String("production"),
    Kind:      pulumi.String("Deployment"),
    Name:      pulumi.String("my-api"),
    HpaRule: resources.HPARuleConfigArgsArgs{
        Enabled:                 pulumi.BoolPtr(true),      // activate horizontal (replica) scaling
        MinReplicas:             pulumi.IntPtr(1),
        MaxReplicas:             pulumi.IntPtr(8),
        PrimaryMetric:           pulumi.StringPtr("memory"), // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
        TargetUtilization:       pulumi.Float64Ptr(0.8),    // target 80% utilization for the primary metric
        TargetMemoryUtilization: pulumi.Float64Ptr(0.65),   // target 65% memory utilization, tuned independently
        MaxReplicaChangePercent: pulumi.Float64Ptr(0.25),   // cap scale events at ±25% of current replicas per cycle
        Metrics: resources.HPAMetricTriggerArgsArray{
            resources.HPAMetricTriggerArgsArgs{
                Type:          pulumi.String("prometheus"),                            // external Prometheus metric
                TargetValue:   pulumi.StringPtr("50"),                                 // absolute target value (e.g. 50 req/s)
                ServerAddress: pulumi.StringPtr("http://prometheus:9090"),             // Prometheus server URL
                Query:         pulumi.StringPtr("sum(rate(http_requests_total[2m]))"), // PromQL query
            },
        },
        Fallback: resources.HPAFallbackArgsArgs{
            Replicas:         pulumi.Int(1),                    // fallback replica count (required); how it is applied depends on Behavior
            Behavior:         pulumi.String("currentReplicas"), // use the current live replica count as the fallback value
            FailureThreshold: pulumi.Int(3),                    // activate fallback after 3 consecutive metric failures
        }.ToHPAFallbackArgsPtrOutput(),
        Behavior: resources.HPABehaviorArgsArgs{
            ScaleDown: resources.HPAScalingRulesArgsArgs{
                SelectPolicy: pulumi.String("Min"), // apply the most conservative (smallest) scale-down step
                Policies: resources.HPAScalingPolicyArgsArray{
                    // Type, Value and PeriodSeconds are all required on a scaling policy.
                    resources.HPAScalingPolicyArgsArgs{Type: pulumi.String("Percent"), Value: pulumi.Int(10), PeriodSeconds: pulumi.Int(60)}, // remove at most 10% of replicas per 60-second period
                },
            }.ToHPAScalingRulesArgsPtrOutput(),
            ScaleUp: resources.HPAScalingRulesArgsArgs{
                SelectPolicy: pulumi.String("Max"), // apply the most aggressive (largest) scale-up step
                Policies: resources.HPAScalingPolicyArgsArray{
                    resources.HPAScalingPolicyArgsArgs{Type: pulumi.String("Percent"), Value: pulumi.Int(100), PeriodSeconds: pulumi.Int(60)}, // allow up to 100% more replicas per 60-second period
                },
            }.ToHPAScalingRulesArgsPtrOutput(),
        }.ToHPABehaviorArgsPtrOutput(),
    }.ToHPARuleConfigArgsPtrOutput(),
})

HPAFallbackArgs

Parameter	Type	Required	Description
`replicas`	int	Yes	Replica count to use when metrics are unavailable. Example: `1`
`behavior`	string	Yes	How to apply fallback replicas. One of: `static`, `currentReplicas`, `currentReplicasIfHigher`, `currentReplicasIfLower`. Example: `"currentReplicas"`
`failureThreshold`	int	Yes	Consecutive metric failures before fallback activates. Example: `3`

Python: failure_threshold. Go: PascalCase equivalents.

HPABehaviorArgs

Parameter	Type	Required	Description
`scaleUp`	HPAScalingRulesArgs	No	Scale-up rate limiting and stabilization
`scaleDown`	HPAScalingRulesArgs	No	Scale-down rate limiting and stabilization

Python: scale_up, scale_down. Go: PascalCase equivalents.

HPAScalingRulesArgs

Parameter	Type	Required	Description
`stabilizationWindowSeconds`	int	No	Seconds to look back when selecting replica count to avoid flapping. Default: `0` for scale-up, `300` for scale-down
`selectPolicy`	string	No	Which policy wins when multiple match: `Max` \| `Min` \| `Disabled`. Example: `"Max"`
`policies`	HPAScalingPolicyArgs[]	No	List of rate-limiting step policies

Python: stabilization_window_seconds, select_policy. Go: PascalCase equivalents.

HPAScalingPolicyArgs

Parameter	Type	Required	Description
`type`	string	Yes	Policy type: `Pods` (absolute count) \| `Percent` (% of current replicas). Example: `"Percent"`
`value`	int	Yes	Maximum change allowed per period. Example: `100`
`periodSeconds`	int	Yes	Time window for this policy in seconds. Example: `60`

Python: period_seconds. Go: PascalCase equivalents.

EmergencyResponseConfigArgs

Parameter	Type	Required	Description
`oomEnabled`	bool	No	React to OOM kills by increasing memory requests
`oomMemoryMultiplier`	float	No	Multiplier applied to memory on OOM. Example: `1.5`
`cpuThrottlingEnabled`	bool	No	React to CPU throttling by increasing CPU requests
`cpuThrottlingThreshold`	float	No	Throttle ratio (0–1) that triggers a reaction. Example: `0.1`
`cpuThrottlingMultiplier`	float	No	Multiplier applied to CPU request on throttle reaction. Example: `1.25`

Python: oom_enabled, oom_memory_multiplier, oom_max_reactions, cpu_throttling_enabled, cpu_throttling_threshold, cpu_throttling_multiplier. Go: PascalCase equivalents.

ContainerResourceRuleConfigArgs

Parameter	Type	Required	Description
`containerName`	string	Yes	Name of the container this config applies to
`cpuRule`	ResourceRuleConfigArgs	No	CPU rule for this container
`memoryRule`	ResourceRuleConfigArgs	No	Memory rule for this container
`gpuRule`	ResourceRuleConfigArgs	No	GPU rule for this container

Python: container_name, cpu_rule, memory_rule, gpu_rule. Go: PascalCase equivalents.

Workload Rules

On this page