Workload Rules
Pin explicit resource rules directly to a single Kubernetes workload.
Workload Rules
WorkloadRule pins explicit resource rules directly to a single workload (a specific kind/namespace/name on a cluster). Unlike WorkloadPolicy, which applies a shared policy to many workloads via a WorkloadPolicyTarget, a WorkloadRule targets one workload and lets you override CPU, memory, GPU, and HPA settings with precise values.
Set autoGenerate: true to have the engine automatically compute all rule fields from observed usage. Omit it (or set it to false) to provide your own values via cpuRule, memoryRule, hpaRule, etc.
WorkloadRule
Example
import * as pulumi from "@pulumi/pulumi";
import { resources } from "@devzero/pulumi-devzero";
const rule = new resources.WorkloadRule("my-app-rule", {
clusterId: "cluster-abc123",
namespace: "production",
kind: "Deployment",
name: "my-api",
cpuRule: {
enabled: true, // activate CPU vertical scaling
minRequest: 10, // millicores; hard floor for CPU requests
maxRequest: 32000, // millicores; hard ceiling for CPU requests
targetPercentile: 0.95, // P95 of observed CPU usage to target
limitsAdjustmentEnabled: true, // adjust CPU limits alongside requests
limitMultiplier: 1.0, // limits = request × 1.0
},
memoryRule: {
enabled: true, // activate memory vertical scaling for this workload
minRequest: 67108864, // bytes; hard floor for memory requests (64 MiB)
maxRequest: 68719476736, // bytes; hard ceiling for memory requests (64 GiB)
targetPercentile: 0.95, // P95 of observed memory usage to target
limitsAdjustmentEnabled: true, // adjust memory limits alongside requests
},
emergencyResponse: {
oomEnabled: true, // react to OOMKills by increasing memory requests
oomMemoryMultiplier: 1.5, // multiply memory request by 1.5× on each OOM event
cpuThrottlingEnabled: true, // react to CPU throttling by increasing CPU requests
cpuThrottlingThreshold: 0.20, // trigger when throttle ratio exceeds 20%
cpuThrottlingMultiplier: 1.25, // multiply CPU request by 1.25× on throttle reaction
},
actionTriggers: ["on_schedule", "on_detection"],
cronSchedule: "0 2 * * *",
detectionTriggers: ["pod_creation", "pod_update"],
liveMigrationEnabled: false,
});
export const ruleId = rule.id;
Auto-generate:
const rule = new resources.WorkloadRule("my-app-rule", { clusterId: "cluster-abc123", namespace: "production", kind: "Deployment", name: "my-api", autoGenerate: true, });
import pulumi
from pulumi_devzero.resources import (
WorkloadRule, WorkloadRuleArgs,
ResourceRuleConfigArgsArgs,
EmergencyResponseConfigArgsArgs,
)
rule = WorkloadRule(
"my-app-rule",
args=WorkloadRuleArgs(
cluster_id="cluster-abc123",
namespace="production",
kind="Deployment",
name="my-api",
cpu_rule=ResourceRuleConfigArgsArgs(
enabled=True, # activate CPU vertical scaling
min_request=10, # millicores; hard floor for CPU requests
max_request=32000, # millicores; hard ceiling for CPU requests
target_percentile=0.95, # P95 of observed CPU usage to target
limits_adjustment_enabled=True, # adjust CPU limits alongside requests
limit_multiplier=1.0, # limits = request × 1.0
),
memory_rule=ResourceRuleConfigArgsArgs(
enabled=True, # activate memory vertical scaling for this workload
min_request=67108864, # bytes; hard floor for memory requests (64 MiB)
max_request=68719476736, # bytes; hard ceiling for memory requests (64 GiB)
target_percentile=0.95, # P95 of observed memory usage to target
limits_adjustment_enabled=True, # adjust memory limits alongside requests
),
emergency_response=EmergencyResponseConfigArgsArgs(
oom_enabled=True, # react to OOMKills by increasing memory requests
oom_memory_multiplier=1.5, # multiply memory request by 1.5× on each OOM event
cpu_throttling_enabled=True, # react to CPU throttling by increasing CPU requests
cpu_throttling_threshold=0.20, # trigger when throttle ratio exceeds 20%
cpu_throttling_multiplier=1.25, # multiply CPU request by 1.25× on throttle reaction
),
action_triggers=["on_schedule", "on_detection"],
cron_schedule="0 2 * * *",
detection_triggers=["pod_creation", "pod_update"],
live_migration_enabled=False,
),
)
pulumi.export("rule_id", rule.id)
Auto-generate:
rule = WorkloadRule("my-app-rule", args=WorkloadRuleArgs( cluster_id="cluster-abc123", namespace="production", kind="Deployment", name="my-api", auto_generate=True, ))
package main
import (
"github.com/devzero-inc/pulumi-provider-devzero/sdk/go/devzero/resources"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
ClusterId: pulumi.String("cluster-abc123"),
Namespace: pulumi.String("production"),
Kind: pulumi.String("Deployment"),
Name: pulumi.String("my-api"),
CpuRule: resources.ResourceRuleConfigArgsArgs{
Enabled: pulumi.BoolPtr(true), // activate CPU vertical scaling
MinRequest: pulumi.IntPtr(10), // millicores; hard floor for CPU requests
MaxRequest: pulumi.IntPtr(32000), // millicores; hard ceiling for CPU requests
TargetPercentile: pulumi.Float64Ptr(0.95), // P95 of observed CPU usage to target
LimitsAdjustmentEnabled: pulumi.BoolPtr(true), // adjust CPU limits alongside requests
LimitMultiplier: pulumi.Float64Ptr(1.0), // limits = request × 1.0
}.ToResourceRuleConfigArgsPtrOutput(),
MemoryRule: resources.ResourceRuleConfigArgsArgs{
Enabled: pulumi.BoolPtr(true), // activate memory vertical scaling for this workload
MinRequest: pulumi.IntPtr(67108864), // bytes; hard floor for memory requests (64 MiB)
MaxRequest: pulumi.IntPtr(68719476736), // bytes; hard ceiling for memory requests (64 GiB)
TargetPercentile: pulumi.Float64Ptr(0.95), // P95 of observed memory usage to target
LimitsAdjustmentEnabled: pulumi.BoolPtr(true), // adjust memory limits alongside requests
}.ToResourceRuleConfigArgsPtrOutput(),
EmergencyResponse: resources.EmergencyResponseConfigArgsArgs{
OomEnabled: pulumi.BoolPtr(true), // react to OOMKills by increasing memory requests
OomMemoryMultiplier: pulumi.Float64Ptr(1.5), // multiply memory request by 1.5× on each OOM event
CpuThrottlingEnabled: pulumi.BoolPtr(true), // react to CPU throttling by increasing CPU requests
CpuThrottlingThreshold: pulumi.Float64Ptr(0.20), // trigger when throttle ratio exceeds 20%
CpuThrottlingMultiplier: pulumi.Float64Ptr(1.25), // multiply CPU request by 1.25× on throttle reaction
}.ToEmergencyResponseConfigArgsPtrOutput(),
ActionTriggers: pulumi.StringArray{pulumi.String("on_schedule"), pulumi.String("on_detection")},
CronSchedule: pulumi.StringPtr("0 2 * * *"),
DetectionTriggers: pulumi.StringArray{pulumi.String("pod_creation"), pulumi.String("pod_update")},
LiveMigrationEnabled: pulumi.BoolPtr(false),
})
if err != nil {
return err
}
ctx.Export("ruleId", rule.ID())
return nil
})
}
Auto-generate:
rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{ ClusterId: pulumi.String("cluster-abc123"), Namespace: pulumi.String("production"), Kind: pulumi.String("Deployment"), Name: pulumi.String("my-api"), AutoGenerate: pulumi.BoolPtr(true), })
Arguments
| Parameter | Type | Required | Description |
|---|---|---|---|
clusterId | string | Yes | ID of the cluster the workload lives in |
namespace | string | Yes | Kubernetes namespace of the workload |
kind | string | Yes | Workload kind. One of: Deployment, StatefulSet, DaemonSet, CronJob, Job |
name | string | Yes | Name of the Kubernetes workload |
autoGenerate | bool | No | When true, the engine fills all rule fields from observed usage; manual fields are ignored |
cpuRule | ResourceRuleConfigArgs | No | CPU vertical scaling rule |
memoryRule | ResourceRuleConfigArgs | No | Memory vertical scaling rule |
gpuRule | ResourceRuleConfigArgs | No | GPU vertical scaling rule (units: GPU millicores) |
hpaRule | HPARuleConfigArgs | No | Horizontal (replica) scaling rule |
emergencyResponse | EmergencyResponseConfigArgs | No | OOM and CPU-throttle emergency reactions |
actionTriggers | string[] | No | When to apply. One or more of: on_detection, on_schedule |
cronSchedule | string | No | Cron expression for scheduled application (5-field UTC). Required when actionTriggers includes on_schedule |
detectionTriggers | string[] | No | Events that trigger a recommendation. One or more of: pod_creation, pod_update, pod_reschedule |
schedulerPlugins | string[] | No | Kubernetes scheduler plugins to activate. Example: ["binpacking"] |
defragmentationSchedule | string | No | Cron expression for node defragmentation |
liveMigrationEnabled | bool | No | Allow live pod migration so that recommendations can be applied without restarting pods |
useInPlaceVerticalScaling | bool | No | Use in-place pod vertical scaling instead of pod restarts |
containers | ContainerResourceRuleConfigArgs[] | No | Per-container resource overrides. When empty, workload-level rules apply to all containers |
Python uses snake_case: cluster_id, auto_generate, cpu_rule, memory_rule, gpu_rule, hpa_rule, emergency_response, action_triggers, cron_schedule, detection_triggers, scheduler_plugins, defragmentation_schedule, live_migration_enabled, use_in_place_vertical_scaling. Go uses PascalCase equivalents.
ResourceRuleConfigArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
enabled | bool | Yes | Enable this resource axis rule |
minRequest | int | No | Minimum resource request (millicores for CPU, bytes for memory/GPU) |
maxRequest | int | No | Maximum resource request |
targetPercentile | float | No | Percentile of observed usage to target (0–1). Example: 0.95 |
maxScaleUpPercent | float | No | Maximum percentage to scale up in one step (workload-level only) |
maxScaleDownPercent | float | No | Maximum percentage to scale down in one step (workload-level only) |
limitsAdjustmentEnabled | bool | No | Whether to also adjust resource limits |
limitMultiplier | float | No | Limits = request × limitMultiplier |
limitsRemovalEnabled | bool | No | Actively remove limits from workloads (CPU only) |
Python: min_request, max_request, target_percentile, max_scale_up_percent, max_scale_down_percent, limits_adjustment_enabled, limit_multiplier, limits_removal_enabled. Go: PascalCase equivalents.
HPARuleConfigArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
enabled | bool | Yes | Enable horizontal (replica) scaling |
minReplicas | int | No | Minimum number of replicas |
maxReplicas | int | No | Maximum number of replicas |
targetUtilization | float | No | Target CPU utilization ratio (0–1). Example: 0.8 |
targetMemoryUtilization | float | No | Target memory utilization ratio (0–1), tuned independently of CPU. Example: 0.65 |
primaryMetric | string | No | Primary metric driving HPA (used when metrics is empty). One of: cpu, memory, gpu, network_ingress, network_egress. Example: "memory" |
maxReplicaChangePercent | float | No | Maximum fraction of current replicas that can change in one scale event (0–1). 0.25 means at most 25% added or removed at once. Example: 0.25 |
metrics | HPAMetricTriggerArgs[] | No | External metric triggers only (e.g. Prometheus, queue depth). Use when primaryMetric alone is not sufficient. CPU/Memory/Network are auto-generated by the engine and are silently dropped if redeclared here |
compositeFormula | string | No | Expression combining multiple metric ratios into one scaling signal. Example: "0.6*cpu + 0.4*memory" |
behavior | HPABehaviorArgs | No | Fine-grained scale-up and scale-down behavior policies |
fallback | HPAFallbackArgs | No | Replica fallback when metrics become unavailable |
Python uses snake_case (e.g. target_memory_utilization, composite_formula). Go uses PascalCase equivalents.
HPAMetricTriggerArgs
When to use
metrics[]: Only add entries here when you need to scale on external metrics (e.g. a Prometheus query, request queue depth, or custom business metric). CPU, Memory, and Network triggers are auto-generated by the engine from primaryMetric + targetUtilization — redeclaring them in metrics[] has no effect; the engine silently drops them and regenerates its own triggers.
| Parameter | Type | Required | Description |
|---|---|---|---|
type | string | Yes | Metric source type. Built-in: CPU, Memory, NetworkIngress, NetworkEgress. External: prometheus |
targetUtilization | string | No | Target utilization as a decimal string (resource metrics). Example: "0.70" |
targetValue | string | No | Absolute target value as a string (external/object metrics). Example: "50" |
weight | string | No | Weight for composite formula scaling (decimal string). Example: "0.5" |
metadata | map[string]string | No | Free-form key-value pairs passed to the external scaler |
serverAddress | string | No | Prometheus server URL — packed into metadata by the service layer. Example: "http://prometheus:9090" |
query | string | No | PromQL query string — packed into metadata by the service layer. Example: "sum(rate(http_requests_total[2m]))" |
Python: target_utilization, target_value, server_address. Go uses PascalCase equivalents.
Example — CPU & Memory utilization HPA
const rule = new resources.WorkloadRule("my-app-rule", {
clusterId: "cluster-abc123",
namespace: "production",
kind: "Deployment",
name: "my-api",
hpaRule: {
enabled: true, // activate horizontal (replica) scaling
minReplicas: 1,
maxReplicas: 8,
primaryMetric: "memory", // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
targetUtilization: 0.8, // target 80% utilization for the primary metric
targetMemoryUtilization: 0.65, // target 65% memory utilization, tuned independently
maxReplicaChangePercent: 0.25, // cap scale events at ±25% of current replicas per cycle
metrics: [
{
type: "prometheus", // external Prometheus metric
targetValue: "50", // absolute target value (e.g. 50 req/s)
serverAddress: "http://prometheus:9090", // Prometheus server URL
query: "sum(rate(http_requests_total[2m]))", // PromQL query
},
],
fallback: {
replicas: 1, // hold at 1 replica when metrics are unavailable
behavior: "currentReplicas", // use the current live replica count as the fallback value
failureThreshold: 3, // activate fallback after 3 consecutive metric failures
},
behavior: {
scaleDown: {
selectPolicy: "Min", // apply the most conservative (smallest) scale-down step
policies: [
{ type: "Percent", value: 10 }, // remove at most 10% of replicas per cycle
],
},
scaleUp: {
selectPolicy: "Max", // apply the most aggressive (largest) scale-up step
policies: [
{ type: "Percent", value: 100 }, // allow up to 100% more replicas per cycle
],
},
},
},
});
from pulumi_devzero.resources import (
WorkloadRule, WorkloadRuleArgs,
HPARuleConfigArgsArgs,
HPAMetricTriggerArgsArgs,
HPAFallbackArgsArgs,
HPABehaviorArgsArgs,
HPAScalingRulesArgsArgs,
HPAScalingPolicyArgsArgs,
)
rule = WorkloadRule("my-app-rule", args=WorkloadRuleArgs(
cluster_id="cluster-abc123",
namespace="production",
kind="Deployment",
name="my-api",
hpa_rule=HPARuleConfigArgsArgs(
enabled=True, # activate horizontal (replica) scaling
min_replicas=1,
max_replicas=8,
primary_metric="memory", # primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
target_utilization=0.8, # target 80% utilization for the primary metric
target_memory_utilization=0.65, # target 65% memory utilization, tuned independently
max_replica_change_percent=0.25, # cap scale events at ±25% of current replicas per cycle
metrics=[
HPAMetricTriggerArgsArgs(
type="prometheus", # external Prometheus metric
target_value="50", # absolute target value (e.g. 50 req/s)
server_address="http://prometheus:9090", # Prometheus server URL
query="sum(rate(http_requests_total[2m]))", # PromQL query
),
],
fallback=HPAFallbackArgsArgs(
replicas=1, # hold at 1 replica when metrics are unavailable
behavior="currentReplicas", # use the current live replica count as the fallback value
failure_threshold=3, # activate fallback after 3 consecutive metric failures
),
behavior=HPABehaviorArgsArgs(
scale_down=HPAScalingRulesArgsArgs(
select_policy="Min", # apply the most conservative (smallest) scale-down step
policies=[HPAScalingPolicyArgsArgs(type="Percent", value=10)], # remove at most 10% of replicas per cycle
),
scale_up=HPAScalingRulesArgsArgs(
select_policy="Max", # apply the most aggressive (largest) scale-up step
policies=[
HPAScalingPolicyArgsArgs(type="Percent", value=100), # allow up to 100% more replicas per cycle
],
),
),
),
))
rule, err := resources.NewWorkloadRule(ctx, "my-app-rule", &resources.WorkloadRuleArgs{
ClusterId: pulumi.String("cluster-abc123"),
Namespace: pulumi.String("production"),
Kind: pulumi.String("Deployment"),
Name: pulumi.String("my-api"),
HpaRule: resources.HPARuleConfigArgsArgs{
Enabled: pulumi.BoolPtr(true), // activate horizontal (replica) scaling
MinReplicas: pulumi.IntPtr(1),
MaxReplicas: pulumi.IntPtr(8),
PrimaryMetric: pulumi.StringPtr("memory"), // primary metric driving HPA decisions (short form: cpu | memory | gpu | network_ingress | network_egress)
TargetUtilization: pulumi.Float64Ptr(0.8), // target 80% utilization for the primary metric
TargetMemoryUtilization: pulumi.Float64Ptr(0.65), // target 65% memory utilization, tuned independently
MaxReplicaChangePercent: pulumi.Float64Ptr(0.25), // cap scale events at ±25% of current replicas per cycle
Metrics: resources.HPAMetricTriggerArgsArray{
resources.HPAMetricTriggerArgsArgs{
Type: pulumi.String("prometheus"), // external Prometheus metric
TargetValue: pulumi.StringPtr("50"), // absolute target value (e.g. 50 req/s)
ServerAddress: pulumi.StringPtr("http://prometheus:9090"), // Prometheus server URL
Query: pulumi.StringPtr("sum(rate(http_requests_total[2m]))"), // PromQL query
},
},
Fallback: resources.HPAFallbackArgsArgs{
Replicas: pulumi.Int(1), // hold at 1 replica when metrics are unavailable
Behavior: pulumi.String("currentReplicas"), // use the current live replica count as the fallback value
FailureThreshold: pulumi.Int(3), // activate fallback after 3 consecutive metric failures
}.ToHPAFallbackArgsPtrOutput(),
Behavior: resources.HPABehaviorArgsArgs{
ScaleDown: resources.HPAScalingRulesArgsArgs{
SelectPolicy: pulumi.String("Min"), // apply the most conservative (smallest) scale-down step
Policies: resources.HPAScalingPolicyArgsArray{
resources.HPAScalingPolicyArgsArgs{Type: pulumi.String("Percent"), Value: pulumi.Int(10)}, // remove at most 10% of replicas per cycle
},
}.ToHPAScalingRulesArgsPtrOutput(),
ScaleUp: resources.HPAScalingRulesArgsArgs{
SelectPolicy: pulumi.String("Max"), // apply the most aggressive (largest) scale-up step
Policies: resources.HPAScalingPolicyArgsArray{
resources.HPAScalingPolicyArgsArgs{Type: pulumi.String("Percent"), Value: pulumi.Int(100)}, // allow up to 100% more replicas per cycle
},
}.ToHPAScalingRulesArgsPtrOutput(),
}.ToHPABehaviorArgsPtrOutput(),
}.ToHPARuleConfigArgsPtrOutput(),
})
HPAFallbackArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
replicas | int | Yes | Replica count to use when metrics are unavailable. Example: 1 |
behavior | string | Yes | How to apply fallback replicas. One of: static, currentReplicas, currentReplicasIfHigher, currentReplicasIfLower. Example: "currentReplicas" |
failureThreshold | int | Yes | Consecutive metric failures before fallback activates. Example: 3 |
Python: failure_threshold. Go: PascalCase equivalents.
HPABehaviorArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
scaleUp | HPAScalingRulesArgs | No | Scale-up rate limiting and stabilization |
scaleDown | HPAScalingRulesArgs | No | Scale-down rate limiting and stabilization |
Python: scale_up, scale_down. Go: PascalCase equivalents.
HPAScalingRulesArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
stabilizationWindowSeconds | int | No | Seconds to look back when selecting replica count to avoid flapping. Default: 0 for scale-up, 300 for scale-down |
selectPolicy | string | No | Which policy wins when multiple match. One of: Max, Min, Disabled. Example: "Max" |
policies | HPAScalingPolicyArgs[] | No | List of rate-limiting step policies |
Python: stabilization_window_seconds, select_policy. Go: PascalCase equivalents.
HPAScalingPolicyArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
type | string | Yes | Policy type. One of: Pods (absolute count), Percent (% of current replicas). Example: "Percent" |
value | int | Yes | Maximum change allowed per period. Example: 100 |
periodSeconds | int | Yes | Time window for this policy in seconds. Example: 60 |
Python: period_seconds. Go: PascalCase equivalents.
EmergencyResponseConfigArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
oomEnabled | bool | No | React to OOM kills by increasing memory requests |
oomMemoryMultiplier | float | No | Multiplier applied to memory on OOM. Example: 1.5 |
cpuThrottlingEnabled | bool | No | React to CPU throttling by increasing CPU requests |
cpuThrottlingThreshold | float | No | Throttle ratio (0–1) that triggers a reaction. Example: 0.1 |
cpuThrottlingMultiplier | float | No | Multiplier applied to CPU request on throttle reaction. Example: 1.25 |
Python: oom_enabled, oom_memory_multiplier, oom_max_reactions, cpu_throttling_enabled, cpu_throttling_threshold, cpu_throttling_multiplier. Go: PascalCase equivalents.
ContainerResourceRuleConfigArgs
| Parameter | Type | Required | Description |
|---|---|---|---|
containerName | string | Yes | Name of the container this config applies to |
cpuRule | ResourceRuleConfigArgs | No | CPU rule for this container |
memoryRule | ResourceRuleConfigArgs | No | Memory rule for this container |
gpuRule | ResourceRuleConfigArgs | No | GPU rule for this container |
Python: container_name, cpu_rule, memory_rule, gpu_rule. Go: PascalCase equivalents.