Add delayed limiting, configure with CORE_RESOURCES_LIMIT_WAIT_FOR_SEC

This commit is contained in:
Ingo Oppermann 2026-02-26 11:59:30 +01:00
parent 432417ea32
commit 74cc2a0163
No known key found for this signature in database
GPG Key ID: 2AB32426E9DD229E
5 changed files with 69 additions and 27 deletions

View File

@@ -1,5 +1,5 @@
ARG GOLANG_IMAGE=golang:1.25-alpine3.21
ARG BUILD_IMAGE=alpine:3.21
ARG GOLANG_IMAGE=golang:1.26-alpine3.23
ARG BUILD_IMAGE=alpine:3.23
# Cross-Compilation
# https://www.docker.com/blog/faster-multi-platform-builds-dockerfile-cross-compilation-guide/

View File

@@ -382,6 +382,7 @@ func (a *api) start(ctx context.Context) error {
MaxMemory: cfg.Resources.MaxMemoryUsage,
MaxGPU: cfg.Resources.MaxGPUUsage,
MaxGPUMemory: cfg.Resources.MaxGPUMemoryUsage,
LimitWaitFor: time.Duration(cfg.Resources.LimitWaitFor) * time.Second,
Logger: a.log.logger.core.WithComponent("Resources"),
PSUtil: psutil,
})

View File

@@ -338,6 +338,7 @@ func (d *Config) init() {
d.vars.Register(value.NewFloatRange(&d.Resources.MaxMemoryUsage, 0, 0, 100), "resources.max_memory_usage", "CORE_RESOURCES_MAX_MEMORY_USAGE", nil, "Maximum system usage in percent, from 0 (no limit) to 100", false, false)
d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUUsage, 0, 0, 100), "resources.max_gpu_usage", "CORE_RESOURCES_MAX_GPU_USAGE", nil, "Maximum general, encoder, and decoder GPU usage in percent per GPU, from 0 (no limit) to 100", false, false)
d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUMemoryUsage, 0, 0, 100), "resources.max_gpu_memory_usage", "CORE_RESOURCES_MAX_GPU_MEMORY_USAGE", nil, "Maximum GPU memory usage in percent per GPU, from 0 (no limit) to 100", false, false)
d.vars.Register(value.NewInt64(&d.Resources.LimitWaitFor, 0), "resources.limit_wait_for_sec", "CORE_RESOURCES_LIMIT_WAIT_FOR_SEC", nil, "Time to wait before signalling to limit", false, false)
// Cluster
d.vars.Register(value.NewBool(&d.Cluster.Enable, false), "cluster.enable", "CORE_CLUSTER_ENABLE", nil, "Enable cluster mode", false, false)

View File

@@ -194,6 +194,7 @@ type Data struct {
MaxMemoryUsage float64 `json:"max_memory_usage"` // percent 0-100
MaxGPUUsage float64 `json:"max_gpu_usage"` // percent 0-100
MaxGPUMemoryUsage float64 `json:"max_gpu_memory_usage"` // percent 0-100
LimitWaitFor int64 `json:"limit_wait_for_sec"`
} `json:"resources"`
Cluster struct {
Enable bool `json:"enable"`

View File

@@ -111,6 +111,10 @@ type resources struct {
isMemoryLimiting bool
isGPULimiting []bool
limitWaitFor time.Duration
limitLast bool
limitSince time.Time
self psutil.Process
cancelObserver context.CancelFunc
@@ -146,10 +150,11 @@ type Resources interface {
}
type Config struct {
MaxCPU float64 // percent 0-100
MaxMemory float64 // percent 0-100
MaxGPU float64 // general,encoder,decoder usage, percent 0-100
MaxGPUMemory float64 // memory usage, percent 0-100
MaxCPU float64 // percent 0-100
MaxMemory float64 // percent 0-100
MaxGPU float64 // general,encoder,decoder usage, percent 0-100
MaxGPUMemory float64 // memory usage, percent 0-100
LimitWaitFor time.Duration // seconds to wait before triggering limiter
PSUtil psutil.Util
Logger log.Logger
}
@@ -203,6 +208,7 @@ func New(config Config) (Resources, error) {
maxCPU: config.MaxCPU,
maxGPU: config.MaxGPU,
maxGPUMemory: config.MaxGPUMemory,
limitWaitFor: config.LimitWaitFor,
psutil: config.PSUtil,
isUnlimited: isUnlimited,
ngpu: len(gpu),
@@ -339,12 +345,14 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
}
doGPULimit := make([]bool, r.ngpu)
doGPUAnyLimit := false
for i, limiting := range r.isGPULimiting {
maxMemory := uint64(r.maxGPUMemory * float64(gpustat[i].MemoryTotal) / 100)
if !limiting {
if gpustat[i].MemoryUsed >= maxMemory || (gpustat[i].Usage >= r.maxGPU && gpustat[i].Encoder >= r.maxGPU && gpustat[i].Decoder >= r.maxGPU) {
doGPULimit[i] = true
doGPUAnyLimit = true
}
} else {
doGPULimit[i] = true
@@ -355,31 +363,62 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
}
r.lock.Lock()
if r.isCPULimiting != doCPULimit {
r.logger.Warn().WithFields(log.Fields{
"enabled": doCPULimit,
"current": cpuload,
}).Log("Limiting CPU")
}
r.isCPULimiting = doCPULimit
if r.isMemoryLimiting != doMemoryLimit {
r.logger.Warn().WithFields(log.Fields{
"enabled": doMemoryLimit,
"current": vmstat.Used,
}).Log("Limiting memory")
}
r.isMemoryLimiting = doMemoryLimit
updateLimiting := false
for i, limiting := range r.isGPULimiting {
if limiting != doGPULimit[i] {
r.logger.Warn().WithFields(log.Fields{
"enabled": doGPULimit,
"index": i,
}).Log("Limiting GPU")
if doCPULimit || doMemoryLimit || doGPUAnyLimit {
if !r.limitLast {
r.limitSince = time.Now()
r.limitLast = true
}
waiting := time.Since(r.limitSince)
if waiting >= r.limitWaitFor {
updateLimiting = true
} else {
r.logger.Warn().WithFields(log.Fields{
"cur_cpu": cpuload,
"cur_mem": vmstat.Used,
"waiting": waiting,
"wait_for": r.limitWaitFor,
}).Log("Waiting before limiting")
}
} else {
r.limitLast = false
updateLimiting = true
}
if updateLimiting {
if r.isCPULimiting != doCPULimit {
r.logger.Warn().WithFields(log.Fields{
"enabled": doCPULimit,
"current": cpuload,
"wait_for": r.limitWaitFor,
}).Log("Limiting CPU")
}
r.isCPULimiting = doCPULimit
if r.isMemoryLimiting != doMemoryLimit {
r.logger.Warn().WithFields(log.Fields{
"enabled": doMemoryLimit,
"current": vmstat.Used,
"wait_for": r.limitWaitFor,
}).Log("Limiting memory")
}
r.isMemoryLimiting = doMemoryLimit
for i, limiting := range r.isGPULimiting {
if limiting != doGPULimit[i] {
r.logger.Warn().WithFields(log.Fields{
"enabled": doGPULimit,
"index": i,
"wait_for": r.limitWaitFor,
}).Log("Limiting GPU")
}
}
r.isGPULimiting = doGPULimit
}
r.isGPULimiting = doGPULimit
r.lock.Unlock()
}