diff --git a/Dockerfile b/Dockerfile
index 562e204e..46db17b0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
-ARG GOLANG_IMAGE=golang:1.25-alpine3.21
-ARG BUILD_IMAGE=alpine:3.21
+ARG GOLANG_IMAGE=golang:1.26-alpine3.23
+ARG BUILD_IMAGE=alpine:3.23
 
 # Cross-Compilation
 # https://www.docker.com/blog/faster-multi-platform-builds-dockerfile-cross-compilation-guide/
diff --git a/app/api/api.go b/app/api/api.go
index aff6effc..912d4a69 100644
--- a/app/api/api.go
+++ b/app/api/api.go
@@ -382,6 +382,7 @@ func (a *api) start(ctx context.Context) error {
 		MaxMemory:    cfg.Resources.MaxMemoryUsage,
 		MaxGPU:       cfg.Resources.MaxGPUUsage,
 		MaxGPUMemory: cfg.Resources.MaxGPUMemoryUsage,
+		LimitWaitFor: time.Duration(cfg.Resources.LimitWaitFor) * time.Second,
 		Logger:       a.log.logger.core.WithComponent("Resources"),
 		PSUtil:       psutil,
 	})
diff --git a/config/config.go b/config/config.go
index 98d2fe49..35993a4c 100644
--- a/config/config.go
+++ b/config/config.go
@@ -338,6 +338,7 @@ func (d *Config) init() {
 	d.vars.Register(value.NewFloatRange(&d.Resources.MaxMemoryUsage, 0, 0, 100), "resources.max_memory_usage", "CORE_RESOURCES_MAX_MEMORY_USAGE", nil, "Maximum system usage in percent, from 0 (no limit) to 100", false, false)
 	d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUUsage, 0, 0, 100), "resources.max_gpu_usage", "CORE_RESOURCES_MAX_GPU_USAGE", nil, "Maximum general, encoder, and decoder GPU usage in percent per GPU, from 0 (no limit) to 100", false, false)
 	d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUMemoryUsage, 0, 0, 100), "resources.max_gpu_memory_usage", "CORE_RESOURCES_MAX_GPU_MEMORY_USAGE", nil, "Maximum GPU memory usage in percent per GPU, from 0 (no limit) to 100", false, false)
+	d.vars.Register(value.NewInt64(&d.Resources.LimitWaitFor, 0), "resources.limit_wait_for_sec", "CORE_RESOURCES_LIMIT_WAIT_FOR_SEC", nil, "Time to wait before signalling to limit", false, false)
 
 	// Cluster
 	d.vars.Register(value.NewBool(&d.Cluster.Enable, false), "cluster.enable", "CORE_CLUSTER_ENABLE", nil, "Enable cluster mode", false, false)
diff --git a/config/data.go b/config/data.go
index af68a071..09e25815 100644
--- a/config/data.go
+++ b/config/data.go
@@ -194,6 +194,7 @@ type Data struct {
 		MaxMemoryUsage    float64 `json:"max_memory_usage"`     // percent 0-100
 		MaxGPUUsage       float64 `json:"max_gpu_usage"`        // percent 0-100
 		MaxGPUMemoryUsage float64 `json:"max_gpu_memory_usage"` // percent 0-100
+		LimitWaitFor      int64   `json:"limit_wait_for_sec"`   // seconds
 	} `json:"resources"`
 	Cluster struct {
 		Enable bool `json:"enable"`
diff --git a/resources/resources.go b/resources/resources.go
index 77024088..399d0657 100644
--- a/resources/resources.go
+++ b/resources/resources.go
@@ -111,6 +111,10 @@ type resources struct {
 	isMemoryLimiting bool
 	isGPULimiting    []bool
 
+	limitWaitFor time.Duration // minimum time a limit condition must persist before limiting is applied
+	limitLast    bool          // whether the previous observation met a limit condition
+	limitSince   time.Time     // when the current uninterrupted run of limit conditions started
+
 	self psutil.Process
 
 	cancelObserver context.CancelFunc
@@ -146,10 +150,11 @@ type Resources interface {
 }
 
 type Config struct {
-	MaxCPU       float64 // percent 0-100
-	MaxMemory    float64 // percent 0-100
-	MaxGPU       float64 // general,encoder,decoder usage, percent 0-100
-	MaxGPUMemory float64 // memory usage, percent 0-100
+	MaxCPU       float64       // percent 0-100
+	MaxMemory    float64       // percent 0-100
+	MaxGPU       float64       // general,encoder,decoder usage, percent 0-100
+	MaxGPUMemory float64       // memory usage, percent 0-100
+	LimitWaitFor time.Duration // how long a limit condition must persist before the limiter is triggered
 	PSUtil       psutil.Util
 	Logger       log.Logger
 }
@@ -203,6 +208,7 @@ func New(config Config) (Resources, error) {
 		maxCPU:       config.MaxCPU,
 		maxGPU:       config.MaxGPU,
 		maxGPUMemory: config.MaxGPUMemory,
+		limitWaitFor: config.LimitWaitFor,
 		psutil:       config.PSUtil,
 		isUnlimited:  isUnlimited,
 		ngpu:         len(gpu),
@@ -339,12 +345,14 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
 			}
 
 			doGPULimit := make([]bool, r.ngpu)
+			doGPUAnyLimit := false
 
 			for i, limiting := range r.isGPULimiting {
 				maxMemory := uint64(r.maxGPUMemory * float64(gpustat[i].MemoryTotal) / 100)
 				if !limiting {
 					if gpustat[i].MemoryUsed >= maxMemory || (gpustat[i].Usage >= r.maxGPU && gpustat[i].Encoder >= r.maxGPU && gpustat[i].Decoder >= r.maxGPU) {
 						doGPULimit[i] = true
+						doGPUAnyLimit = true
 					}
 				} else {
 					doGPULimit[i] = true
@@ -355,31 +363,66 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
 			}
 
 			r.lock.Lock()
-			if r.isCPULimiting != doCPULimit {
-				r.logger.Warn().WithFields(log.Fields{
-					"enabled": doCPULimit,
-					"current": cpuload,
-				}).Log("Limiting CPU")
-			}
-			r.isCPULimiting = doCPULimit
 
-			if r.isMemoryLimiting != doMemoryLimit {
-				r.logger.Warn().WithFields(log.Fields{
-					"enabled": doMemoryLimit,
-					"current": vmstat.Used,
-				}).Log("Limiting memory")
-			}
-			r.isMemoryLimiting = doMemoryLimit
+			// A limit condition only changes the limiter state once it has
+			// persisted for at least limitWaitFor. Without any limit condition
+			// the state is updated right away.
+			updateLimiting := false
 
-			for i, limiting := range r.isGPULimiting {
-				if limiting != doGPULimit[i] {
-					r.logger.Warn().WithFields(log.Fields{
-						"enabled": doGPULimit,
-						"index":   i,
-					}).Log("Limiting GPU")
+			if doCPULimit || doMemoryLimit || doGPUAnyLimit {
+				if !r.limitLast {
+					// Limit condition newly observed: start the wait timer.
+					r.limitSince = time.Now()
+					r.limitLast = true
 				}
+
+				waiting := time.Since(r.limitSince)
+
+				if waiting >= r.limitWaitFor {
+					updateLimiting = true
+				} else {
+					r.logger.Warn().WithFields(log.Fields{
+						"cur_cpu":  cpuload,
+						"cur_mem":  vmstat.Used,
+						"waiting":  waiting,
+						"wait_for": r.limitWaitFor,
+					}).Log("Waiting before limiting")
+				}
+			} else {
+				r.limitLast = false
+				updateLimiting = true
+			}
+
+			if updateLimiting {
+				if r.isCPULimiting != doCPULimit {
+					r.logger.Warn().WithFields(log.Fields{
+						"enabled":  doCPULimit,
+						"current":  cpuload,
+						"wait_for": r.limitWaitFor,
+					}).Log("Limiting CPU")
+				}
+				r.isCPULimiting = doCPULimit
+
+				if r.isMemoryLimiting != doMemoryLimit {
+					r.logger.Warn().WithFields(log.Fields{
+						"enabled":  doMemoryLimit,
+						"current":  vmstat.Used,
+						"wait_for": r.limitWaitFor,
+					}).Log("Limiting memory")
+				}
+				r.isMemoryLimiting = doMemoryLimit
+
+				for i, limiting := range r.isGPULimiting {
+					if limiting != doGPULimit[i] {
+						r.logger.Warn().WithFields(log.Fields{
+							"enabled":  doGPULimit[i],
+							"index":    i,
+							"wait_for": r.limitWaitFor,
+						}).Log("Limiting GPU")
+					}
+				}
+				r.isGPULimiting = doGPULimit
 			}
-			r.isGPULimiting = doGPULimit
 
 			r.lock.Unlock()
 		}