Add delayed limiting, configure with CORE_RESOURCES_LIMIT_WAIT_FOR_SEC
This commit is contained in:
parent
432417ea32
commit
74cc2a0163
@ -1,5 +1,5 @@
|
||||
ARG GOLANG_IMAGE=golang:1.25-alpine3.21
|
||||
ARG BUILD_IMAGE=alpine:3.21
|
||||
ARG GOLANG_IMAGE=golang:1.26-alpine3.23
|
||||
ARG BUILD_IMAGE=alpine:3.23
|
||||
|
||||
# Cross-Compilation
|
||||
# https://www.docker.com/blog/faster-multi-platform-builds-dockerfile-cross-compilation-guide/
|
||||
|
||||
@ -382,6 +382,7 @@ func (a *api) start(ctx context.Context) error {
|
||||
MaxMemory: cfg.Resources.MaxMemoryUsage,
|
||||
MaxGPU: cfg.Resources.MaxGPUUsage,
|
||||
MaxGPUMemory: cfg.Resources.MaxGPUMemoryUsage,
|
||||
LimitWaitFor: time.Duration(cfg.Resources.LimitWaitFor) * time.Second,
|
||||
Logger: a.log.logger.core.WithComponent("Resources"),
|
||||
PSUtil: psutil,
|
||||
})
|
||||
|
||||
@ -338,6 +338,7 @@ func (d *Config) init() {
|
||||
d.vars.Register(value.NewFloatRange(&d.Resources.MaxMemoryUsage, 0, 0, 100), "resources.max_memory_usage", "CORE_RESOURCES_MAX_MEMORY_USAGE", nil, "Maximum system usage in percent, from 0 (no limit) to 100", false, false)
|
||||
d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUUsage, 0, 0, 100), "resources.max_gpu_usage", "CORE_RESOURCES_MAX_GPU_USAGE", nil, "Maximum general, encoder, and decoder GPU usage in percent per GPU, from 0 (no limit) to 100", false, false)
|
||||
d.vars.Register(value.NewFloatRange(&d.Resources.MaxGPUMemoryUsage, 0, 0, 100), "resources.max_gpu_memory_usage", "CORE_RESOURCES_MAX_GPU_MEMORY_USAGE", nil, "Maximum GPU memory usage in percent per GPU, from 0 (no limit) to 100", false, false)
|
||||
d.vars.Register(value.NewInt64(&d.Resources.LimitWaitFor, 0), "resources.limit_wait_for_sec", "CORE_RESOURCES_LIMIT_WAIT_FOR_SEC", nil, "Time to wait before signalling to limit", false, false)
|
||||
|
||||
// Cluster
|
||||
d.vars.Register(value.NewBool(&d.Cluster.Enable, false), "cluster.enable", "CORE_CLUSTER_ENABLE", nil, "Enable cluster mode", false, false)
|
||||
|
||||
@ -194,6 +194,7 @@ type Data struct {
|
||||
MaxMemoryUsage float64 `json:"max_memory_usage"` // percent 0-100
|
||||
MaxGPUUsage float64 `json:"max_gpu_usage"` // percent 0-100
|
||||
MaxGPUMemoryUsage float64 `json:"max_gpu_memory_usage"` // percent 0-100
|
||||
LimitWaitFor int64 `json:"limit_wait_for_sec"`
|
||||
} `json:"resources"`
|
||||
Cluster struct {
|
||||
Enable bool `json:"enable"`
|
||||
|
||||
@ -111,6 +111,10 @@ type resources struct {
|
||||
isMemoryLimiting bool
|
||||
isGPULimiting []bool
|
||||
|
||||
limitWaitFor time.Duration
|
||||
limitLast bool
|
||||
limitSince time.Time
|
||||
|
||||
self psutil.Process
|
||||
|
||||
cancelObserver context.CancelFunc
|
||||
@ -146,10 +150,11 @@ type Resources interface {
|
||||
}
|
||||
|
||||
// Config holds the settings passed to New.
type Config struct {
|
||||
MaxCPU float64 // percent 0-100
|
||||
MaxMemory float64 // percent 0-100
|
||||
MaxGPU float64 // general,encoder,decoder usage, percent 0-100
|
||||
MaxGPUMemory float64 // memory usage, percent 0-100
|
||||
MaxCPU float64 // percent 0-100
|
||||
MaxMemory float64 // percent 0-100
|
||||
MaxGPU float64 // general,encoder,decoder usage, percent 0-100
|
||||
MaxGPUMemory float64 // memory usage, percent 0-100
|
||||
LimitWaitFor time.Duration // how long a limit condition must persist before the limiter is triggered (a duration, not a bare number of seconds)
|
||||
PSUtil psutil.Util
|
||||
Logger log.Logger
|
||||
}
|
||||
@ -203,6 +208,7 @@ func New(config Config) (Resources, error) {
|
||||
maxCPU: config.MaxCPU,
|
||||
maxGPU: config.MaxGPU,
|
||||
maxGPUMemory: config.MaxGPUMemory,
|
||||
limitWaitFor: config.LimitWaitFor,
|
||||
psutil: config.PSUtil,
|
||||
isUnlimited: isUnlimited,
|
||||
ngpu: len(gpu),
|
||||
@ -339,12 +345,14 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
|
||||
}
|
||||
|
||||
doGPULimit := make([]bool, r.ngpu)
|
||||
doGPUAnyLimit := false
|
||||
|
||||
for i, limiting := range r.isGPULimiting {
|
||||
maxMemory := uint64(r.maxGPUMemory * float64(gpustat[i].MemoryTotal) / 100)
|
||||
if !limiting {
|
||||
if gpustat[i].MemoryUsed >= maxMemory || (gpustat[i].Usage >= r.maxGPU && gpustat[i].Encoder >= r.maxGPU && gpustat[i].Decoder >= r.maxGPU) {
|
||||
doGPULimit[i] = true
|
||||
doGPUAnyLimit = true
|
||||
}
|
||||
} else {
|
||||
doGPULimit[i] = true
|
||||
@ -355,31 +363,62 @@ func (r *resources) observe(ctx context.Context, interval time.Duration) {
|
||||
}
|
||||
|
||||
r.lock.Lock()
|
||||
if r.isCPULimiting != doCPULimit {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doCPULimit,
|
||||
"current": cpuload,
|
||||
}).Log("Limiting CPU")
|
||||
}
|
||||
r.isCPULimiting = doCPULimit
|
||||
|
||||
if r.isMemoryLimiting != doMemoryLimit {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doMemoryLimit,
|
||||
"current": vmstat.Used,
|
||||
}).Log("Limiting memory")
|
||||
}
|
||||
r.isMemoryLimiting = doMemoryLimit
|
||||
updateLimiting := false
|
||||
|
||||
for i, limiting := range r.isGPULimiting {
|
||||
if limiting != doGPULimit[i] {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doGPULimit,
|
||||
"index": i,
|
||||
}).Log("Limiting GPU")
|
||||
if doCPULimit || doMemoryLimit || doGPUAnyLimit {
|
||||
if !r.limitLast {
|
||||
r.limitSince = time.Now()
|
||||
r.limitLast = true
|
||||
}
|
||||
|
||||
waiting := time.Since(r.limitSince)
|
||||
|
||||
if waiting >= r.limitWaitFor {
|
||||
updateLimiting = true
|
||||
} else {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"cur_cpu": cpuload,
|
||||
"cur_mem": vmstat.Used,
|
||||
"waiting": waiting,
|
||||
"wait_for": r.limitWaitFor,
|
||||
}).Log("Waiting before limiting")
|
||||
}
|
||||
} else {
|
||||
r.limitLast = false
|
||||
updateLimiting = true
|
||||
}
|
||||
|
||||
if updateLimiting {
|
||||
if r.isCPULimiting != doCPULimit {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doCPULimit,
|
||||
"current": cpuload,
|
||||
"wait_for": r.limitWaitFor,
|
||||
}).Log("Limiting CPU")
|
||||
}
|
||||
r.isCPULimiting = doCPULimit
|
||||
|
||||
if r.isMemoryLimiting != doMemoryLimit {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doMemoryLimit,
|
||||
"current": vmstat.Used,
|
||||
"wait_for": r.limitWaitFor,
|
||||
}).Log("Limiting memory")
|
||||
}
|
||||
r.isMemoryLimiting = doMemoryLimit
|
||||
|
||||
for i, limiting := range r.isGPULimiting {
|
||||
if limiting != doGPULimit[i] {
|
||||
r.logger.Warn().WithFields(log.Fields{
|
||||
"enabled": doGPULimit,
|
||||
"index": i,
|
||||
"wait_for": r.limitWaitFor,
|
||||
}).Log("Limiting GPU")
|
||||
}
|
||||
}
|
||||
r.isGPULimiting = doGPULimit
|
||||
}
|
||||
r.isGPULimiting = doGPULimit
|
||||
|
||||
r.lock.Unlock()
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user