Choose the GPU with the least overall usage

This commit is contained in:
Ingo Oppermann 2024-12-10 15:47:07 +01:00
parent 64a2136501
commit 893f8c2b1f
No known key found for this signature in database
GPG Key ID: 2AB32426E9DD229E
2 changed files with 29 additions and 17 deletions

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"os"
"sort"
"sync"
"time"
@ -474,7 +475,7 @@ func (r *resources) Request(req Request) (Response, error) {
return res, fmt.Errorf("some GPU resources requested but no GPU available")
}
foundGPU := -1
fittingGPU := []psutil.GPUInfo{}
for _, g := range gpustat {
if req.GPUUsage > 0 && g.Usage+req.GPUUsage > r.maxGPU {
logger.Debug().WithFields(log.Fields{"id": g.Index, "cur_gpu": g.Usage}).Log("Rejected, GPU usage limit exceeded")
@ -499,24 +500,31 @@ func (r *resources) Request(req Request) (Response, error) {
continue
}
foundGPU = g.Index
logger = logger.Debug().WithFields(log.Fields{
"cur_gpu": foundGPU,
"cur_gpu_general": g.Usage,
"cur_gpu_encoder": g.Encoder,
"cur_gpu_decoder": g.Decoder,
"cur_gpu_memory": gpuMemoryUsage,
})
break
fittingGPU = append(fittingGPU, g)
}
if foundGPU < 0 {
if len(fittingGPU) == 0 {
return res, fmt.Errorf("all GPU usage limits are exceeded")
}
res.GPU = foundGPU
sort.SliceStable(fittingGPU, func(a, b int) bool {
loadA := fittingGPU[a].Usage + fittingGPU[a].Encoder + fittingGPU[a].Decoder
loadB := fittingGPU[b].Usage + fittingGPU[b].Encoder + fittingGPU[b].Decoder
return loadA < loadB
})
foundGPU := fittingGPU[0]
logger = logger.Debug().WithFields(log.Fields{
"cur_gpu": foundGPU.Index,
"cur_gpu_general": foundGPU.Usage,
"cur_gpu_encoder": foundGPU.Encoder,
"cur_gpu_decoder": foundGPU.Decoder,
"cur_gpu_memory": float64(foundGPU.MemoryUsed) / float64(foundGPU.MemoryTotal) * 100,
})
res.GPU = foundGPU.Index
}
logger.Debug().WithFields(log.Fields{

View File

@ -725,16 +725,20 @@ func TestRequestGPULimitsMoreGPU(t *testing.T) {
MaxMemory: 100,
MaxGPU: 60,
MaxGPUMemory: 60,
PSUtil: psutil.New(2),
PSUtil: psutil.New(3),
})
require.NoError(t, err)
_, err = r.Request(Request{CPU: 10, Memory: 10, GPUEncoder: 50, GPUMemory: 10})
require.Error(t, err)
res, err := r.Request(Request{CPU: 10, Memory: 10, GPUEncoder: 30, GPUMemory: 10})
res, err := r.Request(Request{CPU: 10, Memory: 10, GPUEncoder: 10, GPUMemory: 10})
require.NoError(t, err)
require.Equal(t, 1, res.GPU)
require.Equal(t, 2, res.GPU)
res, err = r.Request(Request{CPU: 10, Memory: 10, GPUEncoder: 30, GPUMemory: 10})
require.NoError(t, err)
require.Equal(t, 2, res.GPU)
}
func TestHasLimits(t *testing.T) {