Use reference affinity while rebalancing processes

This commit is contained in:
Ingo Oppermann 2023-05-10 15:45:08 +02:00
parent 5af5c686ee
commit 7a2d0a7ad9
No known key found for this signature in database
GPG Key ID: 2AB32426E9DD229E
2 changed files with 329 additions and 11 deletions

View File

@ -572,6 +572,13 @@ func synchronize(want []app.Config, have []ProcessConfig, resources map[string]N
// A map from the process reference to the node it is running on
haveReferenceAffinityMap := map[string]string{}
for _, p := range have {
if len(p.Config.Reference) == 0 {
continue
}
// This is a simplification because a reference could be on several nodes,
// but here we only take into consideration the node on which the reference
// was seen last. This is good enough for now.
haveReferenceAffinityMap[p.Config.Reference] = p.NodeID
}
@ -659,6 +666,54 @@ func synchronize(want []app.Config, have []ProcessConfig, resources map[string]N
return opStack
}
// referenceAffinityNodeCount records how many processes carrying a given
// reference are currently running on one node. Slices of these, sorted by
// count in decreasing order, are used to pick the best candidate node when
// co-locating processes that share a reference.
type referenceAffinityNodeCount struct {
	nodeid string // ID of the node the reference has been seen on
	count  uint64 // number of processes with that reference on this node
}
// createReferenceAffinityMap builds, for every non-empty process reference,
// a list of (node, count) pairs describing how many processes with that
// reference run on each node. Each list is sorted by count in decreasing
// order, so the first entry is the preferred target node when moving a
// process that carries the reference.
func createReferenceAffinityMap(processes []ProcessConfig) map[string][]referenceAffinityNodeCount {
	// For each reference, remember at which index a node's counter lives so
	// counts can be bumped in O(1) while keeping first-seen node order.
	position := map[string]map[string]int{}
	affinity := map[string][]referenceAffinityNodeCount{}

	for _, process := range processes {
		ref := process.Config.Reference
		if len(ref) == 0 {
			continue
		}

		idx, known := position[ref]
		if !known {
			idx = map[string]int{}
			position[ref] = idx
		}

		// Count how often the reference is present on this node. When moving
		// processes to a different node, the node with the highest count of
		// identical references becomes the first candidate.
		if i, ok := idx[process.NodeID]; ok {
			affinity[ref][i].count++
			continue
		}

		idx[process.NodeID] = len(affinity[ref])
		affinity[ref] = append(affinity[ref], referenceAffinityNodeCount{
			nodeid: process.NodeID,
			count:  1,
		})
	}

	// Order every reference's node list by decreasing count. The stable sort
	// keeps the first-seen node order among nodes with equal counts.
	for ref, counts := range affinity {
		sort.SliceStable(counts, func(a, b int) bool {
			return counts[a].count > counts[b].count
		})
		affinity[ref] = counts
	}

	return affinity
}
// rebalance returns a list of operations that will move running processes away from nodes
// that are overloaded.
func rebalance(have []ProcessConfig, resources map[string]NodeResources) []interface{} {
@ -686,6 +741,9 @@ func rebalance(have []ProcessConfig, resources map[string]NodeResources) []inter
processNodeMap[nodeid] = processes
}
// A map from the process reference to the nodes it is running on
haveReferenceAffinityMap := createReferenceAffinityMap(have)
opStack := []interface{}{}
// Check if any of the nodes is overloaded
@ -695,7 +753,8 @@ func rebalance(have []ProcessConfig, resources map[string]NodeResources) []inter
continue
}
// Pick the first process from that node and move it to another node with enough free resources
// Move processes from this node to another node with enough free resources.
// The processes are ordered ascending by their runtime.
processes := processNodeMap[id]
if len(processes) == 0 {
// If there are no processes on that node, we can't do anything
@ -704,23 +763,42 @@ func rebalance(have []ProcessConfig, resources map[string]NodeResources) []inter
overloadedNodeid := id
for _, p := range processes {
for i, p := range processes {
if p.State != "running" {
// We consider only currently running processes
continue
}
// Find another node with enough resources available
availableNodeid := ""
for id, r := range resources {
if id == overloadedNodeid {
// Skip the overloaded node
continue
}
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit {
availableNodeid = id
break
// Try to move the process to a node where other processes with the same
// reference currently reside.
if len(p.Config.Reference) != 0 {
for _, count := range haveReferenceAffinityMap[p.Config.Reference] {
if count.nodeid == overloadedNodeid {
continue
}
r := resources[count.nodeid]
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit {
availableNodeid = count.nodeid
break
}
}
}
// Find another node with enough resources available
if len(availableNodeid) == 0 {
for id, r := range resources {
if id == overloadedNodeid {
// Skip the overloaded node
continue
}
if r.CPU+p.CPU < r.CPULimit && r.Mem+p.Mem < r.MemLimit {
availableNodeid = id
break
}
}
}
@ -740,6 +818,10 @@ func rebalance(have []ProcessConfig, resources map[string]NodeResources) []inter
config: p.Config,
})
// Adjust the process
p.NodeID = availableNodeid
processes[i] = p
// Adjust the resources
r = resources[availableNodeid]
r.CPU += p.CPU

View File

@ -726,3 +726,239 @@ func TestRebalanceSkip(t *testing.T) {
},
}, resources)
}
// TestRebalanceReferenceAffinity verifies that rebalancing prefers moving a
// process to a node that already hosts processes with the same reference:
// "barfoo" runs once on node2 and twice on node3, so node3 is the preferred
// target. The running process on the overloaded node1 that carries the
// reference (foobar2) must end up on node3, and the resource accounting of
// both nodes must be adjusted accordingly.
func TestRebalanceReferenceAffinity(t *testing.T) {
	procs := []ProcessConfig{
		{
			NodeID:  "node1",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID: "foobar1",
			},
		},
		{
			NodeID:  "node1",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 1,
			Config: &app.Config{
				ID:        "foobar2",
				Reference: "barfoo",
			},
		},
		{
			NodeID:  "node2",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar3",
				Reference: "barfoo",
			},
		},
		{
			NodeID:  "node3",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar4",
				Reference: "barfoo",
			},
		},
		{
			NodeID:  "node3",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar5",
				Reference: "barfoo",
			},
		},
	}

	// node1 is overloaded (at its limits); node2 and node3 are nearly idle.
	nodes := map[string]NodeResources{
		"node1": {
			NCPU:     1,
			CPU:      90,
			Mem:      90,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
		"node2": {
			NCPU:     1,
			CPU:      1,
			Mem:      1,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
		"node3": {
			NCPU:     1,
			CPU:      1,
			Mem:      1,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
	}

	ops := rebalance(procs, nodes)

	require.NotEmpty(t, ops)

	// foobar2 moves from the overloaded node1 to node3, where the "barfoo"
	// reference has the highest affinity.
	require.Equal(t, []interface{}{
		processOpMove{
			fromNodeid: "node1",
			toNodeid:   "node3",
			config: &app.Config{
				ID:        "foobar2",
				Reference: "barfoo",
			},
		},
	}, ops)

	// The move's resource usage is subtracted from node1 and added to node3.
	require.Equal(t, map[string]NodeResources{
		"node1": {
			NCPU:     1,
			CPU:      89,
			Mem:      89,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
		"node2": {
			NCPU:     1,
			CPU:      1,
			Mem:      1,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
		"node3": {
			NCPU:     1,
			CPU:      2,
			Mem:      2,
			MemTotal: 4 * 1024 * 1024 * 1024,
			CPULimit: 90,
			MemLimit: 90,
		},
	}, nodes)
}
// TestCreateReferenceAffinityNodeMap checks that the affinity map counts
// references per node, skips processes without a reference (foobar1), and
// orders each reference's node list by decreasing count: "ref1" is seen
// twice on node3 and once on node1, so node3 comes first.
func TestCreateReferenceAffinityNodeMap(t *testing.T) {
	procs := []ProcessConfig{
		{
			NodeID:  "node1",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID: "foobar1",
			},
		},
		{
			NodeID:  "node1",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 1,
			Config: &app.Config{
				ID:        "foobar2",
				Reference: "ref1",
			},
		},
		{
			NodeID:  "node2",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar3",
				Reference: "ref3",
			},
		},
		{
			// NOTE(review): the ID "foobar3" is reused here — presumably a
			// copy-paste slip in the fixture; it does not affect the asserted
			// map, which is keyed by reference only.
			NodeID:  "node2",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar3",
				Reference: "ref2",
			},
		},
		{
			NodeID:  "node3",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar4",
				Reference: "ref1",
			},
		},
		{
			NodeID:  "node3",
			Order:   "start",
			State:   "running",
			CPU:     1,
			Mem:     1,
			Runtime: 42,
			Config: &app.Config{
				ID:        "foobar5",
				Reference: "ref1",
			},
		},
	}

	got := createReferenceAffinityMap(procs)

	require.Equal(t, map[string][]referenceAffinityNodeCount{
		"ref1": {
			{
				nodeid: "node3",
				count:  2,
			},
			{
				nodeid: "node1",
				count:  1,
			},
		},
		"ref2": {
			{
				nodeid: "node2",
				count:  1,
			},
		},
		"ref3": {
			{
				nodeid: "node2",
				count:  1,
			},
		},
	}, got)
}