背景
在k8s 1.36之前,开启MemoryQoS后,只要设置了requests.memory,kubelet都会设置memory.min来预留内存,这部分内存被硬性保留,会导致系统可用内存变少,例如会增加BestEffort pod的oom风险
k8s 1.36后,引入了TieredReservation memory qos策略:针对Guaranteed pod设置memory.min,针对Burstable pod设置memory.low来预留内存,并设置memory.high来进行内存节流
memory.min是硬保护(强保证),这部分内存会预留给这个pod,任何情况下都不会被内核回收
memory.low是软保护(尽力而为),普通压力下内核仍然会保护这部分内存,但在极端压力下(其他未受保护的cgroup中已没有可回收内存时)内核会回收其中一部分以防止系统级oom
memory.high是软限制(节流阈值),使用内存达到这个值会触发内存回收并限制分配,而不会oom, 公式为memory.high = floor[(requests.memory + memoryThrottlingFactor * (limit - requests.memory)) / pageSize] * pageSize, 其中limit取limits.memory,未设置limits.memory时取node allocatable memory
使用方式
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
featureGates:
  MemoryQoS: true
memoryReservationPolicy: TieredReservation # 默认None
memoryThrottlingFactor: 0.9 # 默认0.9
源码
pkg/kubelet/cm/helpers_linux.go中
pod资源配置
func ResourceConfigForPod(allocatedPod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64, enforceMemoryQoS bool, memoryReservationPolicy kubeletconfig.MemoryReservationPolicy) *ResourceConfig {
...
if enforceMemoryQoS && memoryReservationPolicy == kubeletconfig.TieredReservationMemoryReservationPolicy {
memoryRequest := int64(0)
if request, found := reqs[v1.ResourceMemory]; found {
memoryRequest = request.Value()
}
if memoryRequest > 0 {
cgroupKey := Cgroup2MemoryLow
if qosClass == v1.PodQOSGuaranteed {
cgroupKey = Cgroup2MemoryMin
}
result.Unified = map[string]string{
cgroupKey: strconv.FormatInt(memoryRequest, 10),
}
}
}
...
}
pkg/kubelet/kuberuntime/kuberuntime_container_linux.go中
生成linux容器资源
func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(ctx context.Context, pod *v1.Pod, container *v1.Container, enforceMemoryQoS bool) *runtimeapi.LinuxContainerResources {
...
// memory request和limit不相等时才计算memory.high
if memoryRequest != memoryLimit {
memoryHigh := int64(0)
// 如果memory limit不为0
if memoryLimit != 0 {
// 根据memory limit和memory request计算memory.high
memoryHigh = int64(math.Floor(
float64(memoryRequest)+
(float64(memoryLimit)-float64(memoryRequest))*float64(m.memoryThrottlingFactor))/float64(defaultPageSize)) * defaultPageSize
} else {
// 未设置memory limit时,根据node allocatable memory和memory request计算memory.high
allocatable := m.getNodeAllocatable()
allocatableMemory, ok := allocatable[v1.ResourceMemory]
if ok && allocatableMemory.Value() > 0 {
memoryHigh = int64(math.Floor(
float64(memoryRequest)+
(float64(allocatableMemory.Value())-float64(memoryRequest))*float64(m.memoryThrottlingFactor))/float64(defaultPageSize)) * defaultPageSize
}
}
if memoryHigh != 0 && memoryHigh > memoryRequest {
unified[cm.Cgroup2MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
}
}
...
}