Skip to content

Commit

Permalink
Add logging and monitoring in case we have minimum memory recommendat…
Browse files Browse the repository at this point in the history
…ions (#322)

* Add logging and monitoring in case we have minimum memory recommendations

* Apply review comments

* Fix tests
  • Loading branch information
plkokanov authored Aug 15, 2024
1 parent d9666fe commit 2cf2b46
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 13 deletions.
33 changes: 31 additions & 2 deletions vertical-pod-autoscaler/pkg/recommender/logic/estimator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,15 @@ limitations under the License.
package logic

import (
"encoding/json"
"math"
"time"

corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality"
)

// TODO: Split the estimator to have a separate estimator object for CPU and memory.
Expand Down Expand Up @@ -53,6 +58,7 @@ type marginEstimator struct {
type minResourcesEstimator struct {
minResources model.Resources
baseEstimator ResourceEstimator
vpaKey model.VpaID
}

type confidenceMultiplier struct {
Expand All @@ -79,8 +85,8 @@ func WithMargin(marginFraction float64, baseEstimator ResourceEstimator) Resourc

// WithMinResources returns a given ResourceEstimator with minResources applied.
// The returned resources are equal to the max(original resources, minResources)
func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator) ResourceEstimator {
return &minResourcesEstimator{minResources, baseEstimator}
func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator, vpaKey model.VpaID) ResourceEstimator {
return &minResourcesEstimator{minResources, baseEstimator, vpaKey}
}

// WithConfidenceMultiplier returns a given ResourceEstimator with confidenceMultiplier applied.
Expand Down Expand Up @@ -152,9 +158,32 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine
newResources := make(model.Resources)
for resource, resourceAmount := range originalResources {
if resourceAmount < e.minResources[resource] {
if resource == "memory" {
klog.Warningf("Computed %s resources for VPA %s were below minimum! Computed %v, minimum is %v.", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), resourceAmount, e.minResources[resource])
logHistogramInformation(s, e.vpaKey)
metrics_quality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName(resource), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName)
}
resourceAmount = e.minResources[resource]
}
newResources[resource] = resourceAmount
}
return newResources
}

func logHistogramInformation(s *model.AggregateContainerState, vpaKey model.VpaID) {
if s.AggregateCPUUsage == nil {
klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName))
return
}
if s.AggregateMemoryPeaks == nil {
klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName))
return
}
c, _ := s.SaveToCheckpoint()
prettyCheckpoint, err := json.MarshalIndent(c, "", " ")
if err != nil {
klog.Errorf("Error during marshalling checkpoint for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), err)
return
}
klog.Warningf("Here's the checkpoint/state for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), prettyCheckpoint)
}
10 changes: 5 additions & 5 deletions vertical-pod-autoscaler/pkg/recommender/logic/recommender.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ var (

// PodResourceRecommender computes resource recommendation for a Vpa object.
type PodResourceRecommender interface {
GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources
GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources
}

// RecommendedPodResources is a Map from container name to recommended resources.
Expand All @@ -61,7 +61,7 @@ type podResourceRecommender struct {
upperBoundEstimator ResourceEstimator
}

func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources {
func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources {
var recommendation = make(RecommendedPodResources)
if len(containerNameToAggregateStateMap) == 0 {
return recommendation
Expand All @@ -74,9 +74,9 @@ func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggre
}

recommender := &podResourceRecommender{
WithMinResources(minResources, r.targetEstimator),
WithMinResources(minResources, r.lowerBoundEstimator),
WithMinResources(minResources, r.upperBoundEstimator),
WithMinResources(minResources, r.targetEstimator, vpaKey),
WithMinResources(minResources, r.lowerBoundEstimator, vpaKey),
WithMinResources(minResources, r.upperBoundEstimator, vpaKey),
}

for containerName, aggregatedContainerState := range containerNameToAggregateStateMap {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ limitations under the License.
package logic

import (
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
"testing"
)

func TestMinResourcesApplied(t *testing.T) {
Expand All @@ -36,7 +37,7 @@ func TestMinResourcesApplied(t *testing.T) {
"container-1": &model.AggregateContainerState{},
}

recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap)
recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{})
assert.Equal(t, model.CPUAmountFromCores(*podMinCPUMillicores/1000), recommendedResources["container-1"].Target[model.ResourceCPU])
assert.Equal(t, model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), recommendedResources["container-1"].Target[model.ResourceMemory])
}
Expand All @@ -56,7 +57,7 @@ func TestMinResourcesSplitAcrossContainers(t *testing.T) {
"container-2": &model.AggregateContainerState{},
}

recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap)
recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{})
assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-1"].Target[model.ResourceCPU])
assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-2"].Target[model.ResourceCPU])
assert.Equal(t, model.MemoryAmountFromBytes((*podMinMemoryMb*1024*1024)/2), recommendedResources["container-1"].Target[model.ResourceMemory])
Expand All @@ -80,7 +81,7 @@ func TestControlledResourcesFiltered(t *testing.T) {
},
}

recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap)
recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{})
assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory)
assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory)
assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory)
Expand All @@ -106,7 +107,7 @@ func TestControlledResourcesFilteredDefault(t *testing.T) {
},
}

recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap)
recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{})
assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory)
assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory)
assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (r *recommender) UpdateVPAs() {
if !found {
continue
}
resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa))
resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa), key)
had := vpa.HasRecommendation()

listOfResourceRecommendation := logic.MapToListOfRecommendedContainerResources(resources)
Expand Down
13 changes: 13 additions & 0 deletions vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ var (
Buckets: relativeBuckets,
}, []string{"update_mode", "resource", "vpa_size_log2"},
)
lowerThanMinRecommendationCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Name: "lower_than_min_recommendation_count",
Help: "Count of usage samples when resource recommendation values are lower than the minimum",
}, []string{"update_mode", "resource", "vpa"},
)
)

// Register initializes all VPA quality metrics
Expand All @@ -129,6 +136,7 @@ func Register() {
prometheus.MustRegister(cpuRecommendations)
prometheus.MustRegister(memoryRecommendations)
prometheus.MustRegister(relativeRecommendationChange)
prometheus.MustRegister(lowerThanMinRecommendationCounter)
}

// observeUsageRecommendationRelativeDiff records relative diff between usage and
Expand Down Expand Up @@ -222,6 +230,11 @@ func ObserveRecommendationChange(previous, current corev1.ResourceList, updateMo
}
}

// ObserveLowerThanMinRecommendation counts usage samples with lower than min memory recommendations.
func ObserveLowerThanMinRecommendation(updateMode *vpa_types.UpdateMode, resource corev1.ResourceName, vpaKey string) {
lowerThanMinRecommendationCounter.WithLabelValues(updateModeToString(updateMode), string(resource), vpaKey).Inc()
}

// quantityAsFloat converts resource.Quantity to a float64 value, in some scale (constant per resource but unspecified)
func quantityAsFloat(resource corev1.ResourceName, quantity resource.Quantity) float64 {
switch resource {
Expand Down

0 comments on commit 2cf2b46

Please sign in to comment.