Add IsRedisRunning checks to ensure that pods are up and running
ese committed Dec 1, 2022
1 parent 5f76b6e commit 1e46182
Showing 5 changed files with 168 additions and 11 deletions.
42 changes: 42 additions & 0 deletions mocks/operator/redisfailover/service/RedisFailoverCheck.go

Some generated files are not rendered by default.
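The 42 added lines of this mock are not shown on the page. Since the file lives under mocks/ and is generated, the new IsRedisRunning method presumably follows the usual mockery pattern; the sketch below is an illustrative guess at that generated output, not the file's verbatim contents. It is what lets the test diff further down stub the call with mrfc.On("IsRedisRunning", rf).Once().Return(true).

// Hypothetical sketch of a mockery-generated method for the new interface
// function (the actual generated file may differ in import aliases and details).
func (_m *RedisFailoverCheck) IsRedisRunning(rFailover *redisfailoverv1.RedisFailover) bool {
	ret := _m.Called(rFailover)

	var r0 bool
	if rf, ok := ret.Get(0).(func(*redisfailoverv1.RedisFailover) bool); ok {
		r0 = rf(rFailover)
	} else {
		r0 = ret.Get(0).(bool)
	}

	return r0
}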

15 changes: 7 additions & 8 deletions operator/redisfailover/checker.go
@@ -198,14 +198,14 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
}

func (r *RedisFailoverHandler) checkAndHealBootstrapMode(rf *redisfailoverv1.RedisFailover) error {
err := r.rfChecker.CheckRedisNumber(rf)
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, err)
if err != nil {

if !r.rfChecker.IsRedisRunning(rf) {
setRedisCheckerMetrics(r.mClient, "redis", rf.Namespace, rf.Name, metrics.REDIS_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of redis mismatch, waiting for redis statefulset reconcile")
return nil
}

err = r.UpdateRedisesPods(rf)
err := r.UpdateRedisesPods(rf)
if err != nil {
return err
}
@@ -221,10 +221,9 @@ func (r *RedisFailoverHandler) checkAndHealBootstrapMode(rf *redisfailoverv1.Red
}

if rf.SentinelsAllowed() {
err = r.rfChecker.CheckSentinelNumber(rf)
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, err)
if err != nil {
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Number of sentinel mismatch, waiting for sentinel deployment reconcile")
if !r.rfChecker.IsSentinelRunning(rf) {
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_REPLICA_MISMATCH, metrics.NOT_APPLICABLE, errors.New("not all replicas running"))
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Number of sentinel mismatch, waiting for sentinel deployment reconcile")
return nil
}

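In plain terms, the bootstrap-mode healer previously compared desired and actual replica counts (CheckRedisNumber returned an error on a mismatch); with this change it also requires every pod to actually be in the Running phase before any healing work proceeds. A minimal, hypothetical sketch of the new gating pattern (simplified, not the operator's exact code):

	// Hypothetical simplification: skip healing work until all Redis pods are
	// Running, and likewise for sentinels when they are allowed.
	if !r.rfChecker.IsRedisRunning(rf) {
		return nil // wait for the Redis StatefulSet to reconcile
	}
	if rf.SentinelsAllowed() && !r.rfChecker.IsSentinelRunning(rf) {
		return nil // wait for the sentinel Deployment to reconcile
	}
	// ...continue with UpdateRedisesPods, sentinel monitoring, etc.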
6 changes: 3 additions & 3 deletions operator/redisfailover/checker_test.go
@@ -265,14 +265,14 @@ func TestCheckAndHeal(t *testing.T) {
mrfh := &mRFService.RedisFailoverHeal{}

if test.redisCheckNumberOK {
mrfc.On("CheckRedisNumber", rf).Once().Return(nil)
mrfc.On("IsRedisRunning", rf).Once().Return(true)
} else {
continueTests = false
mrfc.On("CheckRedisNumber", rf).Once().Return(errors.New(""))
mrfc.On("IsRedisRunning", rf).Once().Return(false)
}

if allowSentinels {
mrfc.On("CheckSentinelNumber", rf).Once().Return(nil)
mrfc.On("IsSentinelRunning", rf).Once().Return(true)
}

if bootstrappingTests && continueTests {
29 changes: 29 additions & 0 deletions operator/redisfailover/service/check.go
@@ -34,6 +34,9 @@ type RedisFailoverCheck interface {
GetStatefulSetUpdateRevision(rFailover *redisfailoverv1.RedisFailover) (string, error)
GetRedisRevisionHash(podName string, rFailover *redisfailoverv1.RedisFailover) (string, error)
CheckRedisSlavesReady(slaveIP string, rFailover *redisfailoverv1.RedisFailover) (bool, error)
IsRedisRunning(rFailover *redisfailoverv1.RedisFailover) bool
IsSentinelRunning(rFailover *redisfailoverv1.RedisFailover) bool
IsClusterRunning(rFailover *redisfailoverv1.RedisFailover) bool
}

// RedisFailoverChecker is our implementation of RedisFailoverCheck interface
@@ -385,6 +388,32 @@ func (r *RedisFailoverChecker) CheckRedisSlavesReady(ip string, rFailover *redis
return r.redisClient.SlaveIsReady(ip, port, password)
}

// IsRedisRunning returns true if all the pods are Running
func (r *RedisFailoverChecker) IsRedisRunning(rFailover *redisfailoverv1.RedisFailover) bool {
dp, err := r.k8sService.GetStatefulSetPods(rFailover.Namespace, GetRedisName(rFailover))
return err == nil && len(dp.Items) > int(rFailover.Spec.Redis.Replicas-1) && AreAllRunning(dp)
}

// IsSentinelRunning returns true if all the pods are Running
func (r *RedisFailoverChecker) IsSentinelRunning(rFailover *redisfailoverv1.RedisFailover) bool {
dp, err := r.k8sService.GetDeploymentPods(rFailover.Namespace, GetSentinelName(rFailover))
return err == nil && len(dp.Items) > int(rFailover.Spec.Redis.Replicas-1) && AreAllRunning(dp)
}

// IsClusterRunning returns true if all the pods in the given redisfailover are Running
func (r *RedisFailoverChecker) IsClusterRunning(rFailover *redisfailoverv1.RedisFailover) bool {
return r.IsSentinelRunning(rFailover) && r.IsRedisRunning(rFailover)
}

func getRedisPort(p int32) string {
return strconv.Itoa(int(p))
}

func AreAllRunning(pods *corev1.PodList) bool {
for _, pod := range pods.Items {
if pod.Status.Phase != corev1.PodRunning || pod.DeletionTimestamp != nil {
return false
}
}
return true
}
87 changes: 87 additions & 0 deletions operator/redisfailover/service/check_test.go
@@ -869,3 +869,90 @@ func TestGetRedisRevisionHash(t *testing.T) {
}

}

func TestClusterRunning(t *testing.T) {
assert := assert.New(t)

rf := generateRF()

allRunning := &corev1.PodList{
Items: []corev1.Pod{
{
Status: corev1.PodStatus{
PodIP: "0.0.0.0",
Phase: corev1.PodRunning,
},
},
{
Status: corev1.PodStatus{
PodIP: "1.1.1.1",
Phase: corev1.PodRunning,
},
},
{
Status: corev1.PodStatus{
PodIP: "1.1.1.1",
Phase: corev1.PodRunning,
},
},
},
}

notAllRunning := &corev1.PodList{
Items: []corev1.Pod{
{
Status: corev1.PodStatus{
PodIP: "0.0.0.0",
Phase: corev1.PodRunning,
},
},
{
Status: corev1.PodStatus{
PodIP: "1.1.1.1",
Phase: corev1.PodPending,
},
},
{
Status: corev1.PodStatus{
PodIP: "1.1.1.1",
Phase: corev1.PodRunning,
},
},
},
}

notAllReplicas := &corev1.PodList{
Items: []corev1.Pod{
{
Status: corev1.PodStatus{
PodIP: "0.0.0.0",
Phase: corev1.PodRunning,
},
},
{
Status: corev1.PodStatus{
PodIP: "1.1.1.1",
Phase: corev1.PodRunning,
},
},
},
}

ms := &mK8SService.Services{}
ms.On("GetDeploymentPods", namespace, rfservice.GetSentinelName(rf)).Once().Return(allRunning, nil)
ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(allRunning, nil)
mr := &mRedisService.Client{}

checker := rfservice.NewRedisFailoverChecker(ms, mr, log.DummyLogger{}, metrics.Dummy)

assert.True(checker.IsClusterRunning(rf))

ms.On("GetDeploymentPods", namespace, rfservice.GetSentinelName(rf)).Once().Return(allRunning, nil)
ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(notAllReplicas, nil)
assert.False(checker.IsClusterRunning(rf))

ms.On("GetDeploymentPods", namespace, rfservice.GetSentinelName(rf)).Once().Return(notAllRunning, nil)
ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(allRunning, nil)
assert.False(checker.IsClusterRunning(rf))

}
