Skip to content

Commit d77fae7

Browse files
GuptaManan100arthurschreiber
authored andcommitted
feat: add tests for processing of health checks and also fix a bug
Signed-off-by: Manan Gupta <[email protected]>
1 parent 517f519 commit d77fae7

File tree

2 files changed

+239
-9
lines changed

2 files changed

+239
-9
lines changed

go/vt/discovery/keyspace_events.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -381,17 +381,19 @@ func (kss *keyspaceState) onHealthCheck(th *TabletHealth) {
381381
sstate.serving = true
382382
case !th.Serving:
383383
sstate.serving = false
384-
// Once we have seen a non-serving primary healthcheck, there is no need for us to explicitly wait
385-
// for a reparent to happen. We use waitForReparent to ensure that we don't prematurely stop
386-
// buffering when we receive a serving healthcheck from the primary that is being demoted.
387-
// However, if we receive a non-serving check, then we know that we won't receive any more serving
388-
// healthchecks anymore until reparent finishes. Specifically, this helps us when PRS fails, but
389-
// stops gracefully because the new candidate couldn't get caught up in time. In this case, we promote
390-
// the previous primary back. Without turning off waitForReparent here, we wouldn't be able to stop
391-
// buffering for that case.
392-
sstate.waitForReparent = false
393384
}
394385
}
386+
if !th.Serving {
387+
// Once we have seen a non-serving primary healthcheck, there is no need for us to explicitly wait
388+
// for a reparent to happen. We use waitForReparent to ensure that we don't prematurely stop
389+
// buffering when we receive a serving healthcheck from the primary that is being demoted.
390+
// However, if we receive a non-serving check, then we know that we won't receive any more serving
391+
// healthchecks anymore until reparent finishes. Specifically, this helps us when PRS fails, but
392+
// stops gracefully because the new candidate couldn't get caught up in time. In this case, we promote
393+
// the previous primary back. Without turning off waitForReparent here, we wouldn't be able to stop
394+
// buffering for that case.
395+
sstate.waitForReparent = false
396+
}
395397

396398
// if the primary for this shard has been externally reparented, we're undergoing a failover,
397399
// which is considered an availability event. update this shard to point it to the new tablet

go/vt/discovery/keyspace_events_test.go

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,234 @@ func TestWaitForConsistentKeyspaces(t *testing.T) {
421421
}
422422
}
423423

424+
func TestOnHealthCheck(t *testing.T) {
425+
testcases := []struct {
426+
name string
427+
ss *shardState
428+
th *TabletHealth
429+
wantServing bool
430+
wantWaitForReparent bool
431+
wantExternallyReparented int64
432+
wantUID uint32
433+
}{
434+
{
435+
name: "Non primary tablet health ignored",
436+
ss: &shardState{
437+
serving: false,
438+
waitForReparent: false,
439+
externallyReparented: 10,
440+
currentPrimary: &topodatapb.TabletAlias{
441+
Cell: testCell,
442+
Uid: 1,
443+
},
444+
},
445+
th: &TabletHealth{
446+
Target: &querypb.Target{
447+
TabletType: topodatapb.TabletType_REPLICA,
448+
},
449+
Serving: true,
450+
},
451+
wantServing: false,
452+
wantWaitForReparent: false,
453+
wantExternallyReparented: 10,
454+
wantUID: 1,
455+
}, {
456+
name: "Serving primary seen in non-serving shard",
457+
ss: &shardState{
458+
serving: false,
459+
waitForReparent: false,
460+
externallyReparented: 10,
461+
currentPrimary: &topodatapb.TabletAlias{
462+
Cell: testCell,
463+
Uid: 1,
464+
},
465+
},
466+
th: &TabletHealth{
467+
Target: &querypb.Target{
468+
TabletType: topodatapb.TabletType_PRIMARY,
469+
},
470+
Serving: true,
471+
PrimaryTermStartTime: 20,
472+
Tablet: &topodatapb.Tablet{
473+
Alias: &topodatapb.TabletAlias{
474+
Cell: testCell,
475+
Uid: 2,
476+
},
477+
},
478+
},
479+
wantServing: true,
480+
wantWaitForReparent: false,
481+
wantExternallyReparented: 20,
482+
wantUID: 2,
483+
}, {
484+
name: "New serving primary seen while waiting for reparent",
485+
ss: &shardState{
486+
serving: false,
487+
waitForReparent: true,
488+
externallyReparented: 10,
489+
currentPrimary: &topodatapb.TabletAlias{
490+
Cell: testCell,
491+
Uid: 1,
492+
},
493+
},
494+
th: &TabletHealth{
495+
Target: &querypb.Target{
496+
TabletType: topodatapb.TabletType_PRIMARY,
497+
},
498+
Serving: true,
499+
PrimaryTermStartTime: 20,
500+
Tablet: &topodatapb.Tablet{
501+
Alias: &topodatapb.TabletAlias{
502+
Cell: testCell,
503+
Uid: 2,
504+
},
505+
},
506+
},
507+
wantServing: true,
508+
wantWaitForReparent: false,
509+
wantExternallyReparented: 20,
510+
wantUID: 2,
511+
}, {
512+
name: "Old serving primary seen while waiting for reparent",
513+
ss: &shardState{
514+
serving: false,
515+
waitForReparent: true,
516+
externallyReparented: 10,
517+
currentPrimary: &topodatapb.TabletAlias{
518+
Cell: testCell,
519+
Uid: 1,
520+
},
521+
},
522+
th: &TabletHealth{
523+
Target: &querypb.Target{
524+
TabletType: topodatapb.TabletType_PRIMARY,
525+
},
526+
Serving: true,
527+
PrimaryTermStartTime: 10,
528+
Tablet: &topodatapb.Tablet{
529+
Alias: &topodatapb.TabletAlias{
530+
Cell: testCell,
531+
Uid: 1,
532+
},
533+
},
534+
},
535+
wantServing: false,
536+
wantWaitForReparent: true,
537+
wantExternallyReparented: 10,
538+
wantUID: 1,
539+
}, {
540+
name: "Old non-serving primary seen while waiting for reparent",
541+
ss: &shardState{
542+
serving: false,
543+
waitForReparent: true,
544+
externallyReparented: 10,
545+
currentPrimary: &topodatapb.TabletAlias{
546+
Cell: testCell,
547+
Uid: 1,
548+
},
549+
},
550+
th: &TabletHealth{
551+
Target: &querypb.Target{
552+
TabletType: topodatapb.TabletType_PRIMARY,
553+
},
554+
Serving: false,
555+
PrimaryTermStartTime: 10,
556+
Tablet: &topodatapb.Tablet{
557+
Alias: &topodatapb.TabletAlias{
558+
Cell: testCell,
559+
Uid: 1,
560+
},
561+
},
562+
},
563+
wantServing: false,
564+
wantWaitForReparent: false,
565+
wantExternallyReparented: 10,
566+
wantUID: 1,
567+
}, {
568+
name: "New serving primary while already serving",
569+
ss: &shardState{
570+
serving: true,
571+
waitForReparent: false,
572+
externallyReparented: 10,
573+
currentPrimary: &topodatapb.TabletAlias{
574+
Cell: testCell,
575+
Uid: 1,
576+
},
577+
},
578+
th: &TabletHealth{
579+
Target: &querypb.Target{
580+
TabletType: topodatapb.TabletType_PRIMARY,
581+
},
582+
Serving: true,
583+
PrimaryTermStartTime: 20,
584+
Tablet: &topodatapb.Tablet{
585+
Alias: &topodatapb.TabletAlias{
586+
Cell: testCell,
587+
Uid: 2,
588+
},
589+
},
590+
},
591+
wantServing: true,
592+
wantWaitForReparent: false,
593+
wantExternallyReparented: 20,
594+
wantUID: 2,
595+
}, {
596+
name: "Primary goes non serving",
597+
ss: &shardState{
598+
serving: true,
599+
waitForReparent: false,
600+
externallyReparented: 10,
601+
currentPrimary: &topodatapb.TabletAlias{
602+
Cell: testCell,
603+
Uid: 1,
604+
},
605+
},
606+
th: &TabletHealth{
607+
Target: &querypb.Target{
608+
TabletType: topodatapb.TabletType_PRIMARY,
609+
},
610+
Serving: false,
611+
PrimaryTermStartTime: 10,
612+
Tablet: &topodatapb.Tablet{
613+
Alias: &topodatapb.TabletAlias{
614+
Cell: testCell,
615+
Uid: 1,
616+
},
617+
},
618+
},
619+
wantServing: false,
620+
wantWaitForReparent: false,
621+
wantExternallyReparented: 10,
622+
wantUID: 1,
623+
},
624+
}
625+
626+
ksName := "ks"
627+
shard := "-80"
628+
kss := &keyspaceState{
629+
mu: sync.Mutex{},
630+
keyspace: ksName,
631+
shards: make(map[string]*shardState),
632+
}
633+
// Adding this so that we don't run any topo calls from ensureConsistentLocked.
634+
kss.moveTablesState = &MoveTablesState{
635+
Typ: MoveTablesRegular,
636+
State: MoveTablesSwitching,
637+
}
638+
for _, tt := range testcases {
639+
t.Run(tt.name, func(t *testing.T) {
640+
kss.shards[shard] = tt.ss
641+
tt.th.Target.Keyspace = ksName
642+
tt.th.Target.Shard = shard
643+
kss.onHealthCheck(tt.th)
644+
require.Equal(t, tt.wantServing, tt.ss.serving)
645+
require.Equal(t, tt.wantWaitForReparent, tt.ss.waitForReparent)
646+
require.Equal(t, tt.wantExternallyReparented, tt.ss.externallyReparented)
647+
require.Equal(t, tt.wantUID, tt.ss.currentPrimary.Uid)
648+
})
649+
}
650+
}
651+
424652
type fakeTopoServer struct {
425653
}
426654

0 commit comments

Comments
 (0)