Skip to content

Commit 0534dc5

Browse files
Properly handle grpc dial errors.
Signed-off-by: Arthur Schreiber <[email protected]>
1 parent 6e5203b commit 0534dc5

File tree

3 files changed

+119
-1
lines changed

3 files changed

+119
-1
lines changed

go/vt/vttablet/tabletserver/throttle/base/metric_result.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ package base
1919
import (
2020
"errors"
2121
"net"
22+
23+
"google.golang.org/grpc/codes"
24+
"google.golang.org/grpc/status"
2225
)
2326

2427
// MetricResult is what we expect our probes to return. This can be a numeric result, or
@@ -58,6 +61,11 @@ func IsDialTCPError(err error) bool {
5861
if err == nil {
5962
return false
6063
}
64+
65+
if s, ok := status.FromError(err); ok {
66+
return s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded
67+
}
68+
6169
switch err := err.(type) {
6270
case *net.OpError:
6371
return err.Op == "dial" && err.Net == "tcp"

go/vt/vttablet/tabletserver/throttle/throttler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -907,7 +907,7 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, probe
907907
req := &tabletmanagerdatapb.CheckThrottlerRequest{} // We leave AppName empty; it will default to VitessName anyway, and we can save some proto space
908908
resp, gRPCErr := tmClient.CheckThrottler(ctx, probe.Tablet, req)
909909
if gRPCErr != nil {
910-
return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%v", probe.Alias, gRPCErr))
910+
return metricsWithError(fmt.Errorf("gRPC error accessing tablet %v. Err=%w", probe.Alias, gRPCErr))
911911
}
912912
throttleMetric.Value = resp.Value
913913
if resp.ResponseCode == tabletmanagerdatapb.CheckThrottlerResponseCode_INTERNAL_ERROR {

go/vt/vttablet/tabletserver/throttle/throttler_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,15 @@ import (
3030
"github.com/stretchr/testify/require"
3131
"golang.org/x/exp/maps"
3232

33+
"google.golang.org/grpc"
34+
"google.golang.org/grpc/credentials/insecure"
35+
3336
"vitess.io/vitess/go/protoutil"
37+
38+
"vitess.io/vitess/go/vt/grpcclient"
3439
"vitess.io/vitess/go/vt/topo"
3540
"vitess.io/vitess/go/vt/vtenv"
41+
"vitess.io/vitess/go/vt/vttablet/grpctmclient"
3642
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
3743
"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
3844
"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base"
@@ -837,6 +843,110 @@ func TestApplyThrottlerConfigAppCheckedMetrics(t *testing.T) {
837843
})
838844
}
839845

846+
func TestIsDialTCPError(t *testing.T) {
847+
// Verify that IsDialTCPError actually recognizes grpc dial errors
848+
cc, err := grpcclient.DialContext(t.Context(), ":0", true, grpc.WithTransportCredentials(insecure.NewCredentials()))
849+
require.NoError(t, err)
850+
defer cc.Close()
851+
852+
err = cc.Invoke(context.Background(), "/Fail", nil, nil)
853+
854+
require.True(t, base.IsDialTCPError(err))
855+
require.True(t, base.IsDialTCPError(fmt.Errorf("wrapped: %w", err)))
856+
}
857+
858+
func TestProbeWithUnavailableHost(t *testing.T) {
859+
throttler := Throttler{
860+
throttledApps: cache.New(cache.NoExpiration, 0),
861+
heartbeatWriter: &FakeHeartbeatWriter{},
862+
}
863+
864+
alias := &topodatapb.TabletAlias{
865+
Cell: "cell1",
866+
Uid: 100,
867+
}
868+
869+
// The hostname used here is not routable, so the connection will fail.
870+
tablet := &topo.TabletInfo{
871+
Tablet: &topodatapb.Tablet{
872+
Alias: alias,
873+
Hostname: "192.0.2.0",
874+
MysqlHostname: "192.0.2.0",
875+
MysqlPort: 3306,
876+
PortMap: map[string]int32{"grpc": 5000},
877+
Type: topodatapb.TabletType_PRIMARY,
878+
},
879+
}
880+
881+
probe := &base.Probe{
882+
Alias: "cell1-100",
883+
Tablet: tablet.Tablet,
884+
CacheMillis: 100,
885+
}
886+
887+
tmClient := grpctmclient.NewClient()
888+
889+
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
890+
891+
metrics := probeFunc(t.Context(), tmClient)
892+
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
893+
894+
tabletResultsMap := base.TabletResultMap{
895+
"cell1-100": base.MetricResultMap{
896+
"custom": metrics["custom"],
897+
},
898+
}
899+
900+
worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0)
901+
require.Equal(t, base.NoHostsMetricResult, worstMetric)
902+
}
903+
904+
func TestProbeWithEmptyHostAndPort(t *testing.T) {
905+
throttler := Throttler{
906+
throttledApps: cache.New(cache.NoExpiration, 0),
907+
heartbeatWriter: &FakeHeartbeatWriter{},
908+
}
909+
910+
alias := &topodatapb.TabletAlias{
911+
Cell: "cell1",
912+
Uid: 100,
913+
}
914+
915+
// The hostname used here is not routable, so the connection will fail.
916+
tablet := &topo.TabletInfo{
917+
Tablet: &topodatapb.Tablet{
918+
Alias: alias,
919+
Hostname: "",
920+
MysqlHostname: "192.0.2.0",
921+
MysqlPort: 3306,
922+
PortMap: map[string]int32{"grpc": 0},
923+
Type: topodatapb.TabletType_PRIMARY,
924+
},
925+
}
926+
927+
probe := &base.Probe{
928+
Alias: "cell1-100",
929+
Tablet: tablet.Tablet,
930+
CacheMillis: 100,
931+
}
932+
933+
tmClient := grpctmclient.NewClient()
934+
935+
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
936+
937+
metrics := probeFunc(t.Context(), tmClient)
938+
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
939+
940+
tabletResultsMap := base.TabletResultMap{
941+
"cell1-100": base.MetricResultMap{
942+
"custom": metrics["custom"],
943+
},
944+
}
945+
946+
worstMetric := base.AggregateTabletMetricResults("custom", tabletResultsMap, 0, true, 0.0)
947+
require.Equal(t, base.NoHostsMetricResult, worstMetric)
948+
}
949+
840950
func TestIsAppThrottled(t *testing.T) {
841951
plusOneHour := time.Now().Add(time.Hour)
842952
throttler := Throttler{

0 commit comments

Comments
 (0)