From 03acc681722268d38ad3b41109e9848d3245f306 Mon Sep 17 00:00:00 2001 From: Wonkun Kim Date: Fri, 3 Jun 2022 16:25:08 -0500 Subject: [PATCH 4/4] Add retry on listing members in join command --- cmd/join.go | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/cmd/join.go b/cmd/join.go index 38ab2e17..2c6c06a4 100644 --- a/cmd/join.go +++ b/cmd/join.go @@ -134,6 +134,23 @@ func membership() phase { return nil } + var lastErr error + retrySteps := 1 + if in.etcdAdmConfig.Retry { + retrySteps = constants.DefaultBackOffSteps + } + + // Exponential backoff for MemberList and MemberAdd (values exclude jitter): + // If --retry=false, add member only try one times, otherwise try five times. + // The backoff duration is 0, 2, 4, 8, 16 s + opts := wait.Backoff{ + Duration: constants.DefaultBackOffDuration, + Steps: retrySteps, + Factor: constants.DefaultBackOffFactor, + Jitter: 0.1, + } + + var localMember *etcd.Member var members []*etcd.Member log.Println("[membership] Checking if this member was added") log.Printf("[membership] Generating etcd client with endpoints %s", in.etcdAdmConfig.Endpoints) @@ -143,13 +160,22 @@ func membership() phase { } defer client.Close() - ctx, cancel := context.WithTimeout(context.Background(), constants.DefaultEtcdRequestTimeout) - mresp, err := client.MemberList(ctx) - cancel() + err = wait.ExponentialBackoff(opts, func() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), constants.DefaultEtcdRequestTimeout) + mresp, err := client.MemberList(ctx) + cancel() + if err != nil { + log.Warningf("[membership] Error listing members: %v, will retry after %s.", err, opts.Step()) + lastErr = err + return false, nil + } + members = mresp.Members + return true, nil + }) if err != nil { - return fmt.Errorf("error listing members: %v", err) + return fmt.Errorf("error listing members: %v", lastErr) } - members = mresp.Members + localMember, ok := etcd.MemberForPeerURLs(members, in.etcdAdmConfig.InitialAdvertisePeerURLs.StringSlice()) if !ok { log.Printf("[membership] Member was not added") @@ -157,21 +183,6 @@ func membership() phase { os.RemoveAll(in.etcdAdmConfig.DataDir) log.Println("[membership] Adding member") - var lastErr error - retrySteps := 1 - if in.etcdAdmConfig.Retry { - retrySteps = constants.DefaultBackOffSteps - } - - // Exponential backoff for MemberAdd (values exclude jitter): - // If --retry=false, add member only try one times, otherwise try five times. - // The backoff duration is 0, 2, 4, 8, 16 s - opts := wait.Backoff{ - Duration: constants.DefaultBackOffDuration, - Steps: retrySteps, - Factor: constants.DefaultBackOffFactor, - Jitter: 0.1, - } err := wait.ExponentialBackoff(opts, func() (bool, error) { ctx, cancel := context.WithTimeout(context.Background(), constants.DefaultEtcdRequestTimeout) mresp, err := client.MemberAdd(ctx, in.etcdAdmConfig.InitialAdvertisePeerURLs.StringSlice()) -- 2.39.2