maintenance.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "fmt"
  18. "io"
  19. pb "go.etcd.io/etcd/api/v3/etcdserverpb"
  20. "go.uber.org/zap"
  21. "google.golang.org/grpc"
  22. )
  23. type (
  24. DefragmentResponse pb.DefragmentResponse
  25. AlarmResponse pb.AlarmResponse
  26. AlarmMember pb.AlarmMember
  27. StatusResponse pb.StatusResponse
  28. HashKVResponse pb.HashKVResponse
  29. MoveLeaderResponse pb.MoveLeaderResponse
  30. )
  31. type Maintenance interface {
  32. // AlarmList gets all active alarms.
  33. AlarmList(ctx context.Context) (*AlarmResponse, error)
  34. // AlarmDisarm disarms a given alarm.
  35. AlarmDisarm(ctx context.Context, m *AlarmMember) (*AlarmResponse, error)
  36. // Defragment releases wasted space from internal fragmentation on a given etcd member.
  37. // Defragment is only needed when deleting a large number of keys and want to reclaim
  38. // the resources.
  39. // Defragment is an expensive operation. User should avoid defragmenting multiple members
  40. // at the same time.
  41. // To defragment multiple members in the cluster, user need to call defragment multiple
  42. // times with different endpoints.
  43. Defragment(ctx context.Context, endpoint string) (*DefragmentResponse, error)
  44. // Status gets the status of the endpoint.
  45. Status(ctx context.Context, endpoint string) (*StatusResponse, error)
  46. // HashKV returns a hash of the KV state at the time of the RPC.
  47. // If revision is zero, the hash is computed on all keys. If the revision
  48. // is non-zero, the hash is computed on all keys at or below the given revision.
  49. HashKV(ctx context.Context, endpoint string, rev int64) (*HashKVResponse, error)
  50. // Snapshot provides a reader for a point-in-time snapshot of etcd.
  51. // If the context "ctx" is canceled or timed out, reading from returned
  52. // "io.ReadCloser" would error out (e.g. context.Canceled, context.DeadlineExceeded).
  53. Snapshot(ctx context.Context) (io.ReadCloser, error)
  54. // MoveLeader requests current leader to transfer its leadership to the transferee.
  55. // Request must be made to the leader.
  56. MoveLeader(ctx context.Context, transfereeID uint64) (*MoveLeaderResponse, error)
  57. }
  58. type maintenance struct {
  59. lg *zap.Logger
  60. dial func(endpoint string) (pb.MaintenanceClient, func(), error)
  61. remote pb.MaintenanceClient
  62. callOpts []grpc.CallOption
  63. }
  64. func NewMaintenance(c *Client) Maintenance {
  65. api := &maintenance{
  66. lg: c.lg,
  67. dial: func(endpoint string) (pb.MaintenanceClient, func(), error) {
  68. conn, err := c.Dial(endpoint)
  69. if err != nil {
  70. return nil, nil, fmt.Errorf("failed to dial endpoint %s with maintenance client: %v", endpoint, err)
  71. }
  72. //get token with established connection
  73. dctx := c.ctx
  74. cancel := func() {}
  75. if c.cfg.DialTimeout > 0 {
  76. dctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout)
  77. }
  78. err = c.getToken(dctx)
  79. cancel()
  80. if err != nil {
  81. conn.Close()
  82. return nil, nil, fmt.Errorf("failed to getToken from endpoint %s with maintenance client: %v", endpoint, err)
  83. }
  84. cancel = func() { conn.Close() }
  85. return RetryMaintenanceClient(c, conn), cancel, nil
  86. },
  87. remote: RetryMaintenanceClient(c, c.conn),
  88. }
  89. if c != nil {
  90. api.callOpts = c.callOpts
  91. }
  92. return api
  93. }
  94. func NewMaintenanceFromMaintenanceClient(remote pb.MaintenanceClient, c *Client) Maintenance {
  95. api := &maintenance{
  96. lg: c.lg,
  97. dial: func(string) (pb.MaintenanceClient, func(), error) {
  98. return remote, func() {}, nil
  99. },
  100. remote: remote,
  101. }
  102. if c != nil {
  103. api.callOpts = c.callOpts
  104. }
  105. return api
  106. }
  107. func (m *maintenance) AlarmList(ctx context.Context) (*AlarmResponse, error) {
  108. req := &pb.AlarmRequest{
  109. Action: pb.AlarmRequest_GET,
  110. MemberID: 0, // all
  111. Alarm: pb.AlarmType_NONE, // all
  112. }
  113. resp, err := m.remote.Alarm(ctx, req, m.callOpts...)
  114. if err == nil {
  115. return (*AlarmResponse)(resp), nil
  116. }
  117. return nil, toErr(ctx, err)
  118. }
  119. func (m *maintenance) AlarmDisarm(ctx context.Context, am *AlarmMember) (*AlarmResponse, error) {
  120. req := &pb.AlarmRequest{
  121. Action: pb.AlarmRequest_DEACTIVATE,
  122. MemberID: am.MemberID,
  123. Alarm: am.Alarm,
  124. }
  125. if req.MemberID == 0 && req.Alarm == pb.AlarmType_NONE {
  126. ar, err := m.AlarmList(ctx)
  127. if err != nil {
  128. return nil, toErr(ctx, err)
  129. }
  130. ret := AlarmResponse{}
  131. for _, am := range ar.Alarms {
  132. dresp, derr := m.AlarmDisarm(ctx, (*AlarmMember)(am))
  133. if derr != nil {
  134. return nil, toErr(ctx, derr)
  135. }
  136. ret.Alarms = append(ret.Alarms, dresp.Alarms...)
  137. }
  138. return &ret, nil
  139. }
  140. resp, err := m.remote.Alarm(ctx, req, m.callOpts...)
  141. if err == nil {
  142. return (*AlarmResponse)(resp), nil
  143. }
  144. return nil, toErr(ctx, err)
  145. }
  146. func (m *maintenance) Defragment(ctx context.Context, endpoint string) (*DefragmentResponse, error) {
  147. remote, cancel, err := m.dial(endpoint)
  148. if err != nil {
  149. return nil, toErr(ctx, err)
  150. }
  151. defer cancel()
  152. resp, err := remote.Defragment(ctx, &pb.DefragmentRequest{}, m.callOpts...)
  153. if err != nil {
  154. return nil, toErr(ctx, err)
  155. }
  156. return (*DefragmentResponse)(resp), nil
  157. }
  158. func (m *maintenance) Status(ctx context.Context, endpoint string) (*StatusResponse, error) {
  159. remote, cancel, err := m.dial(endpoint)
  160. if err != nil {
  161. return nil, toErr(ctx, err)
  162. }
  163. defer cancel()
  164. resp, err := remote.Status(ctx, &pb.StatusRequest{}, m.callOpts...)
  165. if err != nil {
  166. return nil, toErr(ctx, err)
  167. }
  168. return (*StatusResponse)(resp), nil
  169. }
  170. func (m *maintenance) HashKV(ctx context.Context, endpoint string, rev int64) (*HashKVResponse, error) {
  171. remote, cancel, err := m.dial(endpoint)
  172. if err != nil {
  173. return nil, toErr(ctx, err)
  174. }
  175. defer cancel()
  176. resp, err := remote.HashKV(ctx, &pb.HashKVRequest{Revision: rev}, m.callOpts...)
  177. if err != nil {
  178. return nil, toErr(ctx, err)
  179. }
  180. return (*HashKVResponse)(resp), nil
  181. }
  182. func (m *maintenance) Snapshot(ctx context.Context) (io.ReadCloser, error) {
  183. ss, err := m.remote.Snapshot(ctx, &pb.SnapshotRequest{}, append(m.callOpts, withMax(defaultStreamMaxRetries))...)
  184. if err != nil {
  185. return nil, toErr(ctx, err)
  186. }
  187. m.lg.Info("opened snapshot stream; downloading")
  188. pr, pw := io.Pipe()
  189. go func() {
  190. for {
  191. resp, err := ss.Recv()
  192. if err != nil {
  193. switch err {
  194. case io.EOF:
  195. m.lg.Info("completed snapshot read; closing")
  196. default:
  197. m.lg.Warn("failed to receive from snapshot stream; closing", zap.Error(err))
  198. }
  199. pw.CloseWithError(err)
  200. return
  201. }
  202. // can "resp == nil && err == nil"
  203. // before we receive snapshot SHA digest?
  204. // No, server sends EOF with an empty response
  205. // after it sends SHA digest at the end
  206. if _, werr := pw.Write(resp.Blob); werr != nil {
  207. pw.CloseWithError(werr)
  208. return
  209. }
  210. }
  211. }()
  212. return &snapshotReadCloser{ctx: ctx, ReadCloser: pr}, nil
  213. }
  214. type snapshotReadCloser struct {
  215. ctx context.Context
  216. io.ReadCloser
  217. }
  218. func (rc *snapshotReadCloser) Read(p []byte) (n int, err error) {
  219. n, err = rc.ReadCloser.Read(p)
  220. return n, toErr(rc.ctx, err)
  221. }
  222. func (m *maintenance) MoveLeader(ctx context.Context, transfereeID uint64) (*MoveLeaderResponse, error) {
  223. resp, err := m.remote.MoveLeader(ctx, &pb.MoveLeaderRequest{TargetID: transfereeID}, m.callOpts...)
  224. return (*MoveLeaderResponse)(resp), toErr(ctx, err)
  225. }