You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

clientconn.go 44 kB

4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423
  1. /*
  2. *
  3. * Copyright 2014 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpc
  19. import (
  20. "context"
  21. "errors"
  22. "fmt"
  23. "math"
  24. "net"
  25. "reflect"
  26. "strings"
  27. "sync"
  28. "sync/atomic"
  29. "time"
  30. "google.golang.org/grpc/balancer"
  31. _ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
  32. "google.golang.org/grpc/codes"
  33. "google.golang.org/grpc/connectivity"
  34. "google.golang.org/grpc/credentials"
  35. "google.golang.org/grpc/grpclog"
  36. "google.golang.org/grpc/internal/backoff"
  37. "google.golang.org/grpc/internal/channelz"
  38. "google.golang.org/grpc/internal/envconfig"
  39. "google.golang.org/grpc/internal/grpcsync"
  40. "google.golang.org/grpc/internal/transport"
  41. "google.golang.org/grpc/keepalive"
  42. "google.golang.org/grpc/resolver"
  43. _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
  44. _ "google.golang.org/grpc/resolver/passthrough" // To register passthrough resolver.
  45. "google.golang.org/grpc/status"
  46. )
  // Connection-level constants used by the client.
  const (
  	// grpclbName is the registered name of the grpclb balancer. It must
  	// match grpclbName in grpclb/grpclb.go.
  	grpclbName = "grpclb"

  	// minConnectTimeout is the minimum time to give a connection to
  	// complete.
  	minConnectTimeout = time.Second * 20
  )
  53. var (
  54. // ErrClientConnClosing indicates that the operation is illegal because
  55. // the ClientConn is closing.
  56. //
  57. // Deprecated: this error should not be relied upon by users; use the status
  58. // code of Canceled instead.
  59. ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
  60. // errConnDrain indicates that the connection starts to be drained and does not accept any new RPCs.
  61. errConnDrain = errors.New("grpc: the connection is drained")
  62. // errConnClosing indicates that the connection is closing.
  63. errConnClosing = errors.New("grpc: the connection is closing")
  64. // errBalancerClosed indicates that the balancer is closed.
  65. errBalancerClosed = errors.New("grpc: balancer is closed")
  66. // invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default
  67. // service config.
  68. invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
  69. )
  // Errors returned from Dial and DialContext when the configured security
  // options are missing or contradict each other.

  // errNoTransportSecurity means no transport security was configured for the
  // ClientConn. Users should either set credentials or explicitly opt out with
  // the WithInsecure DialOption.
  var errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")

  // errTransportCredsAndBundle means a credentials bundle was supplied together
  // with individual transport credentials.
  var errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")

  // errTransportCredentialsMissing means per-RPC security information (e.g. an
  // OAuth2 token) that requires a secure connection was configured on an
  // insecure connection.
  var errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")

  // errCredentialsConflict means both grpc.WithTransportCredentials() and
  // grpc.WithInsecure() were used for the same connection.
  var errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
  // Default message-size limits and buffer sizes for the client.
  const (
  	// defaultClientMaxReceiveMessageSize is the default limit on received
  	// message size: 4 MiB.
  	defaultClientMaxReceiveMessageSize = 4 << 20
  	// defaultClientMaxSendMessageSize is the default limit on sent message
  	// size.
  	defaultClientMaxSendMessageSize = math.MaxInt32

  	// defaultWriteBufSize is the default buffer size for sending frames:
  	// 32 KiB.
  	defaultWriteBufSize = 32 << 10
  	// defaultReadBufSize is the default read buffer size: 32 KiB.
  	defaultReadBufSize = 32 << 10
  )
  94. // Dial creates a client connection to the given target.
  95. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  96. return DialContext(context.Background(), target, opts...)
  97. }
  98. // DialContext creates a client connection to the given target. By default, it's
  99. // a non-blocking dial (the function won't wait for connections to be
  100. // established, and connecting happens in the background). To make it a blocking
  101. // dial, use WithBlock() dial option.
  102. //
  103. // In the non-blocking case, the ctx does not act against the connection. It
  104. // only controls the setup steps.
  105. //
  106. // In the blocking case, ctx can be used to cancel or expire the pending
  107. // connection. Once this function returns, the cancellation and expiration of
  108. // ctx will be noop. Users should call ClientConn.Close to terminate all the
  109. // pending operations after this function returns.
  110. //
  111. // The target name syntax is defined in
  112. // https://github.com/grpc/grpc/blob/master/doc/naming.md.
  113. // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
  114. func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
  115. cc := &ClientConn{
  116. target: target,
  117. csMgr: &connectivityStateManager{},
  118. conns: make(map[*addrConn]struct{}),
  119. dopts: defaultDialOptions(),
  120. blockingpicker: newPickerWrapper(),
  121. czData: new(channelzData),
  122. firstResolveEvent: grpcsync.NewEvent(),
  123. }
  124. cc.retryThrottler.Store((*retryThrottler)(nil))
  125. cc.ctx, cc.cancel = context.WithCancel(context.Background())
  126. for _, opt := range opts {
  127. opt.apply(&cc.dopts)
  128. }
  129. chainUnaryClientInterceptors(cc)
  130. chainStreamClientInterceptors(cc)
  131. defer func() {
  132. if err != nil {
  133. cc.Close()
  134. }
  135. }()
  136. if channelz.IsOn() {
  137. if cc.dopts.channelzParentID != 0 {
  138. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
  139. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  140. Desc: "Channel Created",
  141. Severity: channelz.CtINFO,
  142. Parent: &channelz.TraceEventDesc{
  143. Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
  144. Severity: channelz.CtINFO,
  145. },
  146. })
  147. } else {
  148. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
  149. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  150. Desc: "Channel Created",
  151. Severity: channelz.CtINFO,
  152. })
  153. }
  154. cc.csMgr.channelzID = cc.channelzID
  155. }
  156. if !cc.dopts.insecure {
  157. if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
  158. return nil, errNoTransportSecurity
  159. }
  160. if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
  161. return nil, errTransportCredsAndBundle
  162. }
  163. } else {
  164. if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
  165. return nil, errCredentialsConflict
  166. }
  167. for _, cd := range cc.dopts.copts.PerRPCCredentials {
  168. if cd.RequireTransportSecurity() {
  169. return nil, errTransportCredentialsMissing
  170. }
  171. }
  172. }
  173. if cc.dopts.defaultServiceConfigRawJSON != nil {
  174. sc, err := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
  175. if err != nil {
  176. return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, err)
  177. }
  178. cc.dopts.defaultServiceConfig = sc
  179. }
  180. cc.mkp = cc.dopts.copts.KeepaliveParams
  181. if cc.dopts.copts.Dialer == nil {
  182. cc.dopts.copts.Dialer = newProxyDialer(
  183. func(ctx context.Context, addr string) (net.Conn, error) {
  184. network, addr := parseDialTarget(addr)
  185. return (&net.Dialer{}).DialContext(ctx, network, addr)
  186. },
  187. )
  188. }
  189. if cc.dopts.copts.UserAgent != "" {
  190. cc.dopts.copts.UserAgent += " " + grpcUA
  191. } else {
  192. cc.dopts.copts.UserAgent = grpcUA
  193. }
  194. if cc.dopts.timeout > 0 {
  195. var cancel context.CancelFunc
  196. ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
  197. defer cancel()
  198. }
  199. defer func() {
  200. select {
  201. case <-ctx.Done():
  202. conn, err = nil, ctx.Err()
  203. default:
  204. }
  205. }()
  206. scSet := false
  207. if cc.dopts.scChan != nil {
  208. // Try to get an initial service config.
  209. select {
  210. case sc, ok := <-cc.dopts.scChan:
  211. if ok {
  212. cc.sc = &sc
  213. scSet = true
  214. }
  215. default:
  216. }
  217. }
  218. if cc.dopts.bs == nil {
  219. cc.dopts.bs = backoff.Exponential{
  220. MaxDelay: DefaultBackoffConfig.MaxDelay,
  221. }
  222. }
  223. if cc.dopts.resolverBuilder == nil {
  224. // Only try to parse target when resolver builder is not already set.
  225. cc.parsedTarget = parseTarget(cc.target)
  226. grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
  227. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  228. if cc.dopts.resolverBuilder == nil {
  229. // If resolver builder is still nil, the parsed target's scheme is
  230. // not registered. Fallback to default resolver and set Endpoint to
  231. // the original target.
  232. grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
  233. cc.parsedTarget = resolver.Target{
  234. Scheme: resolver.GetDefaultScheme(),
  235. Endpoint: target,
  236. }
  237. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  238. }
  239. } else {
  240. cc.parsedTarget = resolver.Target{Endpoint: target}
  241. }
  242. creds := cc.dopts.copts.TransportCredentials
  243. if creds != nil && creds.Info().ServerName != "" {
  244. cc.authority = creds.Info().ServerName
  245. } else if cc.dopts.insecure && cc.dopts.authority != "" {
  246. cc.authority = cc.dopts.authority
  247. } else {
  248. // Use endpoint from "scheme://authority/endpoint" as the default
  249. // authority for ClientConn.
  250. cc.authority = cc.parsedTarget.Endpoint
  251. }
  252. if cc.dopts.scChan != nil && !scSet {
  253. // Blocking wait for the initial service config.
  254. select {
  255. case sc, ok := <-cc.dopts.scChan:
  256. if ok {
  257. cc.sc = &sc
  258. }
  259. case <-ctx.Done():
  260. return nil, ctx.Err()
  261. }
  262. }
  263. if cc.dopts.scChan != nil {
  264. go cc.scWatcher()
  265. }
  266. var credsClone credentials.TransportCredentials
  267. if creds := cc.dopts.copts.TransportCredentials; creds != nil {
  268. credsClone = creds.Clone()
  269. }
  270. cc.balancerBuildOpts = balancer.BuildOptions{
  271. DialCreds: credsClone,
  272. CredsBundle: cc.dopts.copts.CredsBundle,
  273. Dialer: cc.dopts.copts.Dialer,
  274. ChannelzParentID: cc.channelzID,
  275. Target: cc.parsedTarget,
  276. }
  277. // Build the resolver.
  278. rWrapper, err := newCCResolverWrapper(cc)
  279. if err != nil {
  280. return nil, fmt.Errorf("failed to build resolver: %v", err)
  281. }
  282. cc.mu.Lock()
  283. cc.resolverWrapper = rWrapper
  284. cc.mu.Unlock()
  285. // A blocking dial blocks until the clientConn is ready.
  286. if cc.dopts.block {
  287. for {
  288. s := cc.GetState()
  289. if s == connectivity.Ready {
  290. break
  291. } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
  292. if err = cc.blockingpicker.connectionError(); err != nil {
  293. terr, ok := err.(interface {
  294. Temporary() bool
  295. })
  296. if ok && !terr.Temporary() {
  297. return nil, err
  298. }
  299. }
  300. }
  301. if !cc.WaitForStateChange(ctx, s) {
  302. // ctx got timeout or canceled.
  303. return nil, ctx.Err()
  304. }
  305. }
  306. }
  307. return cc, nil
  308. }
  309. // chainUnaryClientInterceptors chains all unary client interceptors into one.
  310. func chainUnaryClientInterceptors(cc *ClientConn) {
  311. interceptors := cc.dopts.chainUnaryInts
  312. // Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will
  313. // be executed before any other chained interceptors.
  314. if cc.dopts.unaryInt != nil {
  315. interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...)
  316. }
  317. var chainedInt UnaryClientInterceptor
  318. if len(interceptors) == 0 {
  319. chainedInt = nil
  320. } else if len(interceptors) == 1 {
  321. chainedInt = interceptors[0]
  322. } else {
  323. chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
  324. return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...)
  325. }
  326. }
  327. cc.dopts.unaryInt = chainedInt
  328. }
  329. // getChainUnaryInvoker recursively generate the chained unary invoker.
  330. func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker {
  331. if curr == len(interceptors)-1 {
  332. return finalInvoker
  333. }
  334. return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error {
  335. return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...)
  336. }
  337. }
  338. // chainStreamClientInterceptors chains all stream client interceptors into one.
  339. func chainStreamClientInterceptors(cc *ClientConn) {
  340. interceptors := cc.dopts.chainStreamInts
  341. // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will
  342. // be executed before any other chained interceptors.
  343. if cc.dopts.streamInt != nil {
  344. interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...)
  345. }
  346. var chainedInt StreamClientInterceptor
  347. if len(interceptors) == 0 {
  348. chainedInt = nil
  349. } else if len(interceptors) == 1 {
  350. chainedInt = interceptors[0]
  351. } else {
  352. chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) {
  353. return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...)
  354. }
  355. }
  356. cc.dopts.streamInt = chainedInt
  357. }
  358. // getChainStreamer recursively generate the chained client stream constructor.
  359. func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer {
  360. if curr == len(interceptors)-1 {
  361. return finalStreamer
  362. }
  363. return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) {
  364. return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...)
  365. }
  366. }
  367. // connectivityStateManager keeps the connectivity.State of ClientConn.
  368. // This struct will eventually be exported so the balancers can access it.
  369. type connectivityStateManager struct {
  370. mu sync.Mutex
  371. state connectivity.State
  372. notifyChan chan struct{}
  373. channelzID int64
  374. }
  375. // updateState updates the connectivity.State of ClientConn.
  376. // If there's a change it notifies goroutines waiting on state change to
  377. // happen.
  378. func (csm *connectivityStateManager) updateState(state connectivity.State) {
  379. csm.mu.Lock()
  380. defer csm.mu.Unlock()
  381. if csm.state == connectivity.Shutdown {
  382. return
  383. }
  384. if csm.state == state {
  385. return
  386. }
  387. csm.state = state
  388. if channelz.IsOn() {
  389. channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
  390. Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
  391. Severity: channelz.CtINFO,
  392. })
  393. }
  394. if csm.notifyChan != nil {
  395. // There are other goroutines waiting on this channel.
  396. close(csm.notifyChan)
  397. csm.notifyChan = nil
  398. }
  399. }
  400. func (csm *connectivityStateManager) getState() connectivity.State {
  401. csm.mu.Lock()
  402. defer csm.mu.Unlock()
  403. return csm.state
  404. }
  405. func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
  406. csm.mu.Lock()
  407. defer csm.mu.Unlock()
  408. if csm.notifyChan == nil {
  409. csm.notifyChan = make(chan struct{})
  410. }
  411. return csm.notifyChan
  412. }
  413. // ClientConn represents a client connection to an RPC server.
  414. type ClientConn struct {
  415. ctx context.Context
  416. cancel context.CancelFunc
  417. target string
  418. parsedTarget resolver.Target
  419. authority string
  420. dopts dialOptions
  421. csMgr *connectivityStateManager
  422. balancerBuildOpts balancer.BuildOptions
  423. blockingpicker *pickerWrapper
  424. mu sync.RWMutex
  425. resolverWrapper *ccResolverWrapper
  426. sc *ServiceConfig
  427. conns map[*addrConn]struct{}
  428. // Keepalive parameter can be updated if a GoAway is received.
  429. mkp keepalive.ClientParameters
  430. curBalancerName string
  431. balancerWrapper *ccBalancerWrapper
  432. retryThrottler atomic.Value
  433. firstResolveEvent *grpcsync.Event
  434. channelzID int64 // channelz unique identification number
  435. czData *channelzData
  436. }
  437. // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
  438. // ctx expires. A true value is returned in former case and false in latter.
  439. // This is an EXPERIMENTAL API.
  440. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
  441. ch := cc.csMgr.getNotifyChan()
  442. if cc.csMgr.getState() != sourceState {
  443. return true
  444. }
  445. select {
  446. case <-ctx.Done():
  447. return false
  448. case <-ch:
  449. return true
  450. }
  451. }
  452. // GetState returns the connectivity.State of ClientConn.
  453. // This is an EXPERIMENTAL API.
  454. func (cc *ClientConn) GetState() connectivity.State {
  455. return cc.csMgr.getState()
  456. }
  457. func (cc *ClientConn) scWatcher() {
  458. for {
  459. select {
  460. case sc, ok := <-cc.dopts.scChan:
  461. if !ok {
  462. return
  463. }
  464. cc.mu.Lock()
  465. // TODO: load balance policy runtime change is ignored.
  466. // We may revisit this decision in the future.
  467. cc.sc = &sc
  468. cc.mu.Unlock()
  469. case <-cc.ctx.Done():
  470. return
  471. }
  472. }
  473. }
  474. // waitForResolvedAddrs blocks until the resolver has provided addresses or the
  475. // context expires. Returns nil unless the context expires first; otherwise
  476. // returns a status error based on the context.
  477. func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
  478. // This is on the RPC path, so we use a fast path to avoid the
  479. // more-expensive "select" below after the resolver has returned once.
  480. if cc.firstResolveEvent.HasFired() {
  481. return nil
  482. }
  483. select {
  484. case <-cc.firstResolveEvent.Done():
  485. return nil
  486. case <-ctx.Done():
  487. return status.FromContextError(ctx.Err()).Err()
  488. case <-cc.ctx.Done():
  489. return ErrClientConnClosing
  490. }
  491. }
  492. // gRPC should resort to default service config when:
  493. // * resolver service config is disabled
  494. // * or, resolver does not return a service config or returns an invalid one.
  495. func (cc *ClientConn) fallbackToDefaultServiceConfig(sc string) bool {
  496. if cc.dopts.disableServiceConfig {
  497. return true
  498. }
  499. // The logic below is temporary, will be removed once we change the resolver.State ServiceConfig field type.
  500. // Right now, we assume that empty service config string means resolver does not return a config.
  501. if sc == "" {
  502. return true
  503. }
  504. // TODO: the logic below is temporary. Once we finish the logic to validate service config
  505. // in resolver, we will replace the logic below.
  506. _, err := parseServiceConfig(sc)
  507. return err != nil
  508. }
  509. func (cc *ClientConn) updateResolverState(s resolver.State) error {
  510. cc.mu.Lock()
  511. defer cc.mu.Unlock()
  512. // Check if the ClientConn is already closed. Some fields (e.g.
  513. // balancerWrapper) are set to nil when closing the ClientConn, and could
  514. // cause nil pointer panic if we don't have this check.
  515. if cc.conns == nil {
  516. return nil
  517. }
  518. if cc.fallbackToDefaultServiceConfig(s.ServiceConfig) {
  519. if cc.dopts.defaultServiceConfig != nil && cc.sc == nil {
  520. cc.applyServiceConfig(cc.dopts.defaultServiceConfig)
  521. }
  522. } else {
  523. // TODO: the parsing logic below will be moved inside resolver.
  524. sc, err := parseServiceConfig(s.ServiceConfig)
  525. if err != nil {
  526. return err
  527. }
  528. if cc.sc == nil || cc.sc.rawJSONString != s.ServiceConfig {
  529. cc.applyServiceConfig(sc)
  530. }
  531. }
  532. // update the service config that will be sent to balancer.
  533. if cc.sc != nil {
  534. s.ServiceConfig = cc.sc.rawJSONString
  535. }
  536. if cc.dopts.balancerBuilder == nil {
  537. // Only look at balancer types and switch balancer if balancer dial
  538. // option is not set.
  539. var isGRPCLB bool
  540. for _, a := range s.Addresses {
  541. if a.Type == resolver.GRPCLB {
  542. isGRPCLB = true
  543. break
  544. }
  545. }
  546. var newBalancerName string
  547. // TODO: use new loadBalancerConfig field with appropriate priority.
  548. if isGRPCLB {
  549. newBalancerName = grpclbName
  550. } else if cc.sc != nil && cc.sc.LB != nil {
  551. newBalancerName = *cc.sc.LB
  552. } else {
  553. newBalancerName = PickFirstBalancerName
  554. }
  555. cc.switchBalancer(newBalancerName)
  556. } else if cc.balancerWrapper == nil {
  557. // Balancer dial option was set, and this is the first time handling
  558. // resolved addresses. Build a balancer with dopts.balancerBuilder.
  559. cc.curBalancerName = cc.dopts.balancerBuilder.Name()
  560. cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
  561. }
  562. cc.balancerWrapper.updateResolverState(s)
  563. cc.firstResolveEvent.Fire()
  564. return nil
  565. }
  566. // switchBalancer starts the switching from current balancer to the balancer
  567. // with the given name.
  568. //
  569. // It will NOT send the current address list to the new balancer. If needed,
  570. // caller of this function should send address list to the new balancer after
  571. // this function returns.
  572. //
  573. // Caller must hold cc.mu.
  574. func (cc *ClientConn) switchBalancer(name string) {
  575. if strings.ToLower(cc.curBalancerName) == strings.ToLower(name) {
  576. return
  577. }
  578. grpclog.Infof("ClientConn switching balancer to %q", name)
  579. if cc.dopts.balancerBuilder != nil {
  580. grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
  581. return
  582. }
  583. if cc.balancerWrapper != nil {
  584. cc.balancerWrapper.close()
  585. }
  586. builder := balancer.Get(name)
  587. if channelz.IsOn() {
  588. if builder == nil {
  589. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  590. Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
  591. Severity: channelz.CtWarning,
  592. })
  593. } else {
  594. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  595. Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
  596. Severity: channelz.CtINFO,
  597. })
  598. }
  599. }
  600. if builder == nil {
  601. grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
  602. builder = newPickfirstBuilder()
  603. }
  604. cc.curBalancerName = builder.Name()
  605. cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
  606. }
  607. func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
  608. cc.mu.Lock()
  609. if cc.conns == nil {
  610. cc.mu.Unlock()
  611. return
  612. }
  613. // TODO(bar switching) send updates to all balancer wrappers when balancer
  614. // gracefully switching is supported.
  615. cc.balancerWrapper.handleSubConnStateChange(sc, s)
  616. cc.mu.Unlock()
  617. }
  618. // newAddrConn creates an addrConn for addrs and adds it to cc.conns.
  619. //
  620. // Caller needs to make sure len(addrs) > 0.
  621. func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
  622. ac := &addrConn{
  623. cc: cc,
  624. addrs: addrs,
  625. scopts: opts,
  626. dopts: cc.dopts,
  627. czData: new(channelzData),
  628. resetBackoff: make(chan struct{}),
  629. }
  630. ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
  631. // Track ac in cc. This needs to be done before any getTransport(...) is called.
  632. cc.mu.Lock()
  633. if cc.conns == nil {
  634. cc.mu.Unlock()
  635. return nil, ErrClientConnClosing
  636. }
  637. if channelz.IsOn() {
  638. ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
  639. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  640. Desc: "Subchannel Created",
  641. Severity: channelz.CtINFO,
  642. Parent: &channelz.TraceEventDesc{
  643. Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
  644. Severity: channelz.CtINFO,
  645. },
  646. })
  647. }
  648. cc.conns[ac] = struct{}{}
  649. cc.mu.Unlock()
  650. return ac, nil
  651. }
  652. // removeAddrConn removes the addrConn in the subConn from clientConn.
  653. // It also tears down the ac with the given error.
  654. func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
  655. cc.mu.Lock()
  656. if cc.conns == nil {
  657. cc.mu.Unlock()
  658. return
  659. }
  660. delete(cc.conns, ac)
  661. cc.mu.Unlock()
  662. ac.tearDown(err)
  663. }
  664. func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
  665. return &channelz.ChannelInternalMetric{
  666. State: cc.GetState(),
  667. Target: cc.target,
  668. CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
  669. CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
  670. CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
  671. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
  672. }
  673. }
  674. // Target returns the target string of the ClientConn.
  675. // This is an EXPERIMENTAL API.
  676. func (cc *ClientConn) Target() string {
  677. return cc.target
  678. }
  679. func (cc *ClientConn) incrCallsStarted() {
  680. atomic.AddInt64(&cc.czData.callsStarted, 1)
  681. atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
  682. }
  683. func (cc *ClientConn) incrCallsSucceeded() {
  684. atomic.AddInt64(&cc.czData.callsSucceeded, 1)
  685. }
  686. func (cc *ClientConn) incrCallsFailed() {
  687. atomic.AddInt64(&cc.czData.callsFailed, 1)
  688. }
  689. // connect starts creating a transport.
  690. // It does nothing if the ac is not IDLE.
  691. // TODO(bar) Move this to the addrConn section.
  692. func (ac *addrConn) connect() error {
  693. ac.mu.Lock()
  694. if ac.state == connectivity.Shutdown {
  695. ac.mu.Unlock()
  696. return errConnClosing
  697. }
  698. if ac.state != connectivity.Idle {
  699. ac.mu.Unlock()
  700. return nil
  701. }
  702. ac.updateConnectivityState(connectivity.Connecting)
  703. ac.mu.Unlock()
  704. // Start a goroutine connecting to the server asynchronously.
  705. go ac.resetTransport()
  706. return nil
  707. }
  708. // tryUpdateAddrs tries to update ac.addrs with the new addresses list.
  709. //
  710. // It checks whether current connected address of ac is in the new addrs list.
  711. // - If true, it updates ac.addrs and returns true. The ac will keep using
  712. // the existing connection.
  713. // - If false, it does nothing and returns false.
  714. func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
  715. ac.mu.Lock()
  716. defer ac.mu.Unlock()
  717. grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
  718. if ac.state == connectivity.Shutdown {
  719. ac.addrs = addrs
  720. return true
  721. }
  722. // Unless we're busy reconnecting already, let's reconnect from the top of
  723. // the list.
  724. if ac.state != connectivity.Ready {
  725. return false
  726. }
  727. var curAddrFound bool
  728. for _, a := range addrs {
  729. if reflect.DeepEqual(ac.curAddr, a) {
  730. curAddrFound = true
  731. break
  732. }
  733. }
  734. grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
  735. if curAddrFound {
  736. ac.addrs = addrs
  737. }
  738. return curAddrFound
  739. }
  740. // GetMethodConfig gets the method config of the input method.
  741. // If there's an exact match for input method (i.e. /service/method), we return
  742. // the corresponding MethodConfig.
  743. // If there isn't an exact match for the input method, we look for the default config
  744. // under the service (i.e /service/). If there is a default MethodConfig for
  745. // the service, we return it.
  746. // Otherwise, we return an empty MethodConfig.
  747. func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
  748. // TODO: Avoid the locking here.
  749. cc.mu.RLock()
  750. defer cc.mu.RUnlock()
  751. if cc.sc == nil {
  752. return MethodConfig{}
  753. }
  754. m, ok := cc.sc.Methods[method]
  755. if !ok {
  756. i := strings.LastIndex(method, "/")
  757. m = cc.sc.Methods[method[:i+1]]
  758. }
  759. return m
  760. }
  761. func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
  762. cc.mu.RLock()
  763. defer cc.mu.RUnlock()
  764. if cc.sc == nil {
  765. return nil
  766. }
  767. return cc.sc.healthCheckConfig
  768. }
  769. func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
  770. t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
  771. FullMethodName: method,
  772. })
  773. if err != nil {
  774. return nil, nil, toRPCErr(err)
  775. }
  776. return t, done, nil
  777. }
  778. func (cc *ClientConn) applyServiceConfig(sc *ServiceConfig) error {
  779. if sc == nil {
  780. // should never reach here.
  781. return fmt.Errorf("got nil pointer for service config")
  782. }
  783. cc.sc = sc
  784. if cc.sc.retryThrottling != nil {
  785. newThrottler := &retryThrottler{
  786. tokens: cc.sc.retryThrottling.MaxTokens,
  787. max: cc.sc.retryThrottling.MaxTokens,
  788. thresh: cc.sc.retryThrottling.MaxTokens / 2,
  789. ratio: cc.sc.retryThrottling.TokenRatio,
  790. }
  791. cc.retryThrottler.Store(newThrottler)
  792. } else {
  793. cc.retryThrottler.Store((*retryThrottler)(nil))
  794. }
  795. return nil
  796. }
  797. func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
  798. cc.mu.RLock()
  799. r := cc.resolverWrapper
  800. cc.mu.RUnlock()
  801. if r == nil {
  802. return
  803. }
  804. go r.resolveNow(o)
  805. }
  806. // ResetConnectBackoff wakes up all subchannels in transient failure and causes
  807. // them to attempt another connection immediately. It also resets the backoff
  808. // times used for subsequent attempts regardless of the current state.
  809. //
  810. // In general, this function should not be used. Typical service or network
  811. // outages result in a reasonable client reconnection strategy by default.
  812. // However, if a previously unavailable network becomes available, this may be
  813. // used to trigger an immediate reconnect.
  814. //
  815. // This API is EXPERIMENTAL.
  816. func (cc *ClientConn) ResetConnectBackoff() {
  817. cc.mu.Lock()
  818. defer cc.mu.Unlock()
  819. for ac := range cc.conns {
  820. ac.resetConnectBackoff()
  821. }
  822. }
// Close tears down the ClientConn and all underlying connections.
//
// It returns ErrClientConnClosing if the ClientConn has already been closed
// (cc.conns is nil), and nil otherwise. In both cases cc.cancel is invoked
// when Close returns.
func (cc *ClientConn) Close() error {
	defer cc.cancel()
	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return ErrClientConnClosing
	}
	// Detach everything that needs closing while holding the lock, then do the
	// actual closing after the lock has been released.
	conns := cc.conns
	// A nil cc.conns is what marks the ClientConn as closed to other methods.
	cc.conns = nil
	cc.csMgr.updateState(connectivity.Shutdown)
	rWrapper := cc.resolverWrapper
	cc.resolverWrapper = nil
	bWrapper := cc.balancerWrapper
	cc.balancerWrapper = nil
	cc.mu.Unlock()
	cc.blockingpicker.close()
	if rWrapper != nil {
		rWrapper.close()
	}
	if bWrapper != nil {
		bWrapper.close()
	}
	for ac := range conns {
		ac.tearDown(ErrClientConnClosing)
	}
	if channelz.IsOn() {
		ted := &channelz.TraceEventDesc{
			Desc: "Channel Deleted",
			Severity: channelz.CtINFO,
		}
		// A non-zero parent ID means this channel is nested; also record the
		// deletion on the parent's trace.
		if cc.dopts.channelzParentID != 0 {
			ted.Parent = &channelz.TraceEventDesc{
				Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
				Severity: channelz.CtINFO,
			}
		}
		channelz.AddTraceEvent(cc.channelzID, ted)
		// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
		// the entity being deleted, and thus prevent it from being deleted right away.
		channelz.RemoveEntry(cc.channelzID)
	}
	return nil
}
// addrConn is a network connection to a given address.
type addrConn struct {
	// ctx is derived from the owning ClientConn's context in newAddrConn;
	// cancel is invoked by tearDown.
	ctx context.Context
	cancel context.CancelFunc
	// cc is the ClientConn this addrConn belongs to.
	cc *ClientConn
	// dopts is initialized from cc.dopts when the addrConn is created.
	dopts dialOptions
	// acbw is the balancer.SubConn passed to cc.handleSubConnStateChange on
	// every connectivity state change.
	acbw balancer.SubConn
	// scopts holds the options supplied when the addrConn was created.
	scopts balancer.NewSubConnOptions
	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
	// health checking may require server to report healthy to set ac to READY), and is reset
	// to nil when the current transport should no longer be used to create a stream (e.g. after GoAway
	// is received, transport is closed, ac has been torn down).
	transport transport.ClientTransport // The current transport.
	// mu is held by the addrConn methods in this file while they read or
	// update transport, curAddr, addrs, state, backoffIdx and resetBackoff.
	mu sync.Mutex
	curAddr resolver.Address // The current address.
	addrs []resolver.Address // All addresses that the resolver resolved to.
	// Use updateConnectivityState for updating addrConn's connectivity state.
	state connectivity.State
	backoffIdx int // Needs to be stateful for resetConnectBackoff.
	// resetBackoff is closed and replaced by resetConnectBackoff; a pending
	// backoff wait in resetTransport ends when it is closed.
	resetBackoff chan struct{}
	channelzID int64 // channelz unique identification number.
	// czData holds the call counters reported through ChannelzMetric.
	czData *channelzData
}
  890. // Note: this requires a lock on ac.mu.
  891. func (ac *addrConn) updateConnectivityState(s connectivity.State) {
  892. if ac.state == s {
  893. return
  894. }
  895. updateMsg := fmt.Sprintf("Subchannel Connectivity change to %v", s)
  896. ac.state = s
  897. if channelz.IsOn() {
  898. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  899. Desc: updateMsg,
  900. Severity: channelz.CtINFO,
  901. })
  902. }
  903. ac.cc.handleSubConnStateChange(ac.acbw, s)
  904. }
  905. // adjustParams updates parameters used to create transports upon
  906. // receiving a GoAway.
  907. func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
  908. switch r {
  909. case transport.GoAwayTooManyPings:
  910. v := 2 * ac.dopts.copts.KeepaliveParams.Time
  911. ac.cc.mu.Lock()
  912. if v > ac.cc.mkp.Time {
  913. ac.cc.mkp.Time = v
  914. }
  915. ac.cc.mu.Unlock()
  916. }
  917. }
// resetTransport is the addrConn's connection loop. Each iteration tries all
// addresses once; on failure it moves to TRANSIENT_FAILURE and backs off, on
// success it publishes the new transport and blocks until that transport goes
// away, then starts over. The loop exits only when the addrConn is found in
// SHUTDOWN or its context is done during a backoff wait.
func (ac *addrConn) resetTransport() {
	for i := 0; ; i++ {
		// Every iteration after the first follows a failed attempt or a lost
		// connection, so ask the resolver to re-resolve.
		if i > 0 {
			ac.cc.resolveNow(resolver.ResolveNowOption{})
		}
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}
		// Snapshot the address list under the lock; tryAllAddrs runs unlocked.
		addrs := ac.addrs
		backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
		// This will be the duration that dial gets to finish.
		dialDuration := minConnectTimeout
		if ac.dopts.minConnectTimeout != nil {
			dialDuration = ac.dopts.minConnectTimeout()
		}
		if dialDuration < backoffFor {
			// Give dial more time as we keep failing to connect.
			dialDuration = backoffFor
		}
		// We can potentially spend all the time trying the first address, and
		// if the server accepts the connection and then hangs, the following
		// addresses will never be tried.
		//
		// The spec doesn't mention what should be done for multiple addresses.
		// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
		connectDeadline := time.Now().Add(dialDuration)
		ac.mu.Unlock()
		newTr, addr, reconnect, err := ac.tryAllAddrs(addrs, connectDeadline)
		if err != nil {
			// After exhausting all addresses, the addrConn enters
			// TRANSIENT_FAILURE.
			ac.mu.Lock()
			if ac.state == connectivity.Shutdown {
				ac.mu.Unlock()
				return
			}
			ac.updateConnectivityState(connectivity.TransientFailure)
			// Backoff.
			// Capture the channel under the lock: resetConnectBackoff replaces
			// ac.resetBackoff after closing it.
			b := ac.resetBackoff
			ac.mu.Unlock()
			timer := time.NewTimer(backoffFor)
			select {
			case <-timer.C:
				// The full backoff elapsed: advance to the next backoff step.
				ac.mu.Lock()
				ac.backoffIdx++
				ac.mu.Unlock()
			case <-b:
				// Backoff was reset; retry immediately.
				timer.Stop()
			case <-ac.ctx.Done():
				timer.Stop()
				return
			}
			continue
		}
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			// Torn down while connecting: the new transport is not wanted.
			newTr.Close()
			ac.mu.Unlock()
			return
		}
		ac.curAddr = addr
		ac.transport = newTr
		ac.backoffIdx = 0
		healthCheckConfig := ac.cc.healthCheckConfig()
		// LB channel health checking is only enabled when all the four requirements below are met:
		// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption,
		// 2. the internal.HealthCheckFunc is set by importing the grpc/healthcheck package,
		// 3. a service config with non-empty healthCheckConfig field is provided,
		// 4. the current load balancer allows it.
		hctx, hcancel := context.WithCancel(ac.ctx)
		healthcheckManagingState := false
		if !ac.cc.dopts.disableHealthCheck && healthCheckConfig != nil && ac.scopts.HealthCheckEnabled {
			if ac.cc.dopts.healthCheckFunc == nil {
				// TODO: add a link to the health check doc in the error message.
				grpclog.Error("the client side LB channel health check function has not been set.")
			} else {
				// TODO(deklerk) refactor to just return transport
				go ac.startHealthCheck(hctx, newTr, addr, healthCheckConfig.ServiceName)
				healthcheckManagingState = true
			}
		}
		// Without health checking the addrConn becomes READY right away;
		// otherwise the health check goroutine reports the state.
		if !healthcheckManagingState {
			ac.updateConnectivityState(connectivity.Ready)
		}
		ac.mu.Unlock()
		// Block until the created transport is down. And when this happens,
		// we restart from the top of the addr list.
		<-reconnect.Done()
		// Cancel the context handed to the health check for this transport.
		hcancel()
		// Need to reconnect after a READY, the addrConn enters
		// TRANSIENT_FAILURE.
		//
		// This will set addrConn to TRANSIENT_FAILURE for a very short period
		// of time, and turns CONNECTING. It seems reasonable to skip this, but
		// READY-CONNECTING is not a valid transition.
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}
		ac.updateConnectivityState(connectivity.TransientFailure)
		ac.mu.Unlock()
	}
}
// tryAllAddrs tries to create a connection to each of the addresses in order,
// and stops at the first successful one. It returns the transport, the
// address and an Event in the successful case. The Event fires when the
// returned transport disconnects.
//
// It returns errConnClosing if the addrConn is found in SHUTDOWN before an
// attempt, and a generic error once every address has failed. All attempts
// share the same connectDeadline.
func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) (transport.ClientTransport, resolver.Address, *grpcsync.Event, error) {
	for _, addr := range addrs {
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return nil, resolver.Address{}, nil, errConnClosing
		}
		ac.updateConnectivityState(connectivity.Connecting)
		ac.transport = nil
		// Refresh the keepalive params from the ClientConn before every
		// attempt; adjustParams may have raised them after a GoAway.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.mkp
		ac.cc.mu.RUnlock()
		// Work on a copy of the connect options so the per-SubConn credentials
		// override below does not modify ac.dopts.
		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()
		if channelz.IsOn() {
			channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
				Desc: fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
				Severity: channelz.CtINFO,
			})
		}
		newTr, reconnect, err := ac.createTransport(addr, copts, connectDeadline)
		if err == nil {
			return newTr, addr, reconnect, nil
		}
		// Hand the failure to the blocking picker before trying the next
		// address.
		ac.cc.blockingpicker.updateConnectionError(err)
	}
	// Couldn't connect to any address.
	return nil, resolver.Address{}, nil, fmt.Errorf("couldn't connect to any address")
}
  1059. // createTransport creates a connection to addr. It returns the transport and a
  1060. // Event in the successful case. The Event fires when the returned transport
  1061. // disconnects.
  1062. func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) (transport.ClientTransport, *grpcsync.Event, error) {
  1063. prefaceReceived := make(chan struct{})
  1064. onCloseCalled := make(chan struct{})
  1065. reconnect := grpcsync.NewEvent()
  1066. target := transport.TargetInfo{
  1067. Addr: addr.Addr,
  1068. Metadata: addr.Metadata,
  1069. Authority: ac.cc.authority,
  1070. }
  1071. onGoAway := func(r transport.GoAwayReason) {
  1072. ac.mu.Lock()
  1073. ac.adjustParams(r)
  1074. ac.mu.Unlock()
  1075. reconnect.Fire()
  1076. }
  1077. onClose := func() {
  1078. close(onCloseCalled)
  1079. reconnect.Fire()
  1080. }
  1081. onPrefaceReceipt := func() {
  1082. close(prefaceReceived)
  1083. }
  1084. connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
  1085. defer cancel()
  1086. if channelz.IsOn() {
  1087. copts.ChannelzParentID = ac.channelzID
  1088. }
  1089. newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
  1090. if err != nil {
  1091. // newTr is either nil, or closed.
  1092. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
  1093. return nil, nil, err
  1094. }
  1095. if ac.dopts.reqHandshake == envconfig.RequireHandshakeOn {
  1096. select {
  1097. case <-time.After(connectDeadline.Sub(time.Now())):
  1098. // We didn't get the preface in time.
  1099. newTr.Close()
  1100. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v: didn't receive server preface in time. Reconnecting...", addr)
  1101. return nil, nil, errors.New("timed out waiting for server handshake")
  1102. case <-prefaceReceived:
  1103. // We got the preface - huzzah! things are good.
  1104. case <-onCloseCalled:
  1105. // The transport has already closed - noop.
  1106. return nil, nil, errors.New("connection closed")
  1107. // TODO(deklerk) this should bail on ac.ctx.Done(). Add a test and fix.
  1108. }
  1109. }
  1110. return newTr, reconnect, nil
  1111. }
  1112. func (ac *addrConn) startHealthCheck(ctx context.Context, newTr transport.ClientTransport, addr resolver.Address, serviceName string) {
  1113. // Set up the health check helper functions
  1114. newStream := func() (interface{}, error) {
  1115. return ac.newClientStream(ctx, &StreamDesc{ServerStreams: true}, "/grpc.health.v1.Health/Watch", newTr)
  1116. }
  1117. firstReady := true
  1118. reportHealth := func(ok bool) {
  1119. ac.mu.Lock()
  1120. defer ac.mu.Unlock()
  1121. if ac.transport != newTr {
  1122. return
  1123. }
  1124. if ok {
  1125. if firstReady {
  1126. firstReady = false
  1127. ac.curAddr = addr
  1128. }
  1129. ac.updateConnectivityState(connectivity.Ready)
  1130. } else {
  1131. ac.updateConnectivityState(connectivity.TransientFailure)
  1132. }
  1133. }
  1134. err := ac.cc.dopts.healthCheckFunc(ctx, newStream, reportHealth, serviceName)
  1135. if err != nil {
  1136. if status.Code(err) == codes.Unimplemented {
  1137. if channelz.IsOn() {
  1138. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1139. Desc: "Subchannel health check is unimplemented at server side, thus health check is disabled",
  1140. Severity: channelz.CtError,
  1141. })
  1142. }
  1143. grpclog.Error("Subchannel health check is unimplemented at server side, thus health check is disabled")
  1144. } else {
  1145. grpclog.Errorf("HealthCheckFunc exits with unexpected error %v", err)
  1146. }
  1147. }
  1148. }
  1149. func (ac *addrConn) resetConnectBackoff() {
  1150. ac.mu.Lock()
  1151. close(ac.resetBackoff)
  1152. ac.backoffIdx = 0
  1153. ac.resetBackoff = make(chan struct{})
  1154. ac.mu.Unlock()
  1155. }
  1156. // getReadyTransport returns the transport if ac's state is READY.
  1157. // Otherwise it returns nil, false.
  1158. // If ac's state is IDLE, it will trigger ac to connect.
  1159. func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
  1160. ac.mu.Lock()
  1161. if ac.state == connectivity.Ready && ac.transport != nil {
  1162. t := ac.transport
  1163. ac.mu.Unlock()
  1164. return t, true
  1165. }
  1166. var idle bool
  1167. if ac.state == connectivity.Idle {
  1168. idle = true
  1169. }
  1170. ac.mu.Unlock()
  1171. // Trigger idle ac to connect.
  1172. if idle {
  1173. ac.connect()
  1174. }
  1175. return nil, false
  1176. }
// tearDown starts to tear down the addrConn.
//
// It is a no-op if the addrConn is already in SHUTDOWN. Otherwise it clears
// the current transport and address, moves the addrConn to SHUTDOWN and
// cancels its context. The detached transport is gracefully closed only when
// err is errConnDrain.
//
// TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
// some edge cases (e.g., the caller opens and closes many addrConn's in a
// tight loop).
// tearDown doesn't remove ac from ac.cc.conns.
func (ac *addrConn) tearDown(err error) {
	ac.mu.Lock()
	if ac.state == connectivity.Shutdown {
		ac.mu.Unlock()
		return
	}
	// Detach the transport first so it can be closed after the state change.
	curTr := ac.transport
	ac.transport = nil
	// We have to set the state to Shutdown before anything else to prevent races
	// between setting the state and logic that waits on context cancelation / etc.
	ac.updateConnectivityState(connectivity.Shutdown)
	ac.cancel()
	ac.curAddr = resolver.Address{}
	if err == errConnDrain && curTr != nil {
		// GracefulClose(...) may be executed multiple times when
		// i) receiving multiple GoAway frames from the server; or
		// ii) there are concurrent name resolver/Balancer triggered
		// address removal and GoAway.
		// We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
		ac.mu.Unlock()
		curTr.GracefulClose()
		ac.mu.Lock()
	}
	if channelz.IsOn() {
		channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
			Desc: "Subchannel Deleted",
			Severity: channelz.CtINFO,
			Parent: &channelz.TraceEventDesc{
				Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
				Severity: channelz.CtINFO,
			},
		})
		// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
		// the entity being deleted, and thus prevent it from being deleted right away.
		channelz.RemoveEntry(ac.channelzID)
	}
	ac.mu.Unlock()
}
  1220. func (ac *addrConn) getState() connectivity.State {
  1221. ac.mu.Lock()
  1222. defer ac.mu.Unlock()
  1223. return ac.state
  1224. }
  1225. func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
  1226. ac.mu.Lock()
  1227. addr := ac.curAddr.Addr
  1228. ac.mu.Unlock()
  1229. return &channelz.ChannelInternalMetric{
  1230. State: ac.getState(),
  1231. Target: addr,
  1232. CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
  1233. CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
  1234. CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
  1235. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
  1236. }
  1237. }
  1238. func (ac *addrConn) incrCallsStarted() {
  1239. atomic.AddInt64(&ac.czData.callsStarted, 1)
  1240. atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
  1241. }
  1242. func (ac *addrConn) incrCallsSucceeded() {
  1243. atomic.AddInt64(&ac.czData.callsSucceeded, 1)
  1244. }
  1245. func (ac *addrConn) incrCallsFailed() {
  1246. atomic.AddInt64(&ac.czData.callsFailed, 1)
  1247. }
  1248. type retryThrottler struct {
  1249. max float64
  1250. thresh float64
  1251. ratio float64
  1252. mu sync.Mutex
  1253. tokens float64 // TODO(dfawley): replace with atomic and remove lock.
  1254. }
  1255. // throttle subtracts a retry token from the pool and returns whether a retry
  1256. // should be throttled (disallowed) based upon the retry throttling policy in
  1257. // the service config.
  1258. func (rt *retryThrottler) throttle() bool {
  1259. if rt == nil {
  1260. return false
  1261. }
  1262. rt.mu.Lock()
  1263. defer rt.mu.Unlock()
  1264. rt.tokens--
  1265. if rt.tokens < 0 {
  1266. rt.tokens = 0
  1267. }
  1268. return rt.tokens <= rt.thresh
  1269. }
  1270. func (rt *retryThrottler) successfulRPC() {
  1271. if rt == nil {
  1272. return
  1273. }
  1274. rt.mu.Lock()
  1275. defer rt.mu.Unlock()
  1276. rt.tokens += rt.ratio
  1277. if rt.tokens > rt.max {
  1278. rt.tokens = rt.max
  1279. }
  1280. }
  1281. type channelzChannel struct {
  1282. cc *ClientConn
  1283. }
  1284. func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
  1285. return c.cc.channelzMetric()
  1286. }
// ErrClientConnTimeout indicates that the ClientConn cannot establish the
// underlying connections within the specified timeout.
//
// Deprecated: This error is never returned by grpc and should not be
// referenced by users.
var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")