示例代码如下:
// TestDiscovery dials the "system_services" service through etcd-backed
// service discovery and issues a single UserInfo.Detail call.
//
// It needs an etcd server listening on 127.0.0.1:2379 and a registered
// "system_services" instance. Run with `go test -v` to see the logged reply.
func TestDiscovery(t *testing.T) {
	client, err := clientv3.New(clientv3.Config{
		Endpoints: []string{
			"127.0.0.1:2379",
		},
	})
	if err != nil {
		t.Fatalf("create etcd client: %v", err)
	}
	// Release the etcd connection when the test ends.
	defer client.Close()

	dis := etcd.New(client)
	// The "discovery" scheme selects the resolver built from dis; the path is
	// the service name to look up.
	endpoint := "discovery:///system_services"
	conn, err := grpc.DialInsecure(context.Background(), grpc.WithDiscovery(dis), grpc.WithEndpoint(endpoint))
	if err != nil {
		t.Fatalf("dial %q: %v", endpoint, err)
	}
	// Closing the connection also shuts down the resolver attached to it.
	defer conn.Close()

	user := v1.NewUserInfoClient(conn)
	reply, err := user.Detail(context.Background(), &v1.DetailRequest{
		Id: 2,
	})
	if err != nil {
		// Fail instead of silently logging a nil reply.
		t.Fatalf("call Detail: %v", err)
	}
	t.Logf("%v", reply)
}
这是一个kratos通过etcd调用服务的流程,让我们通过分析kratos的流程来学习一下grpc的使用
endpoint := "discovery:///system_services"
conn, err := grpc.DialInsecure(context.Background(), grpc.WithDiscovery(dis), grpc.WithEndpoint(endpoint))
最开始我们定义了一个endpoint,通过跟踪,发现这个endpoint被直接传给了grpc-go官方库(google.golang.org/grpc,并非Go标准库)中的DialContext
//github.com/go-kratos/kratos/v2/transport/grpc/client.go:147
//...
// Plaintext transport credentials are added when insecure is set.
if insecure {
grpcOpts = append(grpcOpts, grpc.WithTransportCredentials(grpcinsecure.NewCredentials()))
}
// A configured TLS config adds TLS transport credentials.
if options.tlsConf != nil {
grpcOpts = append(grpcOpts, grpc.WithTransportCredentials(credentials.NewTLS(options.tlsConf)))
}
// Caller-supplied gRPC dial options are appended last.
if len(options.grpcOpts) > 0 {
grpcOpts = append(grpcOpts, options.grpcOpts...)
}
// The endpoint string is passed to grpc.DialContext unchanged as the target.
return grpc.DialContext(ctx, options.endpoint, grpcOpts...)
那么这个endpoint里的discovery是什么意思呢?grpc是如何知道我们要调用etcd作为discovery,又是如何获取到服务的链接的呢?
我们继续追踪代码:
//github.com/go-kratos/kratos/v2/transport/grpc/client.go:139
//...
// When a discovery backend is configured, wrap it in a resolver builder
// (carrying the insecure flag) and hand that builder to gRPC through
// grpc.WithResolvers.
if options.discovery != nil {
grpcOpts = append(grpcOpts,
grpc.WithResolvers(
discovery.NewBuilder(
options.discovery,
discovery.WithInsecure(insecure),
)))
}
发现kratos将discovery包装成一个resolver.Builder传给了grpc.WithResolvers
resolver.Builder由grpc-go的resolver包(google.golang.org/grpc/resolver)定义:
// Builder creates a Resolver for a dial target and reports the URL scheme it
// supports.
type Builder interface {
// Build creates a new resolver for the given target.
//
// gRPC dial calls Build synchronously, and fails if the returned error is
// not nil.
Build(target Target, cc ClientConn, opts BuildOptions) (Resolver, error)
// Scheme returns the scheme supported by this resolver.
// Scheme is defined at https://github.com/grpc/grpc/blob/master/doc/naming.md.
Scheme() string
}
规定了两个方法,grpc通过第一个Build方法来构建Resolver对象
// Resolver is the type returned by Builder.Build. It watches for the updates
// on the specified target.
// Updates include address updates and service config updates.
type Resolver interface {
// ResolveNow will be called by gRPC to try to resolve the target name
// again. It's just a hint, resolver can ignore this if it's not necessary.
//
// It could be called multiple times concurrently.
ResolveNow(ResolveNowOptions)
// Close closes the resolver.
Close()
}
第二个Scheme方法返回该Builder支持的scheme名称,grpc通过比较这个名称来找到与target对应的resolver.Builder
用实际代码来说明:
//google.golang.org/grpc/clientconn.go:1574
// parseTargetAndFindResolver parses cc.target and looks up a resolver builder
// for the parsed scheme. When a builder is found it stores the parsed target
// on cc and returns the builder.
//
// Excerpt: the handling that follows a parse failure or a missing builder is
// elided here.
func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) {
channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target)
var rb resolver.Builder
parsedTarget, err := parseTarget(cc.target)
if err != nil {
// A parse failure is only logged here; it is not returned in this excerpt.
channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err)
} else {
channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget)
// Look up the builder by the scheme part of the target.
rb = cc.getResolver(parsedTarget.Scheme)
if rb != nil {
cc.parsedTarget = parsedTarget
return rb, nil
}
}
//...
这里parseTarget方法解析了我们之前传入的target,规则为[scheme]://[authority]/endpoint
// parseTarget uses RFC 3986 semantics to parse the given target into a
// resolver.Target struct containing scheme, authority and endpoint. Query
// params are stripped from the endpoint.
//
// It returns an empty resolver.Target and the url.Parse error when the target
// cannot be parsed.
func parseTarget(target string) (resolver.Target, error) {
u, err := url.Parse(target)
if err != nil {
return resolver.Target{}, err
}
// For targets of the form "[scheme]://[authority]/endpoint, the endpoint
// value returned from url.Parse() contains a leading "/". Although this is
// in accordance with RFC 3986, we do not want to break existing resolver
// implementations which expect the endpoint without the leading "/". So, we
// end up stripping the leading "/" here. But this will result in an
// incorrect parsing for something like "unix:///path/to/socket". Since we
// own the "unix" resolver, we can workaround in the unix resolver by using
// the `URL` field instead of the `Endpoint` field.
endpoint := u.Path
// Fall back to the opaque part when the parsed URL has an empty path.
if endpoint == "" {
endpoint = u.Opaque
}
endpoint = strings.TrimPrefix(endpoint, "/")
// The full parsed URL is kept alongside the derived fields.
return resolver.Target{
Scheme: u.Scheme,
Authority: u.Host,
Endpoint: endpoint,
URL: *u,
}, nil
}
其中scheme部分放入resolver.Target的Scheme字段,endpoint部分(去掉开头的"/"之后)放入resolver.Target的Endpoint字段
这里Host部分放入了Authority,官方的说明是:
authority indicates the DNS server to use, although this is only supported by some implementations. (In C-core, the default DNS resolver does not support this, but the c-ares based resolver supports specifying this in the form "IP:port".)
大意是说,只有在某些实现中才会支持,但至少etcd中我们是用不到的
接着通过getResolver方法获取了scheme对应的resolver.Builder
// getResolver returns the resolver builder for scheme. Builders supplied
// through the dial options (cc.dopts.resolvers) are checked first, in order,
// and the first one whose Scheme() equals scheme wins; if none matches, the
// result of resolver.Get(scheme) is returned.
func (cc *ClientConn) getResolver(scheme string) resolver.Builder {
for _, rb := range cc.dopts.resolvers {
if scheme == rb.Scheme() {
return rb
}
}
return resolver.Get(scheme)
}
清楚了grpc如何获取对应的discovery,下面就是grpc如何获取服务的链接了
我们回过头来看kratos中的builder.go,其中的Build方法是kratos对resolver.Builder接口的实现
//github.com/go-kratos/kratos/v2/transport/grpc/resolver/discovery/builder.go:61
// Build implements resolver.Builder. It asks the discoverer for a watcher on
// the service named by target.URL.Path (leading "/" removed), waiting at most
// b.timeout, then returns a discoveryResolver and starts its watch loop in a
// new goroutine.
//
// It returns a nil resolver and an error when Watch fails or when the watcher
// is not ready before b.timeout elapses; in both cases the context is
// cancelled first.
func (b *builder) Build(target resolver.Target, cc resolver.ClientConn, opts resolver.BuildOptions) (resolver.Resolver, error) {
var (
err error
w registry.Watcher
)
done := make(chan struct{}, 1)
ctx, cancel := context.WithCancel(context.Background())
// Create the watcher in a goroutine so the select below can enforce b.timeout.
go func() {
w, err = b.discoverer.Watch(ctx, strings.TrimPrefix(target.URL.Path, "/"))
close(done)
}()
select {
case <-done:
case <-time.After(b.timeout):
// NOTE(review): the goroutine above may still be running at this point and
// assigns w and err without synchronization, so this write can race with it.
err = errors.New("discovery create watcher overtime")
}
if err != nil {
cancel()
return nil, err
}
// The resolver keeps ctx and cancel; watch() checks ctx to know when to stop.
r := &discoveryResolver{
w: w,
cc: cc,
ctx: ctx,
cancel: cancel,
insecure: b.insecure,
debugLogDisabled: b.debugLogDisabled,
}
go r.watch()
return r, nil
}
kratos先是调用了etcd的watch方法来对服务状态进行监视,详见:Go操作etcd - 知乎 (zhihu.com)
接着创建了一个协程来调用resolver的watch方法
// watch repeatedly calls r.w.Next() and passes each returned instance list to
// r.update. It returns when r.ctx is done or when Next reports
// context.Canceled; any other error is logged and retried after one second.
func (r *discoveryResolver) watch() {
for {
// Non-blocking check for cancellation before each Next call.
select {
case <-r.ctx.Done():
return
default:
}
ins, err := r.w.Next()
if err != nil {
if errors.Is(err, context.Canceled) {
return
}
log.Errorf("[resolver] Failed to watch discovery endpoint: %v", err)
// Pause before retrying so a persistent failure does not spin.
time.Sleep(time.Second)
continue
}
r.update(ins)
}
}
这个方法里使用了kratos自己定义的registry.Watcher对象(即Build中创建的w)来监控etcd中服务状态的改变,Next主要用来在每次服务状态改变时获取服务的最新实例列表,在服务状态未改变时会阻塞协程,就不展开了
重点在于update方法
// update converts the given service instances into resolver addresses and
// pushes them to gRPC through r.cc.UpdateState.
//
// Instances whose endpoint fails to parse (logged) or parses to an empty
// string are skipped, and repeated endpoints are kept only once. When no
// address remains, a warning is logged and the state is left untouched.
func (r *discoveryResolver) update(ins []*registry.ServiceInstance) {
addrs := make([]resolver.Address, 0)
// Set of endpoints already added, used for de-duplication.
endpoints := make(map[string]struct{})
for _, in := range ins {
// Note: the local variable endpoint shadows the endpoint package for the
// rest of this loop body.
// NOTE(review): presumably this selects the instance's gRPC endpoint, with
// the scheme depending on r.insecure; confirm against the endpoint package.
endpoint, err := endpoint.ParseEndpoint(in.Endpoints, endpoint.Scheme("grpc", !r.insecure))
if err != nil {
log.Errorf("[resolver] Failed to parse discovery endpoint: %v", err)
continue
}
if endpoint == "" {
continue
}
// filter redundant endpoints
if _, ok := endpoints[endpoint]; ok {
continue
}
endpoints[endpoint] = struct{}{}
addr := resolver.Address{
ServerName: in.Name,
Attributes: parseAttributes(in.Metadata),
Addr: endpoint,
}
// Attach the original instance under the "rawServiceInstance" attribute key.
addr.Attributes = addr.Attributes.WithValue("rawServiceInstance", in)
addrs = append(addrs, addr)
}
// Do not overwrite the current state with an empty address list.
if len(addrs) == 0 {
log.Warnf("[resolver] Zero endpoint found,refused to write, instances: %v", ins)
return
}
err := r.cc.UpdateState(resolver.State{Addresses: addrs})
if err != nil {
log.Errorf("[resolver] failed to update state: %s", err)
}
// Log the full instance list as JSON unless debug logging is disabled; the
// Marshal error is ignored here.
if !r.debugLogDisabled {
b, _ := json.Marshal(ins)
log.Infof("[resolver] update instances: %s", b)
}
}
update方法在服务改变时调用了r.cc.UpdateState,而r.cc是一个resolver.ClientConn对象,接受一个resolver.State对象作为参数
我们可以看到,kratos就是在这里把每次变化后的服务地址列表写入grpc的。
至此我们已经搞清了所有的疑问,在grpc中如何使用服务发现的方法也已经明晰
首先我们要自己实现resolver.Builder与resolver.Resolver两个接口,其中resolver.Builder用来生成resolver.Resolver
然后将resolver.Builder通过grpc.WithResolvers传入grpc.DialContext
在resolver.Builder的Build方法中,我们需要调用etcd的watch方法来监视服务状态的改变,并且在服务改变后需要调用resolver.ClientConn的UpdateState方法来改变服务的地址