package tcp

import (
	"errors"
	lg "log"
	"net"
	"os"
	"sync"
	"time"

	"go-common/app/infra/databus/conf"
	"go-common/app/infra/databus/dsn"
	"go-common/app/infra/databus/model"
	"go-common/app/infra/databus/service"
	"go-common/library/log"

	"github.com/Shopify/sarama"
	metrics "github.com/rcrowley/go-metrics"
)
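
// Databus speaks a small Redis-like protocol over TCP: a client first sends
// AUTH with a DSN, a producer then publishes with SET/HSET, and a consumer
// fetches message batches with MGET. The per-command handling lives in the Pub
// and Sub implementations elsewhere in this package; the constants below only
// name the protocol pieces this file relies on.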

const (
	// redis proto
	_protoStr   = '+'
	_protoErr   = '-'
	_protoInt   = ':'
	_protoBulk  = '$'
	_protoArray = '*'

	// redis cmd
	_ping = "ping"
	_auth = "auth"
	_quit = "quit"
	_set  = "set"
	_hset = "hset"
	_mget = "mget"
	_ok   = "OK"
	_pong = "PONG"

	// client role
	_rolePub = "pub"
	_roleSub = "sub"

	_listenDelay = 5 * time.Millisecond // how long to sleep on accept failure
	_clearDelay  = 30 * time.Second

	_batchNum      = 100                    // batch write message length
	_batchInterval = 100 * time.Millisecond // batch write interval
	_batchTimeout  = 30 * time.Second       // return empty if timeout

	// connection timeout
	_readTimeout    = 5 * time.Second
	_writeTimeout   = 5 * time.Second
	_pubReadTimeout = 20 * time.Minute
	_subReadTimeout = _batchTimeout + 10*time.Second

	// conn read buffer size 64K
	_readBufSize = 1024 * 64
	// conn write buffer size 8K
	_writeBufSize = 1024 * 8
	// conn max value size(kafka 1M)
	_maxValueSize = 1000000

	// prom operation
	_opAddConsumerRequest = "request_add_comsuner"
	_opCurrentConsumer    = "current_consumer"
	_opAddProducerRequest = "request_add_producer"
	_opAuthError          = "auth_error"
	_opProducerMsgSpeed   = "producer_msg_speed"
	_opConsumerMsgSpeed   = "consumer_msg_speed"
	_opConsumerPartition  = "consumer_partition_speed"
	_opPartitionOffset    = "consumer_partition_offset"
)

var (
	_nullBulk = []byte("-1")
	// kafka header
	_headerColor    = []byte("color")
	_headerMetadata = []byte("metadata")
	// encode type pb/json
	_encodePB = []byte("pb")
)

var (
	errCmdAuthFailed        = errors.New("auth failed")
	errAuthInfo             = errors.New("auth info error")
	errPubParams            = errors.New("pub params error")
	errCmdNotSupport        = errors.New("command not support")
	errClusterNotExist      = errors.New("cluster not exist")
	errClusterNotSupport    = errors.New("cluster not support")
	errConnClosedByServer   = errors.New("connection closed by databus")
	errConnClosedByClient   = errors.New("connection closed by client")
	errClosedMsgChannel     = errors.New("message channel is closed")
	errClosedNotifyChannel  = errors.New("notification channel is closed")
	errNoPubPermission      = errors.New("no publish permission")
	errNoSubPermission      = errors.New("no subscribe permission")
	errConsumerClosed       = errors.New("kafka consumer closed")
	errCommitParams         = errors.New("commit offset params error")
	errMsgFormat            = errors.New("message format must be json")
	errConsumerOver         = errors.New("too many consumers")
	errConsumerTimeout      = errors.New("consumer initial timeout")
	errConnRead             = errors.New("connection read error")
	errUseLessConsumer      = errors.New("useless consumer")
	errKafKaData            = errors.New("err kafka data maybe rebalancing")
	errCousmerCreateLimiter = errors.New("err consumer create limiter")
)

var (
	// tcp listener
	listener net.Listener
	quit     = make(chan struct{})
	// producer snapshot, keyed by cluster:group:topic (see key())
	producers = make(map[string]sarama.SyncProducer)
	pLock     sync.RWMutex
	// Pubs
	pubs    = make(map[*Pub]struct{})
	pubLock sync.RWMutex
	// Subs
	subs    = make(map[*Sub]struct{})
	subLock sync.RWMutex
	// service for auth
	svc *service.Service
	// limiter
	consumerLimter = make(chan struct{}, 100)
)
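
// The pubs and subs maps above track every live Pub and Sub so that Close and
// the clear goroutine can shut them down or garbage-collect them.
// consumerLimter bounds consumer creation: a token is taken in serveConn before
// NewSub and is presumably released by the Sub elsewhere in the package.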

// Init initializes the TCP server and starts serving on c.Addr.
func Init(c *conf.Config, s *service.Service) {
	var err error
	if listener, err = net.Listen("tcp", c.Addr); err != nil {
		panic(err)
	}
	// sarama's logger must be set, otherwise errors caught inside sarama are silently dropped
	sarama.Logger = lg.New(os.Stdout, "[Sarama] ", lg.LstdFlags)
	// disable sarama's internal metrics
	metrics.UseNilMetrics = true
	svc = s
	log.Info("start tcp listen addr: %s", c.Addr)
	go accept()
	go clear()
	go clusterproc()
}

// newProducer returns the cached sync producer for the cluster/group/topic,
// creating and caching one on first use.
func newProducer(group, topic string, pCfg *conf.Kafka) (p sarama.SyncProducer, err error) {
	var (
		ok  bool
		key = key(pCfg.Cluster, group, topic)
	)
	pLock.RLock()
	if p, ok = producers[key]; ok {
		pLock.RUnlock()
		return
	}
	pLock.RUnlock()
	// not cached: create a new sync producer
	conf := sarama.NewConfig()
	conf.Producer.Return.Successes = true
	conf.Version = sarama.V1_0_0_0
	if p, err = sarama.NewSyncProducer(pCfg.Brokers, conf); err != nil {
		log.Error("group(%s) topic(%s) cluster(%s) NewSyncProducer error(%v)", group, topic, pCfg.Cluster, err)
		return
	}
	pLock.Lock()
	producers[key] = p
	pLock.Unlock()
	return
}

// Close closes the listener and all producers, pubs and subs.
func Close() {
	close(quit)
	if listener != nil {
		listener.Close()
	}
	// close all consumers
	subLock.RLock()
	for sub := range subs {
		sub.Close()
	}
	subLock.RUnlock()
	// close all publishers
	pubLock.RLock()
	for pub := range pubs {
		pub.Close(true)
	}
	pubLock.RUnlock()
	// close all kafka producers
	pLock.RLock()
	for _, p := range producers {
		p.Close()
	}
	pLock.RUnlock()
}

// accept loops on the listener, retrying temporary errors and handing each
// accepted connection to serveConn until quit is closed.
func accept() {
	var (
		err  error
		ok   bool
		netC net.Conn
		netE net.Error
	)
	for {
		if netC, err = listener.Accept(); err != nil {
			if netE, ok = err.(net.Error); ok && netE.Temporary() {
				log.Error("tcp: Accept error: %v; retrying in %v", err, _listenDelay)
				time.Sleep(_listenDelay)
				continue
			}
			return
		}
		select {
		case <-quit:
			netC.Close()
			return
		default:
		}
		go serveConn(netC)
	}
}
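
// A rough sketch of a session as seen from this file (the AUTH argument is the
// DSN parsed in auth(); command handling after authentication lives in the Pub
// and Sub implementations, so anything past the initial reply is illustrative):
//
//	client: AUTH <dsn>            (dsn format: key:secret@group/topic=?&role=?&offset=?, see auth())
//	server: +OK
//	client: SET/HSET ... (producer)  or  MGET ... (consumer)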

// serveConn serves a single TCP connection: it authenticates the client and
// then hands the connection to a Pub or Sub depending on the requested role.
func serveConn(nc net.Conn) {
	var (
		err   error
		p     *Pub
		s     *Sub
		d     *dsn.DSN
		cfg   *conf.Kafka
		batch int64
		addr  = nc.RemoteAddr().String()
	)
	c := newConn(nc, _readTimeout, _writeTimeout)
	if d, cfg, batch, err = auth(c); err != nil {
		log.Error("auth failed addr(%s) error(%v)", addr, err)
		c.WriteError(err)
		return
	}
	// auth succeeded
	if err = c.Write(proto{prefix: _protoStr, message: _ok}); err != nil {
		log.Error("c.Write() error(%v)", err)
		c.Close()
		return
	}
	if err = c.Flush(); err != nil {
		c.Close()
		return
	}
	log.Info("auth succeed group(%s) topic(%s) color(%s) cluster(%s) addr(%s) role(%s)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, d.Role)
	// command
	switch d.Role {
	case _rolePub: // producer
		svc.CountProm.Incr(_opAddProducerRequest, d.Group, d.Topic)
		if p, err = NewPub(c, d.Group, d.Topic, d.Color, cfg); err != nil {
			c.WriteError(err)
			log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) NewPub error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
			return
		}
		pubLock.Lock()
		pubs[p] = struct{}{}
		pubLock.Unlock()
		p.Serve()
	case _roleSub: // consumer
		svc.CountProm.Incr(_opAddConsumerRequest, d.Group, d.Topic)
		select {
		case consumerLimter <- struct{}{}:
		default:
			err = errCousmerCreateLimiter
			c.WriteError(err)
			log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
			return
		}
		if s, err = NewSub(c, d.Group, d.Topic, d.Color, cfg, batch); err != nil {
			c.WriteError(err)
			log.Error("group(%s) topic(%s) color(%s) cluster(%s) addr(%s) NewSub error(%v)", d.Group, d.Topic, d.Color, cfg.Cluster, addr, err)
			return
		}
		subLock.Lock()
		subs[s] = struct{}{}
		subLock.Unlock()
		s.Serve()
		svc.CountProm.Incr(_opCurrentConsumer, d.Group, d.Topic)
	default:
		// any other role is rejected during auth, so there is nothing to do here
	}
}

// auth reads the first command from the connection, which must be AUTH with a
// single DSN argument, and validates it via Auth.
func auth(c *conn) (d *dsn.DSN, cfg *conf.Kafka, batch int64, err error) {
	var (
		args [][]byte
		cmd  string
		addr = c.conn.RemoteAddr().String()
	)
	if cmd, args, err = c.Read(); err != nil {
		log.Error("c.Read addr(%s) error(%v)", addr, err)
		return
	}
	if cmd != _auth || len(args) != 1 {
		log.Error("c.Read addr(%s) first cmd(%s) is not auth or has wrong args(%v)", addr, cmd, args)
		err = errCmdAuthFailed
		return
	}
	// key:secret@group/topic=?&role=?&offset=?
	if d, err = dsn.ParseDSN(string(args[0])); err != nil {
		log.Error("auth failed arg(%s) is illegal, addr(%s) error(%v)", args[0], addr, err)
		return
	}

	cfg, batch, err = Auth(d, addr)
	return
}

// Auth validates the authentication info and returns the matching Kafka config.
// It is shared with the HTTP interface, so it must not perform any I/O.
func Auth(d *dsn.DSN, addr string) (cfg *conf.Kafka, batch int64, err error) {
	var (
		a  *model.Auth
		ok bool
	)

	if a, ok = svc.AuthApp(d.Group); !ok {
		log.Error("addr(%s) group(%s) cannot be found", addr, d.Group)
		svc.CountProm.Incr(_opAuthError, d.Group, d.Topic)
		err = errAuthInfo
		return
	}
	batch = a.Batch
	if err = a.Auth(d.Group, d.Topic, d.Key, d.Secret); err != nil {
		log.Error("a.Auth addr(%s) group(%s) topic(%s) color(%s) key(%s) secret(%s) error(%v)", addr, d.Group, d.Topic, d.Color, d.Key, d.Secret, err)
		svc.CountProm.Incr(_opAuthError, d.Group, d.Topic)
		return
	}
	switch d.Role {
	case _rolePub:
		if !a.CanPub() {
			err = errNoPubPermission
			return
		}
	case _roleSub:
		if !a.CanSub() {
			err = errNoSubPermission
			return
		}
	default:
		err = errCmdNotSupport
		return
	}
	if len(conf.Conf.Clusters) == 0 {
		err = errClusterNotExist
		return
	}
	if cfg, ok = conf.Conf.Clusters[a.Cluster]; !ok || cfg == nil {
		log.Error("a.Auth addr(%s) group(%s) topic(%s) color(%s) key(%s) secret(%s) cluster(%s) not support", addr, d.Group, d.Topic, d.Color, d.Key, d.Secret, a.Cluster)
		err = errClusterNotSupport
	}
	// TODO check ip addr
	// rAddr = conn.RemoteAddr().String()
	return
}

// ConsumerAddrs returns consumer addrs.
func ConsumerAddrs(group string) (addrs []string, err error) {
	subLock.RLock()
	for sub := range subs {
		if sub.group == group {
			addrs = append(addrs, sub.addr)
		}
	}
	subLock.RUnlock()
	return
}

// key builds the producer cache key from cluster, group and topic.
func key(cluster, group, topic string) string {
	return cluster + ":" + group + ":" + topic
}

// clear periodically removes closed pubs and subs from the package maps.
func clear() {
	for {
		time.Sleep(_clearDelay)
		t := time.Now()
		log.Info("clear proc start,id(%d)", t.Nanosecond())
		subLock.Lock()
		for sub := range subs {
			if sub.Closed() {
				delete(subs, sub)
			}
		}
		subLock.Unlock()
		pubLock.Lock()
		for pub := range pubs {
			if pub.Closed() {
				delete(pubs, pub)
			}
		}
		pubLock.Unlock()
		log.Info("clear proc end,id(%d) used(%d)", t.Nanosecond(), time.Since(t))
	}
}
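
// clusterproc handles cluster-change events from the service: when the cluster
// for a group/topic changes, the cached producer is rebuilt under the new
// config (closing the affected Pubs if that fails) and WaitClosing is called
// on the matching Subs.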
func clusterproc() {
	for {
		oldAuth, ok := <-svc.ClusterEvent()
		if !ok {
			return
		}
		log.Info("cluster changed event group(%s) topic(%s) cluster(%s)", oldAuth.Group, oldAuth.Topic, oldAuth.Cluster)

		k := key(oldAuth.Cluster, oldAuth.Group, oldAuth.Topic)
		pLock.Lock()
		if p, ok := producers[k]; ok {
			// renew producer
			if newAuth, ok := svc.AuthApp(oldAuth.Group); ok {
				pLock.Unlock()
				np, err := newProducer(newAuth.Group, newAuth.Topic, conf.Conf.Clusters[newAuth.Cluster])
				pLock.Lock()
				// point existing pubs at the new producer, or close them if it could not be created
				pubLock.Lock()
				for pub := range pubs {
					if pub.group == oldAuth.Group && pub.topic == oldAuth.Topic {
						if err != nil {
							pub.Close(true)
						} else {
							pub.producer = np
						}
					}
				}
				pubLock.Unlock()
			}
			// close the now unused producer
			p.Close()
			delete(producers, k)
		}
		pLock.Unlock()
		// wait for matching subs to close
		subLock.Lock()
		for sub := range subs {
			if sub.group == oldAuth.Group && sub.topic == oldAuth.Topic {
				sub.WaitClosing()
			}
		}
		subLock.Unlock()
	}
}