Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Agents auto-retry connect to heartbeat server alternative addresses #368

Open
wants to merge 41 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
480f325
Agents auto-retry connect to heartbeat server alternative addresses, …
Jan 25, 2018
0934f87
Implement string matcher monitoring.
Mar 20, 2018
06c064f
Merge github.com:open-falcon/falcon-plus
Mar 20, 2018
89f3bd3
Improve string matcher, filter matched strings and save them into DB.
Mar 22, 2018
2a04408
Append github.com/jmoiron/sqlx into govendor.
Mar 22, 2018
4590a80
Append vendor/ into git ignore list.
Mar 23, 2018
feac80a
Fix package sqlx dependencies.
Mar 23, 2018
706aa3e
Set mysql user falcon password falcon instead of root by default.
Mar 23, 2018
cdc06c6
map ValueRaw in JSON.
Mar 23, 2018
c6d727e
Fix forward ValueRaw in gateway.
Mar 23, 2018
6395ed5
Merge branch 'master' of github.com:shuge/falcon-plus
Mar 23, 2018
77b1a9c
Fix default DB password.
Mar 30, 2018
f9854b5
Add counterType=STRMATCH for function match(pattern, period).
Mar 30, 2018
a6d3021
Delete hard-coded metric="str.match" in judge.
Mar 30, 2018
6233402
Fix counterType=g.STRMATCH metric in graph.
Mar 30, 2018
b9b81b6
Return invalid metric detail in RPC call response.
Jun 6, 2018
0ae1769
Delete vendor/github.com/ from VC.
Jun 11, 2018
d2d37e8
Fix db falcon_portal password in configuration.
Jun 14, 2018
0e36eac
Append upgrade note for support multiple-metrics extend expression ve…
Jun 14, 2018
bb53982
Fix db falcon_portal password in configuration.
Jun 14, 2018
dd49f4a
Fix db falcon_portal password in configuration.
Jun 14, 2018
2829f10
Intro new model EExpression;
Jun 14, 2018
05f8cbe
Intro new model EExpression;
Jun 14, 2018
a507ad7
Add new RPC method for forwarding EMetric data.
Jun 14, 2018
5c6f9a1
Add new RPC method for forwarding EMetric data.
Jun 14, 2018
012e303
Intro new EMetric and EExpression models and support multiple-metrics…
Jun 14, 2018
262805a
Merge branch 'master' of github.com:shuge/falcon-plus
Jun 14, 2018
43182a9
Use the term EExp instead of EExpression.
Jun 24, 2018
72af697
Support multiple-metrics extend expression.
Jun 25, 2018
92d2ff1
Support multiple-metrics extend expression.
Jun 25, 2018
5c0ee67
Support multiple-metrics extend expression.
Jun 25, 2018
302abb3
Support multiple-metrics extend expression.
Jun 25, 2018
2a04fbd
Mute log.
Jun 26, 2018
7fcfa2a
Fix type.
Jul 10, 2018
b71100c
Use falcon as db default username, password and dbn.
Jul 11, 2018
c05eae3
Disable stringMatcher by default.
Jul 11, 2018
1e1b986
Add MMEE mysql DB schemas.
Jul 11, 2018
43bf957
Delete 7_portal-db-schema-extra.sql.
Jul 11, 2018
963e064
Support redis password.
Jul 11, 2018
549d4f1
Support redis password.
Jul 11, 2018
ced89e1
Fix parse EExp.
Jul 11, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion config/agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
},
"heartbeat": {
"enabled": true,
"addr": "%%HBS_RPC%%",
"addrs": [
"%%HBS_RPC%%"
],
"interval": 60,
"timeout": 1000
},
Expand Down
4 changes: 3 additions & 1 deletion modules/agent/cfg.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
},
"heartbeat": {
"enabled": true,
"addr": "127.0.0.1:6030",
"addrs": [
"127.0.0.1:6030"
],
"interval": 60,
"timeout": 1000
},
Expand Down
7 changes: 4 additions & 3 deletions modules/agent/cron/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@
package cron

import (
"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
"log"
"strconv"
"strings"
"time"

"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
)

func SyncBuiltinMetrics() {
if g.Config().Heartbeat.Enabled && g.Config().Heartbeat.Addr != "" {
if g.Config().Heartbeat.Enabled && len(g.Config().Heartbeat.Addrs) > 0 {
go syncBuiltinMetrics()
}
}
Expand Down
7 changes: 4 additions & 3 deletions modules/agent/cron/ips.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
package cron

import (
"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
"log"
"time"

"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
)

func SyncTrustableIps() {
if g.Config().Heartbeat.Enabled && g.Config().Heartbeat.Addr != "" {
if g.Config().Heartbeat.Enabled && len(g.Config().Heartbeat.Addrs) != 0 {
go syncTrustableIps()
}
}
Expand Down
9 changes: 5 additions & 4 deletions modules/agent/cron/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
package cron

import (
"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
"github.com/open-falcon/falcon-plus/modules/agent/plugins"
"log"
"strings"
"time"

"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
"github.com/open-falcon/falcon-plus/modules/agent/plugins"
)

func SyncMinePlugins() {
Expand All @@ -32,7 +33,7 @@ func SyncMinePlugins() {
return
}

if g.Config().Heartbeat.Addr == "" {
if len(g.Config().Heartbeat.Addrs) == 0 {
return
}

Expand Down
7 changes: 4 additions & 3 deletions modules/agent/cron/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ package cron

import (
"fmt"
"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
"log"
"time"

"github.com/open-falcon/falcon-plus/common/model"
"github.com/open-falcon/falcon-plus/modules/agent/g"
)

func ReportAgentStatus() {
if g.Config().Heartbeat.Enabled && g.Config().Heartbeat.Addr != "" {
if g.Config().Heartbeat.Enabled && len(g.Config().Heartbeat.Addrs) != 0 {
go reportAgentStatus(time.Duration(g.Config().Heartbeat.Interval) * time.Second)
}
}
Expand Down
8 changes: 4 additions & 4 deletions modules/agent/g/cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ type PluginConfig struct {
}

type HeartbeatConfig struct {
Enabled bool `json:"enabled"`
Addr string `json:"addr"`
Interval int `json:"interval"`
Timeout int `json:"timeout"`
Enabled bool `json:"enabled"`
Addrs []string `json:"addrs"`
Interval int `json:"interval"`
Timeout int `json:"timeout"`
}

type TransferConfig struct {
Expand Down
34 changes: 19 additions & 15 deletions modules/agent/g/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import (

type SingleConnRpcClient struct {
sync.Mutex
rpcClient *rpc.Client
RpcServer string
Timeout time.Duration
rpcClient *rpc.Client
RpcServers []string
Timeout time.Duration
}

func (this *SingleConnRpcClient) close() {
Expand All @@ -44,25 +44,29 @@ func (this *SingleConnRpcClient) serverConn() error {
}

var err error
var retry int = 1
var retry int

for {
if this.rpcClient != nil {
return nil
}
for _, addr := range this.RpcServers {
retry = 1

this.rpcClient, err = net.JsonRpcClient("tcp", this.RpcServer, this.Timeout)
RETRY:
this.rpcClient, err = net.JsonRpcClient("tcp", addr, this.Timeout)
if err != nil {
log.Printf("dial %s fail: %v", this.RpcServer, err)
log.Println("net.JsonRpcClient failed", err)
if retry > 3 {
return err
continue
}

time.Sleep(time.Duration(math.Pow(2.0, float64(retry))) * time.Second)
retry++
continue
goto RETRY
}
return err
log.Println("connected RPC server", addr)

return nil
}

return errors.New("connect to RPC servers failed")
}

func (this *SingleConnRpcClient) Call(method string, args interface{}, reply interface{}) error {
Expand All @@ -85,9 +89,9 @@ func (this *SingleConnRpcClient) Call(method string, args interface{}, reply int

select {
case <-time.After(timeout):
log.Printf("[WARN] rpc call timeout %v => %v", this.rpcClient, this.RpcServer)
log.Printf("[WARN] rpc call timeout %v => %v", this.rpcClient, this.RpcServers)
this.close()
return errors.New(this.RpcServer + " rpc call timeout")
return errors.New("rpc call timeout")
case err := <-done:
if err != nil {
this.close()
Expand Down
7 changes: 5 additions & 2 deletions modules/agent/g/transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ func SendMetrics(metrics []*model.MetricValue, resp *model.TransferResponse) {
}

func initTransferClient(addr string) *SingleConnRpcClient {
addrs := []string{
addr,
}
var c *SingleConnRpcClient = &SingleConnRpcClient{
RpcServer: addr,
Timeout: time.Duration(Config().Transfer.Timeout) * time.Millisecond,
RpcServers: addrs,
Timeout: time.Duration(Config().Transfer.Timeout) * time.Millisecond,
}
TransferClientsLock.Lock()
defer TransferClientsLock.Unlock()
Expand Down
28 changes: 17 additions & 11 deletions modules/agent/g/var.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@ package g

import (
"bytes"
"github.com/open-falcon/falcon-plus/common/model"
"github.com/toolkits/slice"
"fmt"
"log"
"net"
"os"
"strings"
"sync"
"time"

"github.com/open-falcon/falcon-plus/common/model"
"github.com/toolkits/slice"
)

var Root string
Expand All @@ -40,12 +42,15 @@ var LocalIp string

func InitLocalIp() {
if Config().Heartbeat.Enabled {
conn, err := net.DialTimeout("tcp", Config().Heartbeat.Addr, time.Second*10)
if err != nil {
log.Println("get local addr failed !")
} else {
LocalIp = strings.Split(conn.LocalAddr().String(), ":")[0]
conn.Close()
for _, addr := range Config().Heartbeat.Addrs {
conn, err := net.DialTimeout("tcp", addr, time.Second*10)
if err != nil {
log.Println(fmt.Sprintf("connect to heartbeat server %s failed", addr))
} else {
defer conn.Close()
LocalIp = strings.Split(conn.LocalAddr().String(), ":")[0]
break
}
}
} else {
log.Println("hearbeat is not enabled, can't get localip")
Expand All @@ -57,12 +62,13 @@ var (
)

func InitRpcClients() {
if Config().Heartbeat.Enabled {
if Config().Heartbeat.Enabled && len(Config().Heartbeat.Addrs) != 0 {
HbsClient = &SingleConnRpcClient{
RpcServer: Config().Heartbeat.Addr,
Timeout: time.Duration(Config().Heartbeat.Timeout) * time.Millisecond,
RpcServers: Config().Heartbeat.Addrs,
Timeout: time.Duration(Config().Heartbeat.Timeout) * time.Millisecond,
}
}

}

func SendToTransfer(metrics []*model.MetricValue) {
Expand Down