[TASK] add no-respondd
This commit is contained in:
parent
94267cf6dd
commit
4faf990845
|
@ -55,6 +55,12 @@ save_interval = "5s"
|
|||
# Set node to offline if not seen within this period
|
||||
offline_after = "10m"
|
||||
|
||||
## Verify whether a node is really down by pinging its last seen address
|
||||
# Send x pings to verify whether the node is offline (set count < 1 to disable)
|
||||
ping_count = 3
|
||||
# Timeout for pinging a node
|
||||
ping_timeout = "1s"
|
||||
|
||||
|
||||
## [[nodes.output.example]]
|
||||
# Each output format has its own config block and needs to be enabled by adding:
|
||||
|
|
|
@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s
|
|||
func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric {
|
||||
return []graphigo.Metric{
|
||||
{Name: name + ".nodes", Value: stats.Nodes},
|
||||
{Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd},
|
||||
{Name: name + ".gateways", Value: stats.Gateways},
|
||||
{Name: name + ".clients.total", Value: stats.Clients},
|
||||
{Name: name + ".clients.wifi", Value: stats.ClientsWifi},
|
||||
|
|
|
@ -43,6 +43,7 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time
|
|||
func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"nodes": stats.Nodes,
|
||||
"nodes.no_respondd": stats.NodesNoRespondd,
|
||||
"gateways": stats.Gateways,
|
||||
"clients.total": stats.Clients,
|
||||
"clients.wifi": stats.ClientsWifi,
|
||||
|
|
|
@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) {
|
|||
}
|
||||
|
||||
func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) {
|
||||
conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, ", clients: ", stats.Clients, " models: ", len(stats.Models))
|
||||
conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models))
|
||||
}
|
||||
|
||||
func (conn *Connection) PruneNodes(deleteAfter time.Duration) {
|
||||
|
|
|
@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json"
|
|||
prune_after = "7d"
|
||||
save_interval = "5s"
|
||||
offline_after = "10m"
|
||||
ping_count = 3
|
||||
ping_timeout = "1s"
|
||||
```
|
||||
{% endmethod %}
|
||||
|
||||
|
@ -246,6 +248,26 @@ offline_after = "10m"
|
|||
```
|
||||
{% endmethod %}
|
||||
|
||||
### ping_count
|
||||
{% method %}
|
||||
Verify whether a node is really down by pinging its last seen address.
|
||||
Send x pings to verify whether the node is offline (set count < 1 to disable).
|
||||
{% sample lang="toml" %}
|
||||
```toml
|
||||
ping_count = 3
|
||||
```
|
||||
{% endmethod %}
|
||||
|
||||
|
||||
### ping_timeout
|
||||
{% method %}
|
||||
Timeout for pinging a node.
|
||||
{% sample lang="toml" %}
|
||||
```toml
|
||||
ping_timeout = "1s"
|
||||
```
|
||||
{% endmethod %}
|
||||
|
||||
|
||||
## [[nodes.output.example]]
|
||||
{% method %}
|
||||
|
|
|
@ -13,6 +13,7 @@ type Node struct {
|
|||
Firstseen jsontime.Time `json:"firstseen"`
|
||||
Lastseen jsontime.Time `json:"lastseen"`
|
||||
Online bool `json:"online"`
|
||||
NoRespondd bool `json:"-"`
|
||||
Statistics *data.Statistics `json:"statistics"`
|
||||
Nodeinfo *data.Nodeinfo `json:"nodeinfo"`
|
||||
Neighbours *data.Neighbours `json:"-"`
|
||||
|
|
|
@ -176,7 +176,16 @@ func (nodes *Nodes) expire() {
|
|||
delete(nodes.List, id)
|
||||
} else if node.Lastseen.Before(offlineAfter) {
|
||||
// set to offline
|
||||
if nodes.config.PingCount > 0 && nodes.ping(node) {
|
||||
node.Online = true
|
||||
node.NoRespondd = true
|
||||
|
||||
node.Statistics = nil
|
||||
node.Neighbours = nil
|
||||
} else {
|
||||
node.Online = false
|
||||
node.NoRespondd = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,5 +7,7 @@ type NodesConfig struct {
|
|||
SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically
|
||||
OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period
|
||||
PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of inactivity
|
||||
PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1)
|
||||
PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node
|
||||
Output map[string]interface{}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/bdlm/log"
|
||||
"github.com/sparrc/go-ping"
|
||||
)
|
||||
|
||||
func (nodes *Nodes) ping(node *Node) bool {
|
||||
logNode := log.WithField("node_id", "unknown")
|
||||
if node.Nodeinfo != nil {
|
||||
logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID)
|
||||
}
|
||||
if node.Address == nil {
|
||||
logNode.Debug("error no address found")
|
||||
return false
|
||||
}
|
||||
addr := node.Address.IP.String()
|
||||
if node.Address.IP.IsLinkLocalUnicast() {
|
||||
addr += "%" + node.Address.Zone
|
||||
}
|
||||
|
||||
logAddr := logNode.WithField("addr", addr)
|
||||
|
||||
pinger, err := ping.NewPinger(addr)
|
||||
if err != nil {
|
||||
logAddr.Debugf("error during ping: %s", err)
|
||||
return false
|
||||
}
|
||||
//pinger.SetPrivileged(true)
|
||||
pinger.Count = nodes.config.PingCount
|
||||
pinger.Timeout = nodes.config.PingTimeout.Duration
|
||||
pinger.Run() // blocks until finished
|
||||
stats := pinger.Statistics()
|
||||
logAddr.WithFields(map[string]interface{}{
|
||||
"pkg_lost": stats.PacketLoss,
|
||||
}).Debug("pong")
|
||||
return stats.PacketLoss < 100
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package runtime
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/bdlm/log"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/FreifunkBremen/yanic/data"
|
||||
)
|
||||
|
||||
func TestPing(t *testing.T) {
|
||||
log.SetLevel(log.DebugLevel)
|
||||
|
||||
assert := assert.New(t)
|
||||
config := &NodesConfig{
|
||||
PingCount: 1,
|
||||
}
|
||||
config.OfflineAfter.Duration = time.Minute * 10
|
||||
// to get default (100%) path of testing
|
||||
// config.PruneAfter.Duration = time.Hour * 24 * 6
|
||||
nodes := &Nodes{
|
||||
config: config,
|
||||
List: make(map[string]*Node),
|
||||
ifaceToNodeID: make(map[string]string),
|
||||
}
|
||||
|
||||
node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}})
|
||||
node.Address = &net.UDPAddr{Zone: "bat0"}
|
||||
// error during ping
|
||||
assert.False(nodes.ping(node))
|
||||
|
||||
node.Address.IP = net.ParseIP("fe80::1")
|
||||
// error during ping
|
||||
assert.False(nodes.ping(node))
|
||||
}
|
|
@ -17,6 +17,7 @@ type GlobalStats struct {
|
|||
ClientsWifi5 uint32
|
||||
Gateways uint32
|
||||
Nodes uint32
|
||||
NodesNoRespondd uint32
|
||||
|
||||
Firmwares CounterMap
|
||||
Models CounterMap
|
||||
|
@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) {
|
|||
s.ClientsWifi5 += stats.Clients.Wifi5
|
||||
s.ClientsWifi += stats.Clients.Wifi
|
||||
}
|
||||
if node.NoRespondd {
|
||||
s.NodesNoRespondd++
|
||||
}
|
||||
if node.IsGateway() {
|
||||
s.Gateways++
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) {
|
|||
//check GLOBAL_SITE stats
|
||||
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways)
|
||||
assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes)
|
||||
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd)
|
||||
assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients)
|
||||
|
||||
// check models
|
||||
|
@ -99,6 +100,7 @@ func createTestNodes() *Nodes {
|
|||
|
||||
nodes.AddNode(&Node{
|
||||
Online: true,
|
||||
NoRespondd: true,
|
||||
Statistics: &data.Statistics{
|
||||
Clients: data.Clients{
|
||||
Total: 2,
|
||||
|
|
Loading…
Reference in New Issue