[TASK] add no-respondd

This commit is contained in:
Martin/Geno 2019-01-19 18:45:59 +01:00
parent 94267cf6dd
commit 4faf990845
No known key found for this signature in database
GPG Key ID: 9D7D3C6BFF600C6A
12 changed files with 139 additions and 15 deletions

View File

@ -55,6 +55,12 @@ save_interval = "5s"
# Set node to offline if not seen within this period
offline_after = "10m"
## Verify whether a node is really down by pinging its last seen address
# Number of pings sent to verify that a node is offline (set to a value < 1 to disable)
ping_count = 3
# Timeout for pinging a node
ping_timeout = "1s"
## [[nodes.output.example]]
# Each output format has its own config block and needs to be enabled by adding:

View File

@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s
func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric {
return []graphigo.Metric{
{Name: name + ".nodes", Value: stats.Nodes},
{Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd},
{Name: name + ".gateways", Value: stats.Gateways},
{Name: name + ".clients.total", Value: stats.Clients},
{Name: name + ".clients.wifi", Value: stats.ClientsWifi},

View File

@ -42,12 +42,13 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time
// GlobalStatsFields returns fields for InfluxDB.
//
// The returned map uses the field name as key and the matching counter read
// from stats as value (node, gateway and client counts).
//
// NOTE(review): this listing contains the keys "nodes", "gateways" and
// "clients.*" twice; presumably the first six entries are the pre-change lines
// of the hunk and only the second group (which adds "nodes.no_respondd") is
// live code. Confirm against the real file.
func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} {
return map[string]interface{}{
"nodes": stats.Nodes,
"gateways": stats.Gateways,
"clients.total": stats.Clients,
"clients.wifi": stats.ClientsWifi,
"clients.wifi24": stats.ClientsWifi24,
"clients.wifi5": stats.ClientsWifi5,
"nodes": stats.Nodes,
"nodes.no_respondd": stats.NodesNoRespondd,
"gateways": stats.Gateways,
"clients.total": stats.Clients,
"clients.wifi": stats.ClientsWifi,
"clients.wifi24": stats.ClientsWifi24,
"clients.wifi5": stats.ClientsWifi5,
}
}

View File

@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) {
}
// InsertGlobals writes a summary of the global statistics through conn.log:
// the timestamp, site, domain, node count, client count and the number of
// distinct models.
//
// NOTE(review): two conn.log calls are listed here; the second one additionally
// reports stats.NodesNoRespondd. Presumably the first is the pre-change line of
// the hunk and only the second is live code. Confirm against the real file.
func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) {
conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, ", clients: ", stats.Clients, " models: ", len(stats.Models))
conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models))
}
func (conn *Connection) PruneNodes(deleteAfter time.Duration) {

View File

@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json"
prune_after = "7d"
save_interval = "5s"
offline_after = "10m"
ping_count = 3
ping_timeout = "1s"
```
{% endmethod %}
@ -246,6 +248,26 @@ offline_after = "10m"
```
{% endmethod %}
### ping_count
{% method %}
Verify whether a node is really down by pinging its last seen address.
Sends x pings to verify that the node is offline (set the count to a value < 1 to disable).
{% sample lang="toml" %}
```toml
ping_count = 3
```
{% endmethod %}
### ping_timeout
{% method %}
Timeout for pinging a node.
{% sample lang="toml" %}
```toml
ping_timeout = "1s"
```
{% endmethod %}
## [[nodes.output.example]]
{% method %}

View File

@ -13,6 +13,7 @@ type Node struct {
Firstseen jsontime.Time `json:"firstseen"`
Lastseen jsontime.Time `json:"lastseen"`
Online bool `json:"online"`
NoRespondd bool `json:"-"`
Statistics *data.Statistics `json:"statistics"`
Nodeinfo *data.Nodeinfo `json:"nodeinfo"`
Neighbours *data.Neighbours `json:"-"`

View File

@ -176,7 +176,16 @@ func (nodes *Nodes) expire() {
delete(nodes.List, id)
} else if node.Lastseen.Before(offlineAfter) {
// set to offline
node.Online = false
if nodes.config.PingCount > 0 && nodes.ping(node) {
node.Online = true
node.NoRespondd = true
node.Statistics = nil
node.Neighbours = nil
} else {
node.Online = false
node.NoRespondd = false
}
}
}
}

View File

@ -7,5 +7,7 @@ type NodesConfig struct {
SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically
OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period
PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of inactivity
PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1)
PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node
Output map[string]interface{}
}

38
runtime/nodes_ping.go Normal file
View File

@ -0,0 +1,38 @@
package runtime
import (
"github.com/bdlm/log"
"github.com/sparrc/go-ping"
)
// ping reports whether node still answers ICMP echo requests on its last
// known address.
//
// It sends nodes.config.PingCount echo requests, limited by
// nodes.config.PingTimeout, and blocks until the run has finished. It returns
// true when the run ended with less than 100% packet loss. It returns false
// when the node has no usable address, when the pinger cannot be created, or
// when every request was lost.
func (nodes *Nodes) ping(node *Node) bool {
	logNode := log.WithField("node_id", "unknown")
	if node.Nodeinfo != nil {
		logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID)
	}

	// An address without an IP would be rendered as "<nil>" by IP.String()
	// and then be handed to the resolver as if it were a host name.
	if node.Address == nil || len(node.Address.IP) == 0 {
		logNode.Debug("error no address found")
		return false
	}

	addr := node.Address.IP.String()
	// Link-local addresses need the interface (zone) they were seen on; an
	// empty zone must not be appended, "fe80::1%" is not a valid address.
	if node.Address.IP.IsLinkLocalUnicast() && node.Address.Zone != "" {
		addr += "%" + node.Address.Zone
	}
	logAddr := logNode.WithField("addr", addr)

	pinger, err := ping.NewPinger(addr)
	if err != nil {
		logAddr.Debugf("error during ping: %s", err)
		return false
	}
	//pinger.SetPrivileged(true)
	pinger.Count = nodes.config.PingCount
	pinger.Timeout = nodes.config.PingTimeout.Duration
	pinger.Run() // blocks until finished

	stats := pinger.Statistics()
	logAddr.WithFields(map[string]interface{}{
		"pkg_lost": stats.PacketLoss,
	}).Debug("pong")

	// NOTE(review): if no packet could be sent at all the loss is presumably
	// NaN, which also compares false here - confirm against go-ping.
	return stats.PacketLoss < 100
}

View File

@ -0,0 +1,38 @@
package runtime
import (
"net"
"testing"
"time"
"github.com/bdlm/log"
"github.com/stretchr/testify/assert"
"github.com/FreifunkBremen/yanic/data"
)
// TestPing checks that ping reports false for a node whose address has no IP
// and for a node with the link-local address fe80::1 on zone "bat0".
func TestPing(t *testing.T) {
	log.SetLevel(log.DebugLevel)

	cfg := &NodesConfig{PingCount: 1}
	cfg.OfflineAfter.Duration = 10 * time.Minute
	// to get default (100%) path of testing
	// cfg.PruneAfter.Duration = time.Hour * 24 * 6

	nodes := &Nodes{
		config:        cfg,
		List:          map[string]*Node{},
		ifaceToNodeID: map[string]string{},
	}

	response := &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}}
	node := nodes.Update("expire", response)

	// address carries only a zone, no IP: ping must report false
	node.Address = &net.UDPAddr{Zone: "bat0"}
	assert.False(t, nodes.ping(node))

	// link-local IP on zone bat0: ping must report false as well
	node.Address.IP = net.ParseIP("fe80::1")
	assert.False(t, nodes.ping(node))
}

View File

@ -11,12 +11,13 @@ type CounterMap map[string]uint32
// GlobalStats struct
type GlobalStats struct {
Clients uint32
ClientsWifi uint32
ClientsWifi24 uint32
ClientsWifi5 uint32
Gateways uint32
Nodes uint32
Clients uint32
ClientsWifi uint32
ClientsWifi24 uint32
ClientsWifi5 uint32
Gateways uint32
Nodes uint32
NodesNoRespondd uint32
Firmwares CounterMap
Models CounterMap
@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) {
s.ClientsWifi5 += stats.Clients.Wifi5
s.ClientsWifi += stats.Clients.Wifi
}
if node.NoRespondd {
s.NodesNoRespondd++
}
if node.IsGateway() {
s.Gateways++
}

View File

@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) {
//check GLOBAL_SITE stats
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways)
assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes)
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd)
assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients)
// check models
@ -98,7 +99,8 @@ func createTestNodes() *Nodes {
nodes.AddNode(nodeData)
nodes.AddNode(&Node{
Online: true,
Online: true,
NoRespondd: true,
Statistics: &data.Statistics{
Clients: data.Clients{
Total: 2,