[TASK] add respondd-crashed

A tool to find nodes that were wrongly detected as offline,
by pinging the nodes which do not answer via respondd.
This commit is contained in:
Martin/Geno 2019-01-19 15:45:35 +01:00
parent 94267cf6dd
commit d6875c5a39
No known key found for this signature in database
GPG Key ID: 9D7D3C6BFF600C6A
7 changed files with 325 additions and 0 deletions

View File

@ -0,0 +1,30 @@
# respondd-crashed
This tool pings every "offline" node at every IP address listed in a meshviewer.json to detect whether a respondd daemon is no longer running.
## give access to run ping
```bash
sudo setcap cap_net_raw=+ep $GOPATH/bin/respondd-crashed
```
## Usage
Usage of respondd-crashed:
-ll-iface string
interface to ping linklocal-address
-loglevel uint
Show log message starting at level (default 40)
-meshviewer-path string
path to meshviewer.json from yanic (default "meshviewer.json")
-ping-count int
count of pings (default 3)
-ping-timeout duration
timeout to wait for response (default 5s)
-run-every duration
repeat check every (default 1m0s)
-status-path string
path to store status (default "respondd-crashed.json")
-timestamps
Enables timestamps for log output

View File

@ -0,0 +1,24 @@
package main
import (
"encoding/json"
"net/http"
"time"
)
// JSONRequest fetches url with an HTTP GET and decodes the JSON response
// body into value.
//
// value must be a non-nil pointer to the target (as for json.Unmarshal);
// anything else is reported as a decode error. The request is aborted after
// 20 seconds. The returned error is either the transport error from the GET
// or the JSON decode error.
func JSONRequest(url string, value interface{}) error {
	netClient := &http.Client{
		Timeout: time.Second * 20,
	}

	resp, err := netClient.Get(url)
	if err != nil {
		return err
	}
	// Always release the body, otherwise every call leaks the underlying
	// connection (this function is called periodically).
	defer resp.Body.Close()

	// Decode straight into the caller's pointer rather than into a pointer
	// to the local interface variable.
	return json.NewDecoder(resp.Body).Decode(value)
}

View File

@ -0,0 +1 @@
package main

View File

@ -0,0 +1,34 @@
package main
import (
"os"
"github.com/bdlm/log"
stdLogger "github.com/bdlm/std/logger"
)
// Hook is a log hook without state; its Fire method selects the output
// stream (stdout or stderr) of the logger based on the level of each entry.
type Hook struct{}
// Fire points the output of the entry's logger at stderr for panic, fatal
// and error entries and at stdout for warn, info and debug entries. Entries
// of any other level leave the output untouched. It never returns an error.
func (h *Hook) Fire(entry *log.Entry) error {
	switch entry.Level {
	case log.PanicLevel, log.FatalLevel, log.ErrorLevel:
		entry.Logger.Out = os.Stderr
	case log.WarnLevel, log.InfoLevel, log.DebugLevel:
		entry.Logger.Out = os.Stdout
	}
	return nil
}
// Levels returns the levels this hook applies to, which is every level
// listed in log.AllLevels.
func (h *Hook) Levels() []stdLogger.Level {
	return log.AllLevels
}

View File

@ -0,0 +1,86 @@
package main
import (
"flag"
"os"
"os/signal"
"sync"
"syscall"
"time"
"github.com/bdlm/log"
stdLogger "github.com/bdlm/std/logger"
"github.com/digineo/go-ping"
)
// Command-line options; main binds each of them to a flag.
var (
// timestamps holds the value of the -timestamps flag.
timestamps bool
// loglevel is the value of -loglevel, passed to log.SetLevel.
loglevel uint
// runEvery is the interval between two checks (-run-every).
runEvery time.Duration
// iface is the interface used as zone for link-local addresses (-ll-iface);
// when empty, link-local addresses are skipped by pingNode.
iface string
// pingCount is the number of ping attempts per address (-ping-count).
pingCount int
// pingTimeout is the timeout handed to the pinger (-ping-timeout).
pingTimeout time.Duration
// meshviewerPATH is a file path or http(s) URL of the meshviewer.json
// (-meshviewer-path).
meshviewerPATH string
// statusPath is where the resulting status JSON is written (-status-path).
statusPath string
)
// main parses the command-line flags, configures logging, binds the pinger
// and then runs the check every runEvery until SIGINT or SIGTERM arrives.
// On a signal it waits for a check that is currently in progress to finish
// before exiting.
func main() {
	flag.BoolVar(&timestamps, "timestamps", false, "Enables timestamps for log output")
	flag.UintVar(&loglevel, "loglevel", 40, "Show log message starting at level")
	flag.DurationVar(&runEvery, "run-every", time.Duration(time.Minute), "repeat check every")
	flag.StringVar(&iface, "ll-iface", "", "interface to ping linklocal-address")
	flag.IntVar(&pingCount, "ping-count", 3, "count of pings")
	flag.DurationVar(&pingTimeout, "ping-timeout", time.Duration(time.Second*5), "timeout to wait for response")
	flag.StringVar(&statusPath, "status-path", "respondd-crashed.json", "path to store status")
	flag.StringVar(&meshviewerPATH, "meshviewer-path", "meshviewer.json", "path to meshviewer.json from yanic")
	flag.Parse()

	log.AddHook(&Hook{})
	log.SetLevel(stdLogger.Level(loglevel))
	log.SetFormatter(&log.TextFormatter{
		// The flag *enables* timestamps, the formatter option *disables*
		// them, so the value has to be negated.
		DisableTimestamp: !timestamps,
	})

	pinger, err := ping.New("", "::")
	if err != nil {
		log.Panicf("not able to bind pinger: %s", err)
	}

	// Register for signals before starting the worker so that none is missed.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

	// done is closed to ask the worker to stop; a channel avoids sharing a
	// plain bool between goroutines.
	done := make(chan struct{})
	wg := sync.WaitGroup{}
	wg.Add(1) // must happen before the goroutine starts, not inside it

	log.Info("start tester")
	go func() {
		defer wg.Done()
		timer := time.NewTimer(runEvery)
		defer timer.Stop()
		for {
			select {
			case <-done:
				return
			case <-timer.C:
				run(pinger)
				// Re-arm only after the run, so runs never overlap.
				timer.Reset(runEvery)
			}
		}
	}()

	sig := <-sigs
	close(done)
	wg.Wait()
	log.Infof("stopped: %s", sig)
}

View File

@ -0,0 +1,119 @@
package main
import (
"encoding/json"
"net"
"os"
"sync"
"github.com/bdlm/log"
"github.com/digineo/go-ping"
meshviewerFFRGB "github.com/FreifunkBremen/yanic/output/meshviewer-ffrgb"
)
// pingNode pings one address of node and reports whether it answered.
//
// addrStr is resolved as an IPv6 address. If it cannot be resolved, a
// warning is logged and false is returned. Link-local addresses need an
// interface as zone: they are pinged via the interface given by the global
// iface, or skipped (returning false) when iface is empty. The ping is
// attempted up to pingCount times with pingTimeout each.
func pingNode(pinger *ping.Pinger, node *meshviewerFFRGB.Node, addrStr string) bool {
	logNode := log.WithField("node_id", node.NodeID)

	addr, err := net.ResolveIPAddr("ip6", addrStr)
	if err != nil {
		// addr is nil on error; continuing would dereference it below.
		logNode.Warnf("error parse ip address for ping: %s", err)
		return false
	}

	// Checking the parsed IP instead of the textual "fe80:" prefix avoids
	// slicing strings shorter than five bytes and is case-insensitive.
	if addr.IP.IsLinkLocalUnicast() {
		if iface == "" {
			logNode.Debug("skip ll-addr")
			return false
		}
		addr.Zone = iface
	}

	logNode = logNode.WithField("addr", addr.String())

	_, err = pinger.PingAttempts(addr, pingTimeout, pingCount)
	success := err == nil

	logNode.WithFields(map[string]interface{}{
		"success": success,
	}).Debug("pong")
	return success
}
// run performs one check cycle:
//
//  1. load the meshviewer.json from meshviewerPATH (HTTP(S) URL or file),
//  2. ping every address of every node marked offline,
//  3. record each offline node that still answers a ping as "crashed"
//     (the node is up, so presumably only respondd stopped answering),
//  4. write the resulting status as JSON to statusPath.
//
// Errors while loading are logged and stored in the status; the status file
// is written regardless, so consumers can see the error.
func run(pinger *ping.Pinger) {
	status := &Status{NodesCrashed: []*Node{}}

	var meshviewerjson meshviewerFFRGB.Meshviewer
	// The length guard keeps short paths from panicking in the slice below.
	if len(meshviewerPATH) >= 4 && meshviewerPATH[:4] == "http" {
		if err := JSONRequest(meshviewerPATH, &meshviewerjson); err != nil {
			status.Error = err.Error()
			log.Errorf("error during fetch meshviewer.json: %s", err)
		}
	} else if meshviewerFile, err := os.Open(meshviewerPATH); err != nil {
		status.Error = err.Error()
		log.Errorf("error during fetch meshviewer.json: %s", err)
	} else {
		err := json.NewDecoder(meshviewerFile).Decode(&meshviewerjson)
		// Close right away; run is called repeatedly and must not leak
		// file descriptors.
		meshviewerFile.Close()
		if err != nil {
			status.Error = err.Error()
			log.Errorf("error during decode meshviewer.json: %s", err)
		}
	}
	log.Debug("fetched meshviewer.json")

	// Offline nodes are counted here, in the loop goroutine, so the counter
	// is never touched concurrently.
	offline := 0
	wg := sync.WaitGroup{}
	for _, node := range meshviewerjson.Nodes {
		if node == nil || node.IsOnline {
			continue
		}
		offline++
		wg.Add(1)
		go func(node *meshviewerFFRGB.Node) {
			defer wg.Done()
			if nodeReachable(pinger, node) {
				log.WithField("node", node.NodeID).Info("add to crashed list")
				status.AddNode(node)
			}
		}(node)
	}
	wg.Wait()

	// All workers are done, so status can be accessed without locking.
	status.NodesCount = len(meshviewerjson.Nodes)
	status.NodesOfflineCount = offline

	writeStatus(status)

	log.WithFields(map[string]interface{}{
		"count_meshviewer": status.NodesCount,
		"count_offline":    status.NodesOfflineCount,
		"count_status":     len(status.NodesCrashed),
	}).Info("test complete")
}

// nodeReachable pings all addresses of node concurrently and reports whether
// at least one of them answered.
func nodeReachable(pinger *ping.Pinger, node *meshviewerFFRGB.Node) bool {
	// One result slot per address: every goroutine writes only its own
	// slot, so no further synchronisation is needed.
	results := make([]bool, len(node.Addresses))

	wg := sync.WaitGroup{}
	wg.Add(len(node.Addresses))
	for i, addr := range node.Addresses {
		go func(i int, addr string) {
			defer wg.Done()
			results[i] = pingNode(pinger, node, addr)
		}(i, addr)
	}
	wg.Wait()

	for _, ok := range results {
		if ok {
			return true
		}
	}
	return false
}

// writeStatus encodes status as JSON into a temporary file next to
// statusPath and renames it over statusPath. Failures are logged as
// warnings; after a failed write the previous status file is left in place.
func writeStatus(status *Status) {
	tmpFile := statusPath + ".tmp"

	statusFile, err := os.OpenFile(tmpFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		log.Warnf("unable to open status file: %s", err)
		return
	}

	encodeErr := json.NewEncoder(statusFile).Encode(status)
	// Close before renaming so the file is complete when it becomes visible.
	closeErr := statusFile.Close()
	if encodeErr == nil {
		encodeErr = closeErr
	}
	if encodeErr != nil {
		log.Warnf("unable to write status json: %s", encodeErr)
		return
	}

	if err := os.Rename(tmpFile, statusPath); err != nil {
		log.Warnf("unable to move status file: %s", err)
	}
}

View File

@ -0,0 +1,31 @@
package main
import (
"sync"
meshviewerFFRGB "github.com/FreifunkBremen/yanic/output/meshviewer-ffrgb"
)
// Node is the entry written to the status file for a node put on the
// crashed list; Status.AddNode fills it from a meshviewer node.
type Node struct {
// NodeID is copied from the meshviewer node's NodeID.
NodeID string `json:"node_id"`
// Hostname is copied from the meshviewer node's Hostname.
Hostname string `json:"hostname"`
// Addresses is the address list of the meshviewer node.
Addresses []string `json:"addresses"`
}
// Status is the result of one check run and is serialised as JSON into the
// status file.
type Status struct {
// Error is the error text from loading the meshviewer.json; it is omitted
// from the JSON when empty.
Error string `json:"error,omitempty"`
// NodesCount is the number of nodes in the meshviewer.json.
NodesCount int `json:"nodes_count"`
// NodesOfflineCount is the number of nodes marked as offline.
NodesOfflineCount int `json:"nodes_offline_count"`
// NodesCrashed lists the nodes added via AddNode.
NodesCrashed []*Node `json:"nodes_crashed"`
// The embedded mutex is held by AddNode while appending to NodesCrashed.
sync.Mutex
}
// AddNode appends node to the list of crashed nodes, keeping only its node
// id, hostname and addresses. The status lock is held while appending, so
// AddNode may be called from several goroutines.
func (s *Status) AddNode(node *meshviewerFFRGB.Node) {
	crashed := &Node{
		NodeID:    node.NodeID,
		Hostname:  node.Hostname,
		Addresses: node.Addresses,
	}

	s.Lock()
	defer s.Unlock()
	s.NodesCrashed = append(s.NodesCrashed, crashed)
}