From d7402fb5467b74053dfeb269e36d85228fbe4287 Mon Sep 17 00:00:00 2001 From: genofire Date: Sat, 9 Jul 2022 20:20:48 +0200 Subject: [PATCH] add prometheus exporter --- cmd/serve.go | 24 ++-- config_example.toml | 9 ++ webserver/config.go | 9 +- webserver/prometheus/config.go | 31 +++++ webserver/prometheus/exporter.go | 124 ++++++++++++++++++ webserver/prometheus/metric.go | 38 ++++++ webserver/prometheus/metric_test.go | 73 +++++++++++ webserver/prometheus/transform.go | 166 +++++++++++++++++++++++++ webserver/prometheus/transform_test.go | 124 ++++++++++++++++++ webserver/webserver.go | 6 +- webserver/webserver_test.go | 7 +- 11 files changed, 597 insertions(+), 14 deletions(-) create mode 100644 webserver/prometheus/config.go create mode 100644 webserver/prometheus/exporter.go create mode 100644 webserver/prometheus/metric.go create mode 100644 webserver/prometheus/metric_test.go create mode 100644 webserver/prometheus/transform.go create mode 100644 webserver/prometheus/transform_test.go diff --git a/cmd/serve.go b/cmd/serve.go index 0fd283d..9c601a6 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -14,6 +14,7 @@ import ( "github.com/FreifunkBremen/yanic/respond" "github.com/FreifunkBremen/yanic/runtime" "github.com/FreifunkBremen/yanic/webserver" + "github.com/FreifunkBremen/yanic/webserver/prometheus" ) // serveCmd represents the serve command @@ -39,13 +40,6 @@ var serveCmd = &cobra.Command{ } defer allOutput.Close() - if config.Webserver.Enable { - log.Infof("starting webserver on %s", config.Webserver.Bind) - srv := webserver.New(config.Webserver.Bind, config.Webserver.Webroot) - go webserver.Start(srv) - defer srv.Close() - } - if config.Respondd.Enable { // Delaying startup to start at a multiple of `duration` since the zero time. if duration := config.Respondd.Synchronize.Duration; duration > 0 { @@ -60,6 +54,22 @@ var serveCmd = &cobra.Command{ defer collector.Close() } + if config.Webserver.Enable { + log.Infof("starting webserver on %s", config.Webserver.Bind) + srv := webserver.New(config.Webserver) + go webserver.Start(srv) + if config.Webserver.Prometheus.Enable { + if config.Respondd.Enable { + prometheus.CreateExporter(config.Webserver.Prometheus, srv, collector, nodes) + } else { + log.Panic("to use prometheus exporter, please enable [respondd].") + } + } + defer srv.Close() + } else if config.Webserver.Prometheus.Enable { + log.Panic("to use prometheus exporter, please enable [webserver].") + } + // Wait for INT/TERM sigs := make(chan os.Signal, 1) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) diff --git a/config_example.toml b/config_example.toml index 8c1604e..05482e5 100644 --- a/config_example.toml +++ b/config_example.toml @@ -54,6 +54,15 @@ enable = false bind = "127.0.0.1:8080" webroot = "/var/www/html/meshviewer" +[webserver.prometheus] +# need webserver and respondd enabled (the exporter used the first interface +enable = false +# how long to wait till answer are calulated +wait = "1s" +# how old values are allowed +outdated = "2s" + + [nodes] # Cache file diff --git a/webserver/config.go b/webserver/config.go index 6edca0b..4998caa 100644 --- a/webserver/config.go +++ b/webserver/config.go @@ -1,7 +1,10 @@ package webserver +import "github.com/FreifunkBremen/yanic/webserver/prometheus" + type Config struct { - Enable bool `toml:"enable"` - Bind string `toml:"bind"` - Webroot string `toml:"webroot"` + Enable bool `toml:"enable"` + Bind string `toml:"bind"` + Webroot string `toml:"webroot"` + Prometheus prometheus.Config `toml:"prometheus"` } diff --git a/webserver/prometheus/config.go b/webserver/prometheus/config.go new file mode 100644 index 0000000..ae0ee1e --- /dev/null +++ b/webserver/prometheus/config.go @@ -0,0 +1,31 @@ +package prometheus + +import ( + "net/http" + + "github.com/FreifunkBremen/yanic/lib/duration" + + "github.com/FreifunkBremen/yanic/respond" + "github.com/FreifunkBremen/yanic/runtime" +) + +type Config struct { + Enable bool `toml:"enable"` + Wait duration.Duration `toml:"wait"` + Outdated duration.Duration `toml:"outdated"` +} + +func CreateExporter(config Config, srv *http.Server, coll *respond.Collector, nodes *runtime.Nodes) { + mux := http.NewServeMux() + ex := &Exporter{ + config: config, + srv: srv, + coll: coll, + nodes: nodes, + } + mux.Handle("/metric", ex) + if srv.Handler != nil { + mux.Handle("/", srv.Handler) + } + srv.Handler = mux +} diff --git a/webserver/prometheus/exporter.go b/webserver/prometheus/exporter.go new file mode 100644 index 0000000..2421e61 --- /dev/null +++ b/webserver/prometheus/exporter.go @@ -0,0 +1,124 @@ +package prometheus + +import ( + "io" + "net" + "net/http" + "time" + + "github.com/bdlm/log" + + "github.com/FreifunkBremen/yanic/respond" + "github.com/FreifunkBremen/yanic/runtime" +) + +type Exporter struct { + config Config + srv *http.Server + coll *respond.Collector + nodes *runtime.Nodes +} + +func (ex *Exporter) ServeHTTP(res http.ResponseWriter, req *http.Request) { + var ip net.IP + nodeID := "" + + queryValues := req.URL.Query() + + if nodeIDs := queryValues["node_id"]; len(nodeIDs) > 0 { + nodeID = nodeIDs[0] + node, ok := ex.nodes.List[nodeID] + if !ok || node.Address == nil { + http.Error(res, "not able to get node by cached nodeid", http.StatusNotFound) + return + } + ip = node.Address.IP + if ex.writeNode(res, node, true) { + log.WithFields(map[string]interface{}{ + "ip": ip, + "node_id": nodeID, + }).Debug("take node from cache") + return + } + } else if ipstr := queryValues["ip"]; len(ipstr) > 0 { + ip = net.ParseIP(ipstr[0]) + if ip == nil { + http.Error(res, "not able to parse ip address", http.StatusBadRequest) + return + } + node_select := ex.nodes.Select(func(n *runtime.Node) bool { + n_addr := n.Address + nodeID = n.Nodeinfo.NodeID + return n_addr != nil && ip.Equal(n_addr.IP) + }) + if len(node_select) == 1 { + if ex.writeNode(res, node_select[0], true) { + log.WithFields(map[string]interface{}{ + "ip": ip, + "node_id": nodeID, + }).Debug("take node from cache") + return + } + } else if len(node_select) > 1 { + log.Error("strange count of nodes") + } + } else { + http.Error(res, "please request with ?ip= or ?node_id=", http.StatusNotFound) + return + } + + // send request + ex.coll.SendPacket(ip) + + // wait + log.WithFields(map[string]interface{}{ + "ip": ip, + "node_id": nodeID, + }).Debug("waited for") + time.Sleep(ex.config.Wait.Duration) + + // result + node, ok := ex.nodes.List[nodeID] + if !ok { + http.Error(res, "not able to fetch this node", http.StatusGatewayTimeout) + return + } + ex.writeNode(res, node, false) +} + +func (ex *Exporter) writeNode(res http.ResponseWriter, node *runtime.Node, dry bool) bool { + logger := log.WithField("database", "prometheus") + if nodeinfo := node.Nodeinfo; nodeinfo != nil { + logger = logger.WithField("node_id", nodeinfo.NodeID) + } + + if !time.Now().Before(node.Lastseen.GetTime().Add(ex.config.Outdated.Duration)) { + if dry { + return false + } + m := Metric{Labels: MetricLabelsFromNode(node), Name: "yanic_node_up", Value: 0} + str, err := m.String() + if err == nil { + _, err = io.WriteString(res, str+"\n") + } + if err != nil { + logger.Warnf("not able to get metrics from node: %s", err) + http.Error(res, "not able to generate metric from node", http.StatusInternalServerError) + } + return false + } + + metrics := MetricsFromNode(ex.nodes, node) + for _, m := range metrics { + str, err := m.String() + if err == nil { + _, err = io.WriteString(res, str+"\n") + } + if err != nil { + logger.Warnf("not able to get metrics from node: %s", err) + http.Error(res, "not able to generate metric from node", http.StatusInternalServerError) + } + } + + return true +} diff --git a/webserver/prometheus/metric.go b/webserver/prometheus/metric.go new file mode 100644 index 0000000..9b06dc9 --- /dev/null +++ b/webserver/prometheus/metric.go @@ -0,0 +1,38 @@ +package prometheus + +import ( + "errors" + "fmt" + "strings" +) + +type Metric struct { + Name string + Value interface{} + Labels map[string]interface{} +} + +func (m *Metric) String() (string, error) { + if m.Value == nil { + return "", errors.New("no value of metric found") + } + output := m.Name + if len(m.Labels) > 0 { + output += "{" + for label, v := range m.Labels { + switch value := v.(type) { + case string: + output = fmt.Sprintf("%s%s=\"%s\",", output, label, strings.ReplaceAll(value, "\"", "'")) + case float32: + output = fmt.Sprintf("%s%s=\"%.4f\",", output, label, value) + case float64: + output = fmt.Sprintf("%s%s=\"%.4f\",", output, label, value) + default: + output = fmt.Sprintf("%s%s=\"%v\",", output, label, value) + } + } + lastChar := len(output) - 1 + output = output[:lastChar] + "}" + } + return fmt.Sprintf("%s %v", output, m.Value), nil +} diff --git a/webserver/prometheus/metric_test.go b/webserver/prometheus/metric_test.go new file mode 100644 index 0000000..8ebef7a --- /dev/null +++ b/webserver/prometheus/metric_test.go @@ -0,0 +1,73 @@ +package prometheus + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestMetric(t *testing.T) { + assert := assert.New(t) + + var tests = []struct { + input Metric + err string + output []string + }{ + { + input: Metric{Name: "test1"}, + err: "no value of metric found", + }, + { + input: Metric{Name: "test2", Value: 3}, + output: []string{"test2 3"}, + }, + { + input: Metric{Name: "test2-obj", Value: 1, + Labels: map[string]interface{}{ + "test": []string{"4"}, + }, + }, + output: []string{`test2-obj{test="[4]"} 1`}, + }, + { + input: Metric{Name: "test3", Value: 3.2, + Labels: map[string]interface{}{ + "site_code": "lola", + }, + }, + output: []string{`test3{site_code="lola"} 3.2`}, + }, + { + input: Metric{Name: "test4", Value: "0", + Labels: map[string]interface{}{ + "frequency": float32(3.2), + }, + }, + output: []string{`test4{frequency="3.2000"} 0`}, + }, + { + input: Metric{Name: "test5", Value: 3, + Labels: map[string]interface{}{ + "node_id": "lola", + "blub": 3.3423533, + }, + }, + output: []string{ + `test5{blub="3.3424",node_id="lola"} 3`, + `test5{node_id="lola",blub="3.3424"} 3`, + }, + }, + } + + for _, test := range tests { + output, err := test.input.String() + + if test.err == "" { + assert.NoError(err) + assert.Contains(test.output, output, "not acceptable output found") + } else { + assert.EqualError(err, test.err) + } + } +} diff --git a/webserver/prometheus/transform.go b/webserver/prometheus/transform.go new file mode 100644 index 0000000..fe3406c --- /dev/null +++ b/webserver/prometheus/transform.go @@ -0,0 +1,166 @@ +package prometheus + +import ( + "github.com/FreifunkBremen/yanic/runtime" +) + +func MetricLabelsFromNode(node *runtime.Node) (labels map[string]interface{}) { + labels = make(map[string]interface{}) + + nodeinfo := node.Nodeinfo + if nodeinfo == nil { + return + } + + labels["node_id"] = nodeinfo.NodeID + labels["hostname"] = nodeinfo.Hostname + + if nodeinfo.System.SiteCode != "" { + labels["site_code"] = nodeinfo.System.SiteCode + } + if nodeinfo.System.DomainCode != "" { + labels["domain_code"] = nodeinfo.System.DomainCode + } + if owner := nodeinfo.Owner; owner != nil { + labels["owner"] = owner.Contact + } + // Hardware + labels["model"] = nodeinfo.Hardware.Model + labels["nproc"] = nodeinfo.Hardware.Nproc + if firmware := nodeinfo.Software.Firmware; firmware != nil { + labels["firmware_base"] = firmware.Base + labels["firmware_release"] = firmware.Release + } + if nodeinfo.Software.Autoupdater != nil && nodeinfo.Software.Autoupdater.Enabled { + labels["autoupdater"] = nodeinfo.Software.Autoupdater.Branch + } else { + labels["autoupdater"] = runtime.DISABLED_AUTOUPDATER + } + + if location := nodeinfo.Location; location != nil { + labels["location_lat"] = location.Latitude + labels["location_long"] = location.Longitude + } + + return +} + +func MetricsFromNode(nodes *runtime.Nodes, node *runtime.Node) []Metric { + m := []Metric{} + + // before node metrics to get link statics undependent of node validation + for _, link := range nodes.NodeLinks(node) { + label := map[string]interface{}{ + "source_id": link.SourceID, + "source_addr": link.SourceAddress, + "source_hostname": link.SourceHostname, + "target_id": link.TargetID, + "target_addr": link.TargetAddress, + } + if hostname := link.SourceHostname; hostname != "" { + label["source_hostname"] = hostname + } + if hostname := link.TargetHostname; hostname != "" { + label["target_hostname"] = hostname + } + m = append(m, Metric{ + Labels: label, + Name: "yanic_link", + Value: link.TQ * 100, + }) + } + + nodeinfo := node.Nodeinfo + stats := node.Statistics + + // validation + if nodeinfo == nil || stats == nil { + return m + } + + labels := MetricLabelsFromNode(node) + + addMetric := func(name string, value interface{}) { + m = append(m, Metric{Labels: labels, Name: "yanic_" + name, Value: value}) + } + + if node.Online { + addMetric("node_up", 1) + } else { + addMetric("node_up", 0) + } + + addMetric("node_load", stats.LoadAverage) + + addMetric("node_time_up", stats.Uptime) + addMetric("node_time_idle", stats.Idletime) + + addMetric("node_proc_running", stats.Processes.Running) + + addMetric("node_clients_wifi", stats.Clients.Wifi) + addMetric("node_clients_wifi24", stats.Clients.Wifi24) + addMetric("node_clients_wifi5", stats.Clients.Wifi5) + addMetric("node_clients_total", stats.Clients.Total) + + addMetric("node_memory_buffers", stats.Memory.Buffers) + addMetric("node_memory_cached", stats.Memory.Cached) + addMetric("node_memory_free", stats.Memory.Free) + addMetric("node_memory_total", stats.Memory.Total) + addMetric("node_memory_available", stats.Memory.Available) + + //TODO Neighbours count after merging improvement in influxdb and graphite + + if procstat := stats.ProcStats; procstat != nil { + addMetric("node_stat_cpu_user", procstat.CPU.User) + addMetric("node_stat_cpu_nice", procstat.CPU.Nice) + addMetric("node_stat_cpu_system", procstat.CPU.System) + addMetric("node_stat_cpu_idle", procstat.CPU.Idle) + addMetric("node_stat_cpu_iowait", procstat.CPU.IOWait) + addMetric("node_stat_cpu_irq", procstat.CPU.IRQ) + addMetric("node_stat_cpu_softirq", procstat.CPU.SoftIRQ) + addMetric("node_stat_intr", procstat.Intr) + addMetric("node_stat_ctxt", procstat.ContextSwitches) + addMetric("node_stat_softirq", procstat.SoftIRQ) + addMetric("node_stat_processes", procstat.Processes) + } + + if t := stats.Traffic.Rx; t != nil { + addMetric("node_traffic_rx_bytes", t.Bytes) + addMetric("node_traffic_rx_packets", t.Packets) + } + if t := stats.Traffic.Tx; t != nil { + addMetric("node_traffic_tx_bytes", t.Bytes) + addMetric("node_traffic_tx_packets", t.Packets) + addMetric("node_traffic_tx_dropped", t.Dropped) + } + if t := stats.Traffic.Forward; t != nil { + addMetric("node_traffic_forward_bytes", t.Bytes) + addMetric("node_traffic_forward_packets", t.Packets) + } + if t := stats.Traffic.MgmtRx; t != nil { + addMetric("node_traffic_mgmt_rx_bytes", t.Bytes) + addMetric("node_traffic_mgmt_rx_packets", t.Packets) + } + if t := stats.Traffic.MgmtTx; t != nil { + addMetric("node_traffic_mgmt_tx_bytes", t.Bytes) + addMetric("node_traffic_mgmt_tx_packets", t.Packets) + } + + for _, airtime := range stats.Wireless { + labels["frequency_name"] = airtime.FrequencyName() + addMetric("node_frequency", airtime.Frequency) + addMetric("node_airtime_chan_util", airtime.ChanUtil) + addMetric("node_airtime_rx_util", airtime.RxUtil) + addMetric("node_airtime_tx_util", airtime.TxUtil) + addMetric("node_airtime_noise", airtime.Noise) + if wireless := nodeinfo.Wireless; wireless != nil { + if airtime.Frequency < 5000 { + addMetric("node_wireless_txpower", wireless.TxPower24) + } else { + addMetric("node_wireless_txpower", wireless.TxPower5) + } + } + } + + return m +} diff --git a/webserver/prometheus/transform_test.go b/webserver/prometheus/transform_test.go new file mode 100644 index 0000000..42f7ad6 --- /dev/null +++ b/webserver/prometheus/transform_test.go @@ -0,0 +1,124 @@ +package prometheus + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/FreifunkBremen/yanic/data" + "github.com/FreifunkBremen/yanic/runtime" +) + +func TestMetricsFromNode(t *testing.T) { + assert := assert.New(t) + + m := MetricsFromNode(nil, &runtime.Node{}) + assert.Len(m, 0) + + nodes := runtime.NewNodes(&runtime.NodesConfig{}) + nodes.AddNode(&runtime.Node{ + Nodeinfo: &data.Nodeinfo{ + NodeID: "lola", + Network: data.Network{ + Mesh: map[string]*data.NetworkInterface{ + "mesh1": { + Interfaces: struct { + Wireless []string `json:"wireless,omitempty"` + Other []string `json:"other,omitempty"` + Tunnel []string `json:"tunnel,omitempty"` + }{ + Tunnel: []string{"fe80::2"}, + }, + }, + }, + }, + }, + }) + + node := &runtime.Node{ + Online: false, + Nodeinfo: &data.Nodeinfo{ + NodeID: "wasd1", + Network: data.Network{ + Mesh: map[string]*data.NetworkInterface{ + "mesh0": { + Interfaces: struct { + Wireless []string `json:"wireless,omitempty"` + Other []string `json:"other,omitempty"` + Tunnel []string `json:"tunnel,omitempty"` + }{ + Tunnel: []string{"fe80::1"}, + }, + }, + }, + }, + Software: data.Software{ + Autoupdater: &struct { + Enabled bool `json:"enabled,omitempty"` + Branch string `json:"branch,omitempty"` + }{ + Enabled: true, + Branch: "testing", + }, + }, + }, + Statistics: &data.Statistics{}, + Neighbours: &data.Neighbours{ + NodeID: "wasd1", + Babel: map[string]data.BabelNeighbours{ + "mesh0": { + LinkLocalAddress: "fe80::1", + Neighbours: map[string]data.BabelLink{ + "fe80::2": {Cost: 20000}, + }, + }, + }, + }, + } + nodes.AddNode(node) + m = MetricsFromNode(nodes, node) + assert.Len(m, 15) + assert.Equal(m[0].Labels["source_id"], "wasd1") + + m = MetricsFromNode(nil, &runtime.Node{ + Online: true, + Nodeinfo: &data.Nodeinfo{ + NodeID: "wasd", + System: data.System{ + SiteCode: "ffhb", + DomainCode: "city", + }, + Owner: &data.Owner{Contact: "mailto:blub@example.org"}, + Location: &data.Location{ + Latitude: 52.0, + Longitude: 4.0, + }, + Wireless: &data.Wireless{ + TxPower24: 0, + }, + }, + Statistics: &data.Statistics{ + ProcStats: &data.ProcStats{}, + Traffic: struct { + Tx *data.Traffic `json:"tx"` + Rx *data.Traffic `json:"rx"` + Forward *data.Traffic `json:"forward"` + MgmtTx *data.Traffic `json:"mgmt_tx"` + MgmtRx *data.Traffic `json:"mgmt_rx"` + }{ + Tx: &data.Traffic{}, + Rx: &data.Traffic{}, + Forward: &data.Traffic{}, + MgmtTx: &data.Traffic{}, + MgmtRx: &data.Traffic{}, + }, + Wireless: data.WirelessStatistics{ + &data.WirelessAirtime{Frequency: 5002}, + &data.WirelessAirtime{Frequency: 2430}, + }, + }, + }) + + assert.Len(m, 48) + assert.Equal(m[0].Labels["node_id"], "wasd") +} diff --git a/webserver/webserver.go b/webserver/webserver.go index 0a27769..eb0193b 100644 --- a/webserver/webserver.go +++ b/webserver/webserver.go @@ -8,10 +8,10 @@ import ( ) // New creates a new webserver and starts it -func New(bindAddr, webroot string) *http.Server { +func New(config Config) *http.Server { return &http.Server{ - Addr: bindAddr, - Handler: gziphandler.GzipHandler(http.FileServer(http.Dir(webroot))), + Addr: config.Bind, + Handler: gziphandler.GzipHandler(http.FileServer(http.Dir(config.Webroot))), } } diff --git a/webserver/webserver_test.go b/webserver/webserver_test.go index d612799..ceafc2c 100644 --- a/webserver/webserver_test.go +++ b/webserver/webserver_test.go @@ -10,7 +10,12 @@ import ( func TestWebserver(t *testing.T) { assert := assert.New(t) - srv := New(":12345", "/tmp") + config := Config{ + Bind: ":12345", + Webroot: "/tmp", + } + + srv := New(config) assert.NotNil(srv) go Start(srv)