Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NETOBSERV-1935: enable metrics from list/nested fields #863

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion apis/flowcollector/v1beta1/flowcollector_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,14 @@ func TestBeta1ConversionRoundtrip_Metrics(t *testing.T) {
err := initial.ConvertTo(&converted)
assert.NoError(err)

expectedDefaultMetrics := []v1beta2.FLPMetric{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total", "namespace_dns_latency_seconds"}
expectedDefaultMetrics := []v1beta2.FLPMetric{
"namespace_egress_packets_total",
"namespace_flows_total",
"namespace_rtt_seconds",
"namespace_drop_packets_total",
"namespace_dns_latency_seconds",
"namespace_network_policy_events_total",
}
assert.Equal([]v1beta2.FLPAlert{v1beta2.AlertLokiError}, converted.Spec.Processor.Metrics.DisableAlerts)
assert.NotNil(converted.Spec.Processor.Metrics.IncludeList)
assert.Equal(expectedDefaultMetrics, *converted.Spec.Processor.Metrics.IncludeList)
Expand Down
5 changes: 3 additions & 2 deletions apis/flowcollector/v1beta2/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ const (
)

// Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds"
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds";"node_network_policy_events_total";"namespace_network_policy_events_total";"workload_network_policy_events_total"
type FLPMetric string

// `FLPMetrics` define the desired FLP configuration regarding metrics
Expand All @@ -547,7 +547,8 @@ type FLPMetrics struct {
// Metrics enabled by default are:
// `namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
// `workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
// `namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
// More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
// +optional
IncludeList *[]FLPMetric `json:"includeList,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions apis/flowmetrics/v1alpha1/flowmetric_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ type FlowMetricSpec struct {
// +optional
Labels []string `json:"labels"`

// `flatten` is a list of list-type fields that must be flattened, such as Interfaces and NetworkEvents. Flattened fields generate one metric per item in that field.
// For instance, when flattening `Interfaces` on a bytes counter, a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.
// +optional
Flatten []string `json:"flatten"`

// Set the `remap` property to use different names for the generated metric labels than the flow fields. Use the origin flow fields as keys, and the desired label names as values.
// +optional
Remap map[string]string `json:"remap"`
Expand Down
29 changes: 19 additions & 10 deletions apis/flowmetrics/v1alpha1/flowmetric_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,17 @@ func checkFlowMetricCartinality(fMetric *FlowMetric) admission.Warnings {
}

func validateFlowMetric(_ context.Context, fMetric *FlowMetric) (admission.Warnings, error) {
var str []string
var fields []string
var allErrs field.ErrorList

for _, f := range fMetric.Spec.Filters {
str = append(str, f.Field)
fields = append(fields, f.Field)
}

if len(str) != 0 {
if !helper.FindFields(str, false) {
allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "filters"), str,
fmt.Sprintf("invalid filter field: %s", str)))
if len(fields) != 0 {
if !helper.FindFields(fields, false) {
allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "filters"), fields,
fmt.Sprintf("invalid filter field: %s", fields)))
}
}

Expand All @@ -100,12 +100,13 @@ func validateFlowMetric(_ context.Context, fMetric *FlowMetric) (admission.Warni
fmt.Sprintf("invalid label name: %s", fMetric.Spec.Labels)))
}

labelsMap := make(map[string]any, len(fMetric.Spec.Labels))
for _, label := range fMetric.Spec.Labels {
labelsMap[label] = nil
}

// Only fields defined as Labels are valid for remapping
if len(fMetric.Spec.Remap) != 0 {
labelsMap := make(map[string]any, len(fMetric.Spec.Labels))
for _, label := range fMetric.Spec.Labels {
labelsMap[label] = nil
}
var invalidMapping []string
for toRemap := range fMetric.Spec.Remap {
if _, ok := labelsMap[toRemap]; !ok {
Expand All @@ -117,6 +118,14 @@ func validateFlowMetric(_ context.Context, fMetric *FlowMetric) (admission.Warni
fmt.Sprintf("some fields defined for remapping are not defined as labels: %v", invalidMapping)))
}
}

// Check for valid fields
if len(fMetric.Spec.Flatten) != 0 {
if !helper.FindFields(fMetric.Spec.Flatten, false) {
allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "flatten"), fMetric.Spec.Flatten,
fmt.Sprintf("invalid fields to flatten: %s", fMetric.Spec.Flatten)))
}
}
}

if fMetric.Spec.ValueField != "" {
Expand Down
21 changes: 21 additions & 0 deletions apis/flowmetrics/v1alpha1/flowmetric_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,27 @@ func TestFlowMetric(t *testing.T) {
},
expectedError: "invalid value field",
},
{
desc: "Valid nested fields",
m: &FlowMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test1",
Namespace: "test-namespace",
},
Spec: FlowMetricSpec{
Labels: []string{"NetworkEvents>Name"},
Flatten: []string{"NetworkEvents"},
Filters: []MetricFilter{
{
Field: "NetworkEvents>Type",
Value: "acl",
},
},
Remap: map[string]string{"NetworkEvents>Name": "name"},
},
},
expectedError: "",
},
}

for _, test := range tests {
Expand Down
5 changes: 5 additions & 0 deletions apis/flowmetrics/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion bundle/manifests/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8378,7 +8378,8 @@ spec:
Metrics enabled by default are:
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
items:
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
Expand Down Expand Up @@ -8410,6 +8411,9 @@ spec:
- namespace_dns_latency_seconds
- node_dns_latency_seconds
- workload_dns_latency_seconds
- node_network_policy_events_total
- namespace_network_policy_events_total
- workload_network_policy_events_total
type: string
type: array
server:
Expand Down
7 changes: 7 additions & 0 deletions bundle/manifests/flows.netobserv.io_flowmetrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ spec:
- matchType
type: object
type: array
flatten:
description: |-
`flatten` is a list of list-type fields that must be flattened, such as Interfaces and NetworkEvents. Flattened fields generate one metric per item in that field.
For instance, when flattening `Interfaces` on a bytes counter, a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.
items:
type: string
type: array
labels:
description: |-
`labels` is a list of fields that should be used as Prometheus labels, also known as dimensions.
Expand Down
6 changes: 5 additions & 1 deletion config/crd/bases/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7733,7 +7733,8 @@ spec:
Metrics enabled by default are:
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
items:
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
Expand Down Expand Up @@ -7765,6 +7766,9 @@ spec:
- namespace_dns_latency_seconds
- node_dns_latency_seconds
- workload_dns_latency_seconds
- node_network_policy_events_total
- namespace_network_policy_events_total
- workload_network_policy_events_total
type: string
type: array
server:
Expand Down
7 changes: 7 additions & 0 deletions config/crd/bases/flows.netobserv.io_flowmetrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ spec:
- matchType
type: object
type: array
flatten:
description: |-
`flatten` is a list of list-type fields that must be flattened, such as Interfaces and NetworkEvents. Flattened fields generate one metric per item in that field.
For instance, when flattening `Interfaces` on a bytes counter, a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.
items:
type: string
type: array
labels:
description: |-
`labels` is a list of fields that should be used as Prometheus labels, also known as dimensions.
Expand Down
21 changes: 21 additions & 0 deletions config/samples/flowmetrics/network-policies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Count network policy events
# More examples in https://github.com/netobserv/network-observability-operator/tree/main/config/samples/flowmetrics
apiVersion: flows.netobserv.io/v1alpha1
kind: FlowMetric
metadata:
name: network-policy-events
namespace: netobserv
spec:
metricName: network_policy_events_total
type: Counter
# Nested fields are addressed with the `>` separator, e.g. `NetworkEvents>Name`.
labels: [NetworkEvents>Type, NetworkEvents>Namespace, NetworkEvents>Name, NetworkEvents>Action, NetworkEvents>Direction]
# Filter on the nested `Feature` field of each network event, with value `acl`.
filters:
- field: NetworkEvents>Feature
value: acl
# `NetworkEvents` is a list-type field: flattening it generates one metric per item in that list.
flatten: [NetworkEvents]
# Use shorter label names than the flow field names. Only fields declared in `labels` can be remapped.
remap:
"NetworkEvents>Type": type
"NetworkEvents>Namespace": namespace
"NetworkEvents>Name": name
"NetworkEvents>Action": action
"NetworkEvents>Direction": direction
14 changes: 14 additions & 0 deletions config/samples/flowmetrics/per-interface.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Count flows per network interface
# More examples in https://github.com/netobserv/network-observability-operator/tree/main/config/samples/flowmetrics
apiVersion: flows.netobserv.io/v1alpha1
kind: FlowMetric
metadata:
name: per-interface
namespace: netobserv
spec:
metricName: per_interface
type: Counter
labels: [Interfaces]
# `Interfaces` is a list-type field: flattening it generates one metric per item,
# e.g. a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.
flatten: [Interfaces]
# Expose the `Interfaces` field under the label name `interface`. Only fields declared in `labels` can be remapped.
remap:
Interfaces: interface
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ filters:
component: number
hint: Specify a TCP smoothed Round Trip Time in nanoseconds.
- id: network_events
name: Network events flow monitoring
name: Network Events
component: text
hint: Specify a single network event.
scopes:
Expand Down
1 change: 1 addition & 0 deletions controllers/flp/flp_pipeline_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ func flowMetricToFLP(flowMetric *metricslatest.FlowMetricSpec) (*api.MetricsItem
Filters: []api.MetricsFilter{},
Labels: flowMetric.Labels,
Remap: flowMetric.Remap,
Flatten: flowMetric.Flatten,
ValueKey: flowMetric.ValueField,
}
for _, f := range metrics.GetFilters(flowMetric) {
Expand Down
3 changes: 2 additions & 1 deletion docs/FlowCollector.md
Original file line number Diff line number Diff line change
Expand Up @@ -17179,7 +17179,8 @@ Note that the more metrics you add, the bigger is the impact on Prometheus workl
Metrics enabled by default are:
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md<br/>
</td>
<td>false</td>
Expand Down
8 changes: 8 additions & 0 deletions docs/FlowMetric.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ be used to eliminate duplicates: `Duplicate != "true"` and `FlowDirection = "0"`
Refer to the documentation for the list of available fields: https://docs.openshift.com/container-platform/latest/observability/network_observability/json-flows-format-reference.html.<br/>
</td>
<td>false</td>
</tr><tr>
<td><b>flatten</b></td>
<td>[]string</td>
<td>
`flatten` is a list of list-type fields that must be flattened, such as Interfaces and NetworkEvents. Flattened fields generate one metric per item in that field.
For instance, when flattening `Interfaces` on a bytes counter, a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.<br/>
</td>
<td>false</td>
</tr><tr>
<td><b>labels</b></td>
<td>[]string</td>
Expand Down
5 changes: 5 additions & 0 deletions docs/Metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ When the `DNSTracking` feature is enabled in `spec.agent.ebpf.features`, additio
- `node_dns_latency_seconds`
- `workload_dns_latency_seconds` `**`

When the `NetworkEvents` feature is enabled in `spec.agent.ebpf.features`, additional metrics are available:
- `namespace_network_policy_events_total` `*`
- `node_network_policy_events_total`
- `workload_network_policy_events_total`

## Custom metrics using the FlowMetrics API

The FlowMetrics API ([spec reference](./FlowMetric.md)) has been designed to give you full control over metrics generation from NetObserv's enriched NetFlow data.
Expand Down
4 changes: 2 additions & 2 deletions pkg/dashboards/dashboard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ func TestCreateFlowMetricsDashboard_All(t *testing.T) {

assert.Equal("NetObserv / Main", d.Title)

assert.Equal([]string{"Overview", "Traffic rates", "TCP latencies", "Byte and packet drops", "DNS"}, d.Titles())
assert.Equal([]string{"Overview", "Traffic rates", "TCP latencies", "Byte and packet drops", "DNS", "Network Policy"}, d.Titles())

assert.Len(d.Rows[0].Panels, 16)
assert.Len(d.Rows[0].Panels, 18)
assert.Len(d.Rows[1].Panels, 20)

p := d.FindPanel("Top egress traffic per node")
Expand Down
3 changes: 3 additions & 0 deletions pkg/helper/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ func FindFields(labels []string, isNumber bool) bool {
}

for _, l := range labels {
// Split field for nesting, e.g. "NetworkEvents>Name" (and we don't verify the nested part)
parts := strings.Split(l, ">")
l = parts[0]
if ok := labelMap[l].exists; !ok {
return false
}
Expand Down
43 changes: 43 additions & 0 deletions pkg/metrics/predefined_charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,49 @@ func dnsCharts(group string) []metricslatest.Chart {
}, group, "")...)
}

// netpolCharts returns the predefined charts for network policy event metrics.
//
// The result starts with two single-stat charts ("Policy drop rate" and
// "Policy allow rate") that carry an empty section name, followed by the
// "Drop rate" and then the "Allow rate" stacked-area charts of the
// "Network Policy" section, each expanded through chartVariantsFor for the
// given group with an empty unit.
func netpolCharts(group string) []metricslatest.Chart {
	const section = "Network Policy"

	// singleStat builds a section-less single-stat chart running one query.
	singleStat := func(title, promQL string) metricslatest.Chart {
		return metricslatest.Chart{
			Type:          metricslatest.ChartTypeSingleStat,
			SectionName:   "",
			DashboardName: mainDashboard,
			Title:         title,
			Queries:       []metricslatest.Query{{PromQL: promQL}},
		}
	}

	// rateVariants builds a stacked-area chart template in the network policy
	// section and lets chartVariantsFor derive the charts for the group.
	rateVariants := func(title, promQL string) []metricslatest.Chart {
		template := metricslatest.Chart{
			Type:          metricslatest.ChartTypeStackArea,
			SectionName:   section,
			DashboardName: mainDashboard,
			Title:         title,
			Queries: []metricslatest.Query{{
				PromQL: promQL,
				Legend: "$LEGEND, {{ type }}, {{ direction }}",
			}},
		}
		return chartVariantsFor(&template, group, "")
	}

	result := []metricslatest.Chart{
		singleStat("Policy drop rate", `sum(rate($METRIC{action="drop"}[2m]))`),
		singleStat("Policy allow rate", `sum(rate($METRIC{action=~"allow.*"}[2m]))`),
	}
	// Keep the original ordering: drop-rate variants first, then allow-rate variants.
	result = append(result, rateVariants("Drop rate", `sum(rate($METRIC{action="drop",$FILTERS}[2m])) by (type,direction,$LABELS)`)...)
	result = append(result, rateVariants("Allow rate", `sum(rate($METRIC{action=~"allow.*",$FILTERS}[2m])) by (type,direction,$LABELS)`)...)
	return result
}

func chartVariantsFor(chart *metricslatest.Chart, group, unit string) []metricslatest.Chart {
switch group {
case tagNodes:
Expand Down
Loading