Skip to content

Commit

Permalink
Add FLP-based deduper options
Browse files Browse the repository at this point in the history
  • Loading branch information
jotak committed Mar 19, 2024
1 parent c1bf987 commit 5e7bbbd
Show file tree
Hide file tree
Showing 12 changed files with 453 additions and 7 deletions.
28 changes: 28 additions & 0 deletions apis/flowcollector/v1beta1/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,13 +505,41 @@ type FlowCollectorFLP struct {
// This feature requires the "topology.kubernetes.io/zone" label to be set on nodes.
AddZone *bool `json:"addZone,omitempty"`

//+optional
// `deduper` allows to sample or drop flows identified as duplicates, in order to save on resource usage.
Deduper *FLPDeduper `json:"deduper,omitempty"`

// `debug` allows setting some aspects of the internal configuration of the flow processor.
// This section is aimed exclusively for debugging and fine-grained performance optimizations,
// such as `GOGC` and `GOMAXPROCS` env vars. Set these values at your own risk.
// +optional
Debug DebugConfig `json:"debug,omitempty"`
}

// FLPDeduperMode names the de-duplication strategy applied by the flow
// processor (FLP). It is the type of the `mode` field of FLPDeduper.
type FLPDeduperMode string

// Accepted FLPDeduperMode values.
const (
	// FLPDeduperDisabled turns off Processor-based de-duplication.
	FLPDeduperDisabled FLPDeduperMode = "Disabled"
	// FLPDeduperDrop drops every flow considered a duplicate.
	FLPDeduperDrop FLPDeduperMode = "Drop"
	// FLPDeduperSample randomly keeps only a sample of the flows considered duplicates.
	FLPDeduperSample FLPDeduperMode = "Sample"
)

// `FLPDeduper` defines the desired configuration for the FLP-based deduper.
type FLPDeduper struct {
// Set the Processor deduper mode (de-duplication). It comes in addition to the Agent deduper because the Agent cannot de-duplicate the same flows reported from different nodes.<br>
// - Use `Drop` to drop every flow considered a duplicate, which saves more on resource usage but potentially loses some information such as the network interfaces used by the peer.<br>
// - Use `Sample` to randomly keep only 1 flow out of 50 (by default) among the ones considered duplicates. This is a compromise between dropping every duplicate and keeping every duplicate. This sampling action comes in addition to the Agent-based sampling. If both Agent and Processor sampling are 50, the combined sampling is 1:2500.<br>
// - Use `Disabled` to turn off Processor-based de-duplication.<br>
// +kubebuilder:validation:Enum:="Disabled";"Drop";"Sample"
// +kubebuilder:default:=Disabled
Mode FLPDeduperMode `json:"mode,omitempty"`

// `sampling` is the sampling rate when deduper `mode` is `Sample`.
//+kubebuilder:validation:Minimum=0
//+kubebuilder:default:=50
Sampling int32 `json:"sampling,omitempty"`
}

const (
HPAStatusDisabled = "DISABLED"
HPAStatusEnabled = "ENABLED"
Expand Down
34 changes: 34 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions apis/flowcollector/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions apis/flowcollector/v1beta2/flowcollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,13 +462,41 @@ type FlowCollectorFLP struct {
// This feature requires the "topology.kubernetes.io/zone" label to be set on nodes.
AddZone *bool `json:"addZone,omitempty"`

//+optional
// `deduper` allows to sample or drop flows identified as duplicates, in order to save on resource usage.
Deduper *FLPDeduper `json:"deduper,omitempty"`

// `advanced` allows setting some aspects of the internal configuration of the flow processor.
// This section is aimed mostly for debugging and fine-grained performance optimizations,
// such as `GOGC` and `GOMAXPROCS` env vars. Set these values at your own risk.
// +optional
Advanced *AdvancedProcessorConfig `json:"advanced,omitempty"`
}

// FLPDeduperMode names the de-duplication strategy applied by the flow
// processor (FLP). It is the type of the `mode` field of FLPDeduper.
type FLPDeduperMode string

// Accepted FLPDeduperMode values.
const (
	// FLPDeduperDisabled turns off Processor-based de-duplication.
	FLPDeduperDisabled FLPDeduperMode = "Disabled"
	// FLPDeduperDrop drops every flow considered a duplicate.
	FLPDeduperDrop FLPDeduperMode = "Drop"
	// FLPDeduperSample randomly keeps only a sample of the flows considered duplicates.
	FLPDeduperSample FLPDeduperMode = "Sample"
)

// `FLPDeduper` defines the desired configuration for the FLP-based deduper.
type FLPDeduper struct {
// Set the Processor deduper mode (de-duplication). It comes in addition to the Agent deduper because the Agent cannot de-duplicate the same flows reported from different nodes.<br>
// - Use `Drop` to drop every flow considered a duplicate, which saves more on resource usage but potentially loses some information such as the network interfaces used by the peer.<br>
// - Use `Sample` to randomly keep only 1 flow out of 50 (by default) among the ones considered duplicates. This is a compromise between dropping every duplicate and keeping every duplicate. This sampling action comes in addition to the Agent-based sampling. If both Agent and Processor sampling are 50, the combined sampling is 1:2500.<br>
// - Use `Disabled` to turn off Processor-based de-duplication.<br>
// +kubebuilder:validation:Enum:="Disabled";"Drop";"Sample"
// +kubebuilder:default:=Disabled
Mode FLPDeduperMode `json:"mode,omitempty"`

// `sampling` is the sampling rate when deduper `mode` is `Sample`.
//+kubebuilder:validation:Minimum=0
//+kubebuilder:default:=50
Sampling int32 `json:"sampling,omitempty"`
}

type HPAStatus string

const (
Expand Down
20 changes: 20 additions & 0 deletions apis/flowcollector/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions bundle/manifests/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,38 @@ spec:
scenarios.'
type: object
type: object
deduper:
description: '`deduper` allows to sample or drop flows identified
as duplicates, in order to save on resource usage.'
properties:
mode:
default: Disabled
description: Set the Processor deduper mode (de-duplication).
It comes in addition to the Agent deduper because the Agent
cannot de-duplicate the same flows reported from different nodes.<br>
- Use `Drop` to drop every flow considered a duplicate,
which saves more on resource usage but potentially loses
some information such as the network interfaces used by the
peer.<br> - Use `Sample` to randomly keep only 1 flow out of
50 (by default) among the ones considered duplicates.
This is a compromise between dropping every duplicate and
keeping every duplicate. This sampling action comes in
addition to the Agent-based sampling. If both Agent and
Processor sampling are 50, the combined sampling is 1:2500.<br>
- Use `Disabled` to turn off Processor-based de-duplication.<br>
enum:
- Disabled
- Drop
- Sample
type: string
sampling:
default: 50
description: '`sampling` is the sampling rate when deduper
`mode` is `Sample`.'
format: int32
minimum: 0
type: integer
type: object
dropUnusedFields:
default: true
description: '`dropUnusedFields` [deprecated (*)] this setting
Expand Down Expand Up @@ -4990,6 +5022,38 @@ spec:
in the flows data. This is useful in a multi-cluster context.
When using OpenShift, leave empty to make it automatically determined.'
type: string
deduper:
description: '`deduper` allows to sample or drop flows identified
as duplicates, in order to save on resource usage.'
properties:
mode:
default: Disabled
description: Set the Processor deduper mode (de-duplication).
It comes in addition to the Agent deduper because the Agent
cannot de-duplicate the same flows reported from different nodes.<br>
- Use `Drop` to drop every flow considered a duplicate,
which saves more on resource usage but potentially loses
some information such as the network interfaces used by the
peer.<br> - Use `Sample` to randomly keep only 1 flow out of
50 (by default) among the ones considered duplicates.
This is a compromise between dropping every duplicate and
keeping every duplicate. This sampling action comes in
addition to the Agent-based sampling. If both Agent and
Processor sampling are 50, the combined sampling is 1:2500.<br>
- Use `Disabled` to turn off Processor-based de-duplication.<br>
enum:
- Disabled
- Drop
- Sample
type: string
sampling:
default: 50
description: '`sampling` is the sampling rate when deduper
`mode` is `Sample`.'
format: int32
minimum: 0
type: integer
type: object
imagePullPolicy:
default: IfNotPresent
description: '`imagePullPolicy` is the Kubernetes pull policy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,12 @@ spec:
path: loki.readTimeout
- displayName: Namespace
path: namespace
- displayName: Deduper
path: processor.deduper
- displayName: Mode
path: processor.deduper.mode
- displayName: Sampling
path: processor.deduper.sampling
- displayName: Log types
path: processor.logTypes
- displayName: Disable alerts
Expand Down
64 changes: 64 additions & 0 deletions config/crd/bases/flows.netobserv.io_flowcollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1860,6 +1860,38 @@ spec:
scenarios.'
type: object
type: object
deduper:
description: '`deduper` allows to sample or drop flows identified
as duplicates, in order to save on resource usage.'
properties:
mode:
default: Disabled
description: Set the Processor deduper mode (de-duplication).
It comes in addition to the Agent deduper because the Agent
cannot de-duplicate the same flows reported from different nodes.<br>
- Use `Drop` to drop every flow considered a duplicate,
which saves more on resource usage but potentially loses
some information such as the network interfaces used by the
peer.<br> - Use `Sample` to randomly keep only 1 flow out of
50 (by default) among the ones considered duplicates.
This is a compromise between dropping every duplicate and
keeping every duplicate. This sampling action comes in
addition to the Agent-based sampling. If both Agent and
Processor sampling are 50, the combined sampling is 1:2500.<br>
- Use `Disabled` to turn off Processor-based de-duplication.<br>
enum:
- Disabled
- Drop
- Sample
type: string
sampling:
default: 50
description: '`sampling` is the sampling rate when deduper
`mode` is `Sample`.'
format: int32
minimum: 0
type: integer
type: object
dropUnusedFields:
default: true
description: '`dropUnusedFields` [deprecated (*)] this setting
Expand Down Expand Up @@ -4977,6 +5009,38 @@ spec:
in the flows data. This is useful in a multi-cluster context.
When using OpenShift, leave empty to make it automatically determined.'
type: string
deduper:
description: '`deduper` allows to sample or drop flows identified
as duplicates, in order to save on resource usage.'
properties:
mode:
default: Disabled
description: Set the Processor deduper mode (de-duplication).
It comes in addition to the Agent deduper because the Agent
cannot de-duplicate the same flows reported from different nodes.<br>
- Use `Drop` to drop every flow considered a duplicate,
which saves more on resource usage but potentially loses
some information such as the network interfaces used by the
peer.<br> - Use `Sample` to randomly keep only 1 flow out of
50 (by default) among the ones considered duplicates.
This is a compromise between dropping every duplicate and
keeping every duplicate. This sampling action comes in
addition to the Agent-based sampling. If both Agent and
Processor sampling are 50, the combined sampling is 1:2500.<br>
- Use `Disabled` to turn off Processor-based de-duplication.<br>
enum:
- Disabled
- Drop
- Sample
type: string
sampling:
default: 50
description: '`sampling` is the sampling rate when deduper
`mode` is `Sample`.'
format: int32
minimum: 0
type: integer
type: object
imagePullPolicy:
default: IfNotPresent
description: '`imagePullPolicy` is the Kubernetes pull policy
Expand Down
Loading

0 comments on commit 5e7bbbd

Please sign in to comment.