Skip to content

Commit

Permalink
bpf-map-pinning: fixes a few issues
Browse files Browse the repository at this point in the history
* Update the documentation to reflect configuration
changes needed.
* Update the program to pin the xsk map to just use
xdp-loader. The previous c implementation had issues
and the version of libxdp in the container images is
too old for libxdp pinning support. Reverting to bpf
(for pinning support) would break the libxdp based
program unloading.
* Don't return an error from the bpf clean up function
when there's no program on the interface.

Signed-off-by: Maryam Tahhan <[email protected]>
  • Loading branch information
maryamtahhan committed Dec 4, 2023
1 parent d3d4664 commit 595d1b6
Show file tree
Hide file tree
Showing 15 changed files with 220 additions and 152 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,10 @@ UdsServerDisable is a Boolean configuration. If set to true, devices in this poo

BpfMapPinningEnable is a Boolean configuration. If set to true, the device plugin will use BPF map pinning instead of a UDS to share an XSK map with a pod. By default, this is set to false. UdsServerDisable should be set to true when using this configuration.

> **_NOTE:_** When using this setting, it's important to adjust the securityContext for the DP DaemonSet to `privileged: true` to allow for bidirectional propagation of the volume where the BPF maps are pinned. This will no longer be needed when the DP is integrated with bpfman. An example DaemonSet configuration is shown in [daemonset-pinning.yaml](./deployments/daemonset-pinning.yaml).

> **_NOTE:_** If the kernel version is <= 5.18, the `CAP_BPF` capability should be added to the container.

#### UdsTimeout

UdsTimeout is an integer configuration. This value sets the amount of time, in seconds, that the UDS server will wait while there is no activity on the UDS. When this timeout limit is reached, the UDS server terminates and the UDS is deleted from the filesystem. This can be a useful setting, for example, in scenarios where large batches of pods are created together. Large batches of pods tend to take some time to spin up, so it might be beneficial to have the UDS server sit waiting a little longer for the pod to start. The maximum allowed value is 300 seconds (5 min). The minimum and default value is 30 seconds.
Expand Down
13 changes: 12 additions & 1 deletion cmd/deviceplugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func main() {
}
logging.Infof("Host meets requirements")

//START THE SYNCER SERVER TODO CHECK BPF MAP
//Start the syncer server
dpCniSyncerServer, err := dpcnisyncerserver.NewSyncerServer()
if err != nil {
logging.Errorf("Error creating the DpCniSyncerServer")
Expand Down Expand Up @@ -144,6 +144,17 @@ func main() {
}
}

if _, err := os.Stat(constants.Bpf.PinMapBaseDir); err == nil {

if err = syscall.Unmount(constants.Bpf.PinMapBaseDir, 0); err != nil {
logging.Errorf("failed to umount %s: %v", constants.Bpf.PinMapBaseDir, err.Error())
}

if err = os.RemoveAll(constants.Bpf.PinMapBaseDir); err != nil {
logging.Errorf("Cleanup error: %v", err)
}
logging.Infof("Cleaned up dir %s", constants.Bpf.PinMapBaseDir)
}
}

func configureLogging(cfg deviceplugin.PluginConfig) error {
Expand Down
3 changes: 3 additions & 0 deletions constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ var (
udsPodSock = "/afxdp.sock"

/* BPF*/
pinMapBaseDir = "/var/run/afxdp_dp/"
bpfMapPodPath = "/tmp/afxdp_dp/"
xsk_map = "/xsks_map"

Expand Down Expand Up @@ -222,6 +223,7 @@ type uds struct {
}

type bpf struct {
PinMapBaseDir string
BpfMapPodPath string
Xsk_map string
}
Expand Down Expand Up @@ -349,6 +351,7 @@ func init() {
}

Bpf = bpf{
PinMapBaseDir: pinMapBaseDir,
BpfMapPodPath: bpfMapPodPath,
Xsk_map: xsk_map,
}
Expand Down
113 changes: 113 additions & 0 deletions deployments/daemonset-pinning.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: afxdp-dp-config
namespace: kube-system
data:
config.json: |
{
"logLevel":"debug",
"logFile":"afxdp-dp.log",
"pools":[
{
"name":"myPool",
"mode":"primary",
"drivers":[
{
"name":"i40e"
},
{
"name":"ice"
}
]
}
]
}
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: afxdp-device-plugin
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-afxdp-device-plugin
namespace: kube-system
labels:
tier: node
app: afxdp
spec:
selector:
matchLabels:
name: afxdp-device-plugin
template:
metadata:
labels:
name: afxdp-device-plugin
tier: node
app: afxdp
spec:
hostNetwork: true
nodeSelector:
kubernetes.io/arch: amd64
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
serviceAccountName: afxdp-device-plugin
containers:
- name: kube-afxdp
image: afxdp-device-plugin:latest
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
resources:
requests:
cpu: "250m"
memory: "40Mi"
limits:
cpu: "1"
memory: "200Mi"
volumeMounts:
- name: unixsock
mountPath: /tmp/afxdp_dp/
- name: bpfmappinning
mountPath: /var/run/afxdp_dp/
mountPropagation: Bidirectional
- name: devicesock
mountPath: /var/lib/kubelet/device-plugins/
- name: resources
mountPath: /var/lib/kubelet/pod-resources/
- name: config-volume
mountPath: /afxdp/config
- name: log
mountPath: /var/log/afxdp-k8s-plugins/
- name: cnibin
mountPath: /opt/cni/bin/
volumes:
- name: unixsock
hostPath:
path: /tmp/afxdp_dp/
- name: bpfmappinning
hostPath:
path: /var/run/afxdp_dp/
- name: devicesock
hostPath:
path: /var/lib/kubelet/device-plugins/
- name: resources
hostPath:
path: /var/lib/kubelet/pod-resources/
- name: config-volume
configMap:
name: afxdp-dp-config
items:
- key: config.json
path: config.json
- name: log
hostPath:
path: /var/log/afxdp-k8s-plugins/
- name: cnibin
hostPath:
path: /opt/cni/bin/
5 changes: 5 additions & 0 deletions deployments/daemonset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ spec:
volumeMounts:
- name: unixsock
mountPath: /tmp/afxdp_dp/
- name: bpfmappinning
mountPath: /var/run/afxdp_dp/
- name: devicesock
mountPath: /var/lib/kubelet/device-plugins/
- name: resources
Expand All @@ -92,6 +94,9 @@ spec:
- name: unixsock
hostPath:
path: /tmp/afxdp_dp/
- name: bpfmappinning
hostPath:
path: /var/run/afxdp_dp/
- name: devicesock
hostPath:
path: /var/lib/kubelet/device-plugins/
Expand Down
11 changes: 5 additions & 6 deletions examples/cndp-0-0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ spec:
image: quay.io/mtahhan/cndp-map-pinning:latest
imagePullPolicy: IfNotPresent
securityContext:
privileged: true
#capabilities:
#add:
# - NET_RAW
# - IPC_LOCK
# - BPF
capabilities:
add:
- NET_RAW
- IPC_LOCK
#- BPF # Only needed if Kernel version <= 5.18
resources:
requests:
afxdp/myPool: '1'
Expand Down
2 changes: 1 addition & 1 deletion examples/kind-pod-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
add:
- NET_RAW
- IPC_LOCK
- BPF
- BPF # Only needed if kernel version <= 5.18
resources:
requests:
afxdp/myPool: '1' # The resource requested needs to match the device plugin pool name / resource type
Expand Down
5 changes: 5 additions & 0 deletions examples/pod-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ spec:
image: docker-image:latest # Specify your docker image here, along with PullPolicy and command
imagePullPolicy: IfNotPresent
command: ["tail", "-f", "/dev/null"]
# capabilities: # Should be configured if using DPDK/CNDP with BPF Map pinning.
# add:
# - NET_RAW
# - IPC_LOCK
# - BPF # Only needed if kernel version <= 5.18
resources:
requests:
afxdp/myPool: '1' # The resource requested needs to match the device plugin pool name / resource type
Expand Down
3 changes: 2 additions & 1 deletion images/amd64.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ WORKDIR /usr/src/afxdp_k8s_plugins
RUN apk add --no-cache build-base~=0.5-r3 \
&& apk add --no-cache libbsd-dev~=0.11.7 \
&& apk add --no-cache libxdp-dev~=1.2.10-r0 \
&& apk add --no-cache libbpf-dev~=1.0.1-r0 \
&& apk add --no-cache llvm15~=15.0.7-r0 \
&& apk add --no-cache clang15~=15.0.7-r0 \
&& make builddp

FROM amd64/alpine:3.18@sha256:25fad2a32ad1f6f510e528448ae1ec69a28ef81916a004d3629874104f8a7f70
RUN apk --no-cache -U add iproute2-rdma~=6.3.0-r0 acl~=2.3 \
&& apk add --no-cache libxdp~=1.2.10-r0
&& apk add --no-cache xdp-tools~=1.2.10-r0
COPY --from=cnibuilder /usr/src/afxdp_k8s_plugins/bin/afxdp /afxdp/afxdp
COPY --from=dpbuilder /usr/src/afxdp_k8s_plugins/bin/afxdp-dp /afxdp/afxdp-dp
COPY --from=dpbuilder /usr/src/afxdp_k8s_plugins/images/entrypoint.sh /afxdp/entrypoint.sh
Expand Down
106 changes: 3 additions & 103 deletions internal/bpf/bpfWrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,12 @@ int Clean_bpf(char *ifname) {

mp = xdp_multiprog__get_from_ifindex(if_index);
if (!mp) {
Log_Error("%s: unable to receive correct multi_prog reference : %s", __FUNCTION__,
mp);
return -1;
Log_Info("%s: No programs loaded on : %s", __FUNCTION__, ifname);
return 0;
}

err = xdp_multiprog__detach(mp);
if (err) {
if (err && err != -EINVAL) { // -EINVAL == No program attached
Log_Error("%s: Removal of xdp program failed, returned: "
"returned: %d",
__FUNCTION__, err);
Expand All @@ -174,102 +173,3 @@ int Clean_bpf(char *ifname) {
Log_Info("%s: removed xdp program from interface %s (%d)", __FUNCTION__, ifname, if_index);
return 0;
}

/*
 * Load_attach_bpf_xdp_pass - open /afxdp/xdp_pass.o and attach it to an
 * interface's XDP hook.
 *
 * @ifname: name of the network interface to attach the program to.
 *
 * Returns 0 on success, -1 if ifname is NULL, the interface index cannot be
 * resolved or the object file is not readable, otherwise the error code
 * reported by libxdp for the failing open/attach call.
 */
int Load_attach_bpf_xdp_pass(char *ifname) {
    int err, ifindex;
    char *filename = "/afxdp/xdp_pass.o";
    char errmsg[256] = "";
    struct xdp_program *prog;

    if (!ifname) {
        Log_Error("%s: interface name is NULL", __FUNCTION__);
        return -1;
    }

    Log_Info("%s: disovering if_index for interface %s", __FUNCTION__, ifname);

    ifindex = if_nametoindex(ifname);
    if (!ifindex) {
        Log_Error("%s: if_index not valid: %s", __FUNCTION__, ifname);
        return -1;
    }
    Log_Info("%s: if_index for interface %s is %d", __FUNCTION__, ifname, ifindex);

    /* R_OK is the access(2) mode for "readable"; O_RDONLY is an open(2) flag. */
    if (access(filename, R_OK) < 0) {
        Log_Error("%s:error accessing file %s: %s\n", __FUNCTION__, filename,
                  strerror(errno));
        /* err has not been assigned yet, so report a fixed failure code. */
        return -1;
    }

    Log_Info("%s: starting setup of xdp-pass program on "
             "interface %s (%d)",
             __FUNCTION__, ifname, ifindex);

    /* Load the BPF program */
    prog = xdp_program__open_file(filename, NULL, NULL);
    err = libxdp_get_error(prog);
    if (err) {
        /* libxdp_strerror() writes into the buffer, so it must be writable. */
        libxdp_strerror(err, errmsg, sizeof(errmsg));
        Log_Error("%s: Couldn't load XDP program %s: %s\n", __FUNCTION__, filename, errmsg);
        return err;
    }

    /*
     * Attach the program to the interface at the xdp hook.
     * NOTE(review): the third argument of xdp_program__attach() is an attach
     * mode; the flag constant is kept from the original call - confirm that
     * native (driver) mode is what is intended here.
     * NOTE(review): the xdp_program handle is never released in this
     * function - confirm whether it should be closed after the attach.
     */
    err = xdp_program__attach(prog, ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST, 0);
    if (err) {
        libxdp_strerror(err, errmsg, sizeof(errmsg));
        Log_Error("%s: Couldn't attach the XDP PASS PROGRAM TO %s: %s\n", __FUNCTION__, ifname,
                  errmsg);
        return err;
    }

    Log_Info("%s: xdp-pass program loaded on %s (%d)", __FUNCTION__, ifname, ifindex);

    return 0;
}

/*
 * Load_bpf_pin_xsk_map - open /afxdp/xdp_afxdp_redirect.o with pin_path as
 * the libbpf pin root path and attach it to an interface's XDP hook.
 *
 * @ifname:   name of the network interface to attach the program to.
 * @pin_path: directory handed to libbpf as bpf_object_open_opts.pin_root_path.
 *
 * Returns 0 on success, -1 if ifname is NULL, the interface index cannot be
 * resolved or the object file is not readable, otherwise the error code
 * reported by libxdp for the failing open/attach call.
 */
int Load_bpf_pin_xsk_map(char *ifname, char *pin_path) {
    struct xdp_program *prog;
    int ifindex;
    int err;
    char errmsg[256] = "";
    char *filename = "/afxdp/xdp_afxdp_redirect.o";
    DECLARE_LIBBPF_OPTS(bpf_object_open_opts, bpf_opts, .pin_root_path = pin_path);

    if (!ifname) {
        Log_Error("%s: interface name is NULL", __FUNCTION__);
        return -1;
    }

    ifindex = if_nametoindex(ifname);
    if (!ifindex) {
        Log_Error("%s: if_index not valid: %s", __FUNCTION__, ifname);
        return -1;
    }
    Log_Info("%s: if_index for interface %s is %d", __FUNCTION__, ifname, ifindex);

    /* R_OK is the access(2) mode for "readable"; O_RDONLY is an open(2) flag. */
    if (access(filename, R_OK) < 0) {
        Log_Error("%s:error accessing file %s: %s\n", __FUNCTION__, filename,
                  strerror(errno));
        /* err has not been assigned yet, so report a fixed failure code. */
        return -1;
    }

    Log_Info("%s: starting setup of xdp-redirect program on "
             "interface %s (%d)",
             __FUNCTION__, ifname, ifindex);

    /*
     * Load the BPF program. The open options are passed through so that the
     * pin root path built from pin_path is actually applied; previously the
     * options were constructed but never used.
     */
    prog = xdp_program__open_file(filename, NULL, &bpf_opts);
    err = libxdp_get_error(prog);
    if (err) {
        /* libxdp_strerror() writes into the buffer, so it must be writable. */
        libxdp_strerror(err, errmsg, sizeof(errmsg));
        Log_Error("%s: Couldn't load XDP program %s: %s\n", __FUNCTION__, filename, errmsg);
        return err;
    }

    /*
     * Attach the program to the interface at the xdp hook.
     * NOTE(review): the third argument of xdp_program__attach() is an attach
     * mode; the flag constant is kept from the original call - confirm that
     * native (driver) mode is what is intended here.
     * NOTE(review): the xdp_program handle is never released in this
     * function - confirm whether it should be closed after the attach.
     */
    err = xdp_program__attach(prog, ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST, 0);
    if (err) {
        libxdp_strerror(err, errmsg, sizeof(errmsg));
        Log_Error("%s: Couldn't attach the xdp redirect program to %s: %s\n", __FUNCTION__,
                  ifname, errmsg);
        return err;
    }

    return 0;
}
Loading

0 comments on commit 595d1b6

Please sign in to comment.