Skip to content
This repository was archived by the owner on Dec 7, 2023. It is now read-only.
This repository was archived by the owner on Dec 7, 2023. It is now read-only.

Ignite CNI leaves stale portmappings #690

@networkop

Description

@networkop

Description: when running ignite with default (CNI) plugin, portmappings are not being cleanup after VM removal.

Environment:
baseOS: Linux 2020 5.7.17-2-MANJARO #1 SMP PREEMPT Sat Aug 22 14:58:17 UTC 2020 x86_64 GNU/Linux
ignite: v0.7.1
CNI: v0.8.7

How to reproduce:

  1. On a Linux VM, install ignite and CNI plugin binaries, following the steps outlined in the release notes.
  2. Create a test VM with a portmapping:
sudo ignite run weaveworks/ignite-ubuntu:latest \
  --name vm \
  --cpus 2 \
  --memory 1GB \
  --size 6GB \
  --ssh \
  --ports 5228:22
  1. Examine the nat rules created by the CNI plugins:
sudo iptables -t nat -vnL | grep bd792edea2bfb3dc
    2   136 CNI-8d88489d4bfe07fea25a9d22  all  --  *      *       10.61.0.24           0.0.0.0/0            /* name: "ignite-cni-bridge" id: "ignite-bd792edea2bfb3dc" */
    0     0 ACCEPT     all  --  *      *       0.0.0.0/0            10.61.0.0/16         /* name: "ignite-cni-bridge" id: "ignite-bd792edea2bfb3dc" */
    2   136 MASQUERADE  all  --  *      *       0.0.0.0/0           !224.0.0.0/4          /* name: "ignite-cni-bridge" id: "ignite-bd792edea2bfb3dc" */
    0     0 CNI-DN-8d88489d4bfe07fea25a9  tcp  --  *      *       0.0.0.0/0            0.0.0.0/0            /* dnat name: "ignite-cni-bridge" id: "ignite-bd792edea2bfb3dc" */ multiport dports 5228
  1. Remove the test VM
sudo ignite  rm -f bd792edea2bfb3dc
  1. Re-examine the nat rules:
sudo iptables -t nat -vnL | grep bd792edea2bfb3dc

    0     0 CNI-DN-8d88489d4bfe07fea25a9  tcp  --  *      *       0.0.0.0/0            0.0.0.0/0            /* dnat name: "ignite-cni-bridge" id: "ignite-bd792edea2bfb3dc" */ multiport dports 5228

These leftover rules accumulate over time and never get deleted. As an unfortunate side-effect when using firekube, the cluster gets created successfully the first time, but on subsequent re-creates it fails to connect to SSH ports as the new rules get appended to the end of the chain.

Workaround: I've managed to track it down to the following piece of the code: https://github.com/weaveworks/ignite/blob/v0.7.1/pkg/network/cni/cni.go#L192

When calling RemoveContainerNetwork without portmapping options, they never get removed. I've patched the code in the following way to pass these options all the way down to plugin.cni.Remove and this has fixed the issue for me.

diff --git a/pkg/network/cni/cni.go b/pkg/network/cni/cni.go
index a2a3a5f3..dff1310c 100644
--- a/pkg/network/cni/cni.go
+++ b/pkg/network/cni/cni.go
@@ -189,7 +189,7 @@ func cniToIgniteResult(r *gocni.CNIResult) *network.Result {
 	return result
 }
 
-func (plugin *cniNetworkPlugin) RemoveContainerNetwork(containerID string, isRunning bool) (err error) {
+func (plugin *cniNetworkPlugin) RemoveContainerNetwork(containerID string, isRunning bool, portMappings ...meta.PortMapping) (err error) {
 	if err = plugin.initialize(); err != nil {
 		return err
 	}
@@ -217,7 +217,21 @@ func (plugin *cniNetworkPlugin) RemoveContainerNetwork(containerID string, isRun
 		}
 	}
 
-	return plugin.cni.Remove(context.Background(), containerID, netnsPath)
+	pms := make([]gocni.PortMapping, 0, len(portMappings))
+	for _, pm := range portMappings {
+		hostIP := ""
+		if pm.BindAddress != nil {
+			hostIP = pm.BindAddress.String()
+		}
+		pms = append(pms, gocni.PortMapping{
+			HostPort:      int32(pm.HostPort),
+			ContainerPort: int32(pm.VMPort),
+			Protocol:      pm.Protocol.String(),
+			HostIP:        hostIP,
+		})
+	}
+
+	return plugin.cni.Remove(context.Background(), containerID, netnsPath, gocni.WithCapabilityPortMap(pms))
 }
 
 // cleanupBridges makes the defaultNetworkName CNI network config not leak iptables rules
diff --git a/pkg/network/docker/docker.go b/pkg/network/docker/docker.go
index e3377baa..6edc8288 100644
--- a/pkg/network/docker/docker.go
+++ b/pkg/network/docker/docker.go
@@ -44,7 +44,7 @@ func (plugin *dockerNetworkPlugin) SetupContainerNetwork(containerID string, _ .
 	}, nil
 }
 
-func (*dockerNetworkPlugin) RemoveContainerNetwork(_ string, _ bool) error {
+func (*dockerNetworkPlugin) RemoveContainerNetwork(_ string, _ bool, _ ...meta.PortMapping) error {
 	// no-op for docker, this is handled automatically
 	return nil
 }
diff --git a/pkg/network/types.go b/pkg/network/types.go
index 89273292..206e1993 100644
--- a/pkg/network/types.go
+++ b/pkg/network/types.go
@@ -21,7 +21,7 @@ type Plugin interface {
 	SetupContainerNetwork(containerID string, portmappings ...meta.PortMapping) (*Result, error)
 
 	// RemoveContainerNetwork is the method called before a container using the network plugin can be deleted
-	RemoveContainerNetwork(containerID string, isRunning bool) error
+	RemoveContainerNetwork(containerID string, isRunning bool, portmappings ...meta.PortMapping) error
 }
 
 type Result struct {
diff --git a/pkg/operations/remove.go b/pkg/operations/remove.go
index ba0d94ab..b15f7262 100644
--- a/pkg/operations/remove.go
+++ b/pkg/operations/remove.go
@@ -6,6 +6,7 @@ import (
 
 	log "github.com/sirupsen/logrus"
 	api "github.com/weaveworks/ignite/pkg/apis/ignite"
+	meta "github.com/weaveworks/ignite/pkg/apis/meta/v1alpha1"
 	"github.com/weaveworks/ignite/pkg/client"
 	"github.com/weaveworks/ignite/pkg/dmlegacy/cleanup"
 	"github.com/weaveworks/ignite/pkg/logs"
@@ -39,7 +40,7 @@ func CleanupVM(vm *api.VM) error {
 		}
 	} else {
 		// Try to cleanup VM networking
-		if err := removeNetworking(util.NewPrefixer().Prefix(vm.GetUID()), false); err != nil {
+		if err := removeNetworking(util.NewPrefixer().Prefix(vm.GetUID()), false, vm.Spec.Network.Ports...); err != nil {
 			log.Warnf("Failed to cleanup networking for stopped container %s %q: %v", vm.GetKind(), vm.GetUID(), err)
 		}
 	}
@@ -84,7 +85,7 @@ func StopVM(vm *api.VM, kill, silent bool) error {
 	action := "stop"
 
 	// Remove VM networking
-	if err = removeNetworking(util.NewPrefixer().Prefix(vm.GetUID()), true); err != nil {
+	if err = removeNetworking(util.NewPrefixer().Prefix(vm.GetUID()), true, vm.Spec.Network.Ports...); err != nil {
 		return err
 	}
 
@@ -113,7 +114,7 @@ func StopVM(vm *api.VM, kill, silent bool) error {
 	return nil
 }
 
-func removeNetworking(containerID string, isRunning bool) error {
+func removeNetworking(containerID string, isRunning bool, portmappings ...meta.PortMapping) error {
 	log.Infof("Removing the container with ID %q from the %q network", containerID, providers.NetworkPlugin.Name())
-	return providers.NetworkPlugin.RemoveContainerNetwork(containerID, isRunning)
+	return providers.NetworkPlugin.RemoveContainerNetwork(containerID, isRunning, portmappings...)
 }

If the above patch looks ok, I'm happy to submit a PR.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions