diff --git a/go.mod b/go.mod index 07d6883fe13..6f8cfaf8074 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/Microsoft/hcsshim v0.14.0-rc.1 github.com/compose-spec/compose-go/v2 v2.10.1 //gomodjail:unconfined github.com/containerd/accelerated-container-image v1.4.1 - github.com/containerd/cgroups/v3 v3.1.2 //gomodjail:unconfined + github.com/containerd/cgroups/v3 v3.1.3 //gomodjail:unconfined github.com/containerd/console v1.0.5 //gomodjail:unconfined github.com/containerd/containerd/api v1.10.0 github.com/containerd/containerd/v2 v2.2.1 //gomodjail:unconfined diff --git a/go.sum b/go.sum index 4b9092792ea..b9772686b2a 100644 --- a/go.sum +++ b/go.sum @@ -25,8 +25,8 @@ github.com/compose-spec/compose-go/v2 v2.10.1 h1:mFbXobojGRFIVi1UknrvaDAZ+PkJfyj github.com/compose-spec/compose-go/v2 v2.10.1/go.mod h1:Ohac1SzhO/4fXXrzWIztIVB6ckmKBv1Nt5Z5mGVESUg= github.com/containerd/accelerated-container-image v1.4.1 h1:jeZYAaq5pMCeyRZ0I916OjJsEb2TGjAQmfAZyQLi3ec= github.com/containerd/accelerated-container-image v1.4.1/go.mod h1:rhqPgQ63sgkYHY56pAVl0NBN+lDJYgzgZW9m781nnWg= -github.com/containerd/cgroups/v3 v3.1.2 h1:OSosXMtkhI6Qove637tg1XgK4q+DhR0mX8Wi8EhrHa4= -github.com/containerd/cgroups/v3 v3.1.2/go.mod h1:PKZ2AcWmSBsY/tJUVhtS/rluX0b1uq1GmPO1ElCmbOw= +github.com/containerd/cgroups/v3 v3.1.3 h1:eUNflyMddm18+yrDmZPn3jI7C5hJ9ahABE5q6dyLYXQ= +github.com/containerd/cgroups/v3 v3.1.3/go.mod h1:PKZ2AcWmSBsY/tJUVhtS/rluX0b1uq1GmPO1ElCmbOw= github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= github.com/containerd/containerd/api v1.10.0 h1:5n0oHYVBwN4VhoX9fFykCV9dF1/BvAXeg2F8W6UYq1o= diff --git a/hack/build-integration-canary.sh b/hack/build-integration-canary.sh index 725628b962a..d94fdc91ea2 100755 --- a/hack/build-integration-canary.sh +++ b/hack/build-integration-canary.sh @@ -162,15 +162,16 @@ latest::release(){ while read -r line; do 
[ ! "$ignore" ] || ! grep -q "$ignore" <<<"$line" || continue - name="$(echo "$line" | jq -rc .name)" + # Prefer the release name; fall back to tag_name when name is null or empty (name is an optional display label) + name="$(echo "$line" | jq -rc 'if (.name // "") != "" then .name else .tag_name end')" if [ "$name" == "" ] || [ "$name" == null ] ; then log::debug " > bogus release name ($name) ignored" continue fi log::debug " > found release: $name" - if version::compare <(echo "$line" | jq -rc .name); then + if version::compare <(echo "$name"); then higher_data="$line" - higher_readable="$(echo "$line" | jq -rc .name | sed -E 's/(.*[ ])?(v?[0-9][0-9.a-z-]+).*/\2/')" + higher_readable="$(echo "$name" | sed -E 's/(.*[ ])?(v?[0-9][0-9.a-z-]+).*/\2/')" fi done < <(github::releases "$repo") diff --git a/pkg/composer/serviceparser/serviceparser.go b/pkg/composer/serviceparser/serviceparser.go index afd665fca6f..3a6323765a2 100644 --- a/pkg/composer/serviceparser/serviceparser.go +++ b/pkg/composer/serviceparser/serviceparser.go @@ -19,6 +19,7 @@ package serviceparser import ( "bytes" "encoding/csv" + "encoding/json" "errors" "fmt" "os" @@ -33,6 +34,7 @@ import ( "github.com/containerd/log" "github.com/containerd/nerdctl/v2/pkg/identifiers" + "github.com/containerd/nerdctl/v2/pkg/labels" "github.com/containerd/nerdctl/v2/pkg/reflectutil" ) @@ -595,6 +597,8 @@ func newContainer(project *types.Project, parsed *Service, i int) (*Container, e return nil, err } netTypeContainer := false + // Collect per-network static IPs to determine if we need a per-network IP map. 
+ networkIPMap := make(map[string]string) for _, net := range networks { if strings.HasPrefix(net.fullName, "container:") { netTypeContainer = true @@ -602,13 +606,27 @@ func newContainer(project *types.Project, parsed *Service, i int) (*Container, e c.RunArgs = append(c.RunArgs, "--net="+net.fullName) if value, ok := svc.Networks[net.shortNetworkName]; ok { if value != nil && value.Ipv4Address != "" { - c.RunArgs = append(c.RunArgs, "--ip="+value.Ipv4Address) + networkIPMap[net.fullName] = value.Ipv4Address } if value != nil && value.MacAddress != "" { c.RunArgs = append(c.RunArgs, "--mac-address="+value.MacAddress) } } } + // When multiple networks have static IPs, pass a per-network IP map as an annotation + // so that each CNI plugin receives only the IP for its own network. + // For a single IP, use the legacy --ip= flag for backward compatibility. + if len(networkIPMap) > 1 { + ipMapJSON, err := json.Marshal(networkIPMap) + if err != nil { + return nil, fmt.Errorf("failed to marshal per-network IP map: %w", err) + } + c.RunArgs = append(c.RunArgs, fmt.Sprintf("--annotation=%s=%s", labels.IPAddressPerNetwork, string(ipMapJSON))) + } else if len(networkIPMap) == 1 { + for _, ip := range networkIPMap { + c.RunArgs = append(c.RunArgs, "--ip="+ip) + } + } if netTypeContainer && svc.Hostname != "" { return nil, fmt.Errorf("conflicting options: hostname and container network mode") diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index e0838d6ea9c..2c5212a2771 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -70,6 +70,10 @@ const ( // IP6Address is the static IP6 address of the container assigned by the user IP6Address = Prefix + "ip6" + // IPAddressPerNetwork JSON-encoded map of network names to user-assigned static + // IPv4 addresses. Used for multi-network containers. 
+ IPAddressPerNetwork = Prefix + "ip-per-network" + // LogURI is the log URI LogURI = Prefix + "log-uri" diff --git a/pkg/ocihook/ocihook.go b/pkg/ocihook/ocihook.go index e860f1b1a68..a079a352073 100644 --- a/pkg/ocihook/ocihook.go +++ b/pkg/ocihook/ocihook.go @@ -30,6 +30,7 @@ import ( "strings" "time" + cnilibrary "github.com/containernetworking/cni/libcni" types100 "github.com/containernetworking/cni/pkg/types/100" "github.com/opencontainers/runtime-spec/specs-go" b4nndclient "github.com/rootless-containers/bypass4netns/pkg/api/daemon/client" @@ -185,6 +186,7 @@ func newHandlerOpts(state *specs.State, dataStore, cniPath, cniNetconfPath, brid cniOpts := []cni.Opt{ cni.WithPluginDir([]string{cniPath}), } + o.cniPluginDir = cniPath var netw *netutil.NetworkConfig for _, netstr := range networks { if netw, err = e.NetworkByNameOrID(netstr); err != nil { @@ -192,6 +194,7 @@ func newHandlerOpts(state *specs.State, dataStore, cniPath, cniNetconfPath, brid } cniOpts = append(cniOpts, cni.WithConfListBytes(netw.Bytes)) o.cniNames = append(o.cniNames, netstr) + o.cniNetConfigs = append(o.cniNetConfigs, netw.Bytes) } o.cni, err = cni.New(cniOpts...) 
if err != nil { @@ -228,6 +231,15 @@ func newHandlerOpts(state *specs.State, dataStore, cniPath, cniNetconfPath, brid o.containerIP6 = ip6Address } + // Parse per-network IP map if present (for multi-network containers with per-network static IPs) + if ipPerNetJSON, ok := o.state.Annotations[labels.IPAddressPerNetwork]; ok && ipPerNetJSON != "" { + var ipPerNetwork map[string]string + if err := json.Unmarshal([]byte(ipPerNetJSON), &ipPerNetwork); err != nil { + return nil, fmt.Errorf("failed to unmarshal per-network IP map: %w", err) + } + o.ipPerNetwork = ipPerNetwork + } + if rootlessutil.IsRootlessChild() { o.rootlessKitClient, err = rootlessutil.NewRootlessKitClient() if err != nil { @@ -258,6 +270,8 @@ type handlerOpts struct { ports []cni.PortMapping cni cni.CNI cniNames []string + cniPluginDir string + cniNetConfigs [][]byte fullID string rootlessKitClient rlkclient.Client bypassClient b4nndclient.Client @@ -265,6 +279,7 @@ type handlerOpts struct { containerIP string containerMAC string containerIP6 string + ipPerNetwork map[string]string } // hookSpec is from https://github.com/containerd/containerd/blob/v1.4.3/cmd/containerd/command/oci-hook.go#L59-L64 @@ -460,6 +475,64 @@ func portReserverPidFilePath(opts *handlerOpts) string { return filepath.Join("/run/nerdctl/", opts.state.Annotations[labels.Namespace], opts.state.ID, "port-reserver.pid") } +// perNetworkIfName returns the container-side interface name for a given network index +// (e.g., "eth0", "eth1", "eth2"). +func perNetworkIfName(index int) string { + return fmt.Sprintf("eth%d", index) +} + +// perNetworkAdd calls cnilibrary.AddNetworkList directly for a single network +// with the correct interface name (ethN) and per-network args. 
+func perNetworkAdd(ctx context.Context, opts *handlerOpts, networkIndex int, nsPath string, extraArgs [][2]string, portMappings []cni.PortMapping) (*types100.Result, error) {
+	confList, rt, err := perNetworkRuntime(opts, networkIndex, nsPath, extraArgs, portMappings)
+	if err != nil {
+		return nil, err
+	}
+	cniConfig := cnilibrary.NewCNIConfig([]string{opts.cniPluginDir}, nil)
+	result, err := cniConfig.AddNetworkList(ctx, confList, rt)
+	if err != nil {
+		return nil, err
+	}
+	// Convert the generic libcni result into the *types100.Result that callers store per network.
+	return types100.NewResultFromResult(result)
+}
+
+// perNetworkDel calls cnilibrary.DelNetworkList directly for a single network
+// with the correct interface name (ethN).
+func perNetworkDel(ctx context.Context, opts *handlerOpts, networkIndex int, nsPath string, extraArgs [][2]string, portMappings []cni.PortMapping) error {
+	confList, rt, err := perNetworkRuntime(opts, networkIndex, nsPath, extraArgs, portMappings)
+	if err != nil {
+		return err
+	}
+	cniConfig := cnilibrary.NewCNIConfig([]string{opts.cniPluginDir}, nil)
+	return cniConfig.DelNetworkList(ctx, confList, rt)
+}
+
+// perNetworkRuntime validates networkIndex and builds the parsed conflist and
+// RuntimeConf (container ID, netns path, ethN interface name, CNI args and the
+// optional "portMappings" capability) that perNetworkAdd and perNetworkDel hand
+// to libcni, keeping the ADD and DEL invocations identical by construction.
+func perNetworkRuntime(opts *handlerOpts, networkIndex int, nsPath string, extraArgs [][2]string, portMappings []cni.PortMapping) (*cnilibrary.NetworkConfigList, *cnilibrary.RuntimeConf, error) {
+	if networkIndex < 0 || networkIndex >= len(opts.cniNetConfigs) {
+		return nil, nil, fmt.Errorf("network index %d out of range (have %d networks)", networkIndex, len(opts.cniNetConfigs))
+	}
+	confList, err := cnilibrary.ConfListFromBytes(opts.cniNetConfigs[networkIndex])
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to parse conflist for network %d: %w", networkIndex, err)
+	}
+	rt := &cnilibrary.RuntimeConf{
+		ContainerID:    opts.fullID,
+		NetNS:          nsPath,
+		IfName:         perNetworkIfName(networkIndex),
+		Args:           extraArgs,
+		CapabilityArgs: make(map[string]interface{}),
+	}
+	if len(portMappings) > 0 {
+		rt.CapabilityArgs["portMappings"] = portMappings
+	}
+	return confList, rt, nil
+}
+
 func applyNetworkSettings(opts *handlerOpts) (err error) { portMapOpts, err := getPortMapOpts(opts) if err != nil { @@ -530,17 +603,18 @@ func applyNetworkSettings(opts *handlerOpts) (err error) { if err != nil { return err } - var namespaceOpts []cni.NamespaceOpts - namespaceOpts = append(namespaceOpts, portMapOpts...) - namespaceOpts = append(namespaceOpts, ipAddressOpts...) - namespaceOpts = append(namespaceOpts, macAddressOpts...) - namespaceOpts = append(namespaceOpts, ip6AddressOpts...) - namespaceOpts = append(namespaceOpts, + + commonOpts := []cni.NamespaceOpts{} + commonOpts = append(commonOpts, portMapOpts...) + commonOpts = append(commonOpts, macAddressOpts...) + commonOpts = append(commonOpts, ip6AddressOpts...) 
+ commonOpts = append(commonOpts, cni.WithLabels(map[string]string{ "IgnoreUnknown": "1", }), cni.WithArgs("NERDCTL_CNI_DHCP_HOSTNAME", opts.state.Annotations[labels.Hostname]), ) + hsMeta := hostsstore.Meta{ ID: opts.state.ID, Networks: make(map[string]*types100.Result, len(opts.cniNames)), @@ -550,33 +624,88 @@ func applyNetworkSettings(opts *handlerOpts) (err error) { Name: opts.state.Annotations[labels.Name], } - // When containerd gets bounced, containers that were previously running and that are restarted will go again - // through onCreateRuntime (*unlike* in a normal stop/start flow). - // As such, a container may very well have an ip already. The bridge plugin would thus refuse to loan a new one - // and error out, thus making the onCreateRuntime hook fail. In turn, runc (or containerd) will mis-interpret this, - // and subsequently call onPostStop (although the container will not get deleted), and we will release the name... - // leading to a bricked system where multiple containers may share the same name. - // Thus, we do pre-emptively clean things up - error is not checked, as in the majority of cases, that would - // legitimately error (and that does not matter) - // See https://github.com/containerd/nerdctl/issues/3355 - _ = opts.cni.Remove(ctx, opts.fullID, "", namespaceOpts...) + // When per-network IPs are specified (multi-network with different static IPs), + // we must set up each network individually so each CNI plugin receives only its own IP. + // We use cnilibrary directly (instead of go-cni's Setup) so that each network + // gets the correct interface name (eth0, eth1, eth2, ...) rather than all getting eth0. + if len(opts.ipPerNetwork) > 0 { + // Pre-emptively clean up (see comment below for rationale) + for i := range opts.cniNames { + _ = perNetworkDel(ctx, opts, i, "", nil, nil) + } - // Defer CNI configuration removal to ensure idempotency of oci-hook. - defer func() { - if err != nil { - log.L.Warn("Container failed starting. 
Removing allocated network configuration.") - _ = opts.cni.Remove(ctx, opts.fullID, nsPath, namespaceOpts...) + defer func() { + if err != nil { + log.L.Warn("Container failed starting. Removing allocated network configuration.") + for i, cniName := range opts.cniNames { + if delErr := perNetworkDel(ctx, opts, i, nsPath, nil, nil); delErr != nil { + log.L.WithError(delErr).Warnf("failed to remove network %s during cleanup", cniName) + } + } + } + }() + + // Convert port mappings for cnilibrary RuntimeConf capability args + var capPortMappings []cni.PortMapping + if len(opts.ports) > 0 { + capPortMappings = opts.ports } - }() - cniRes, err := opts.cni.Setup(ctx, opts.fullID, nsPath, namespaceOpts...) - if err != nil { - return fmt.Errorf("failed to call cni.Setup: %w", err) - } + for i, cniName := range opts.cniNames { + // Build per-network CNI_ARGS + extraArgs := [][2]string{ + {"IgnoreUnknown", "1"}, + {"NERDCTL_CNI_DHCP_HOSTNAME", opts.state.Annotations[labels.Hostname]}, + } + if ip, ok := opts.ipPerNetwork[cniName]; ok && ip != "" { + extraArgs = append(extraArgs, [2]string{"IP", ip}) + } + if opts.containerMAC != "" { + extraArgs = append(extraArgs, [2]string{"MAC", opts.containerMAC}) + } + + cniRes, setupErr := perNetworkAdd(ctx, opts, i, nsPath, extraArgs, capPortMappings) + if setupErr != nil { + return fmt.Errorf("failed to call cni.Setup for network %s: %w", cniName, setupErr) + } + if cniRes != nil { + hsMeta.Networks[cniName] = cniRes + } + } + } else { + // Legacy path: single IP (or no IP) shared across all networks + var namespaceOpts []cni.NamespaceOpts + namespaceOpts = append(namespaceOpts, commonOpts...) + namespaceOpts = append(namespaceOpts, ipAddressOpts...) + + // When containerd gets bounced, containers that were previously running and that are restarted will go again + // through onCreateRuntime (*unlike* in a normal stop/start flow). + // As such, a container may very well have an ip already. 
The bridge plugin would thus refuse to loan a new one + // and error out, thus making the onCreateRuntime hook fail. In turn, runc (or containerd) will mis-interpret this, + // and subsequently call onPostStop (although the container will not get deleted), and we will release the name... + // leading to a bricked system where multiple containers may share the same name. + // Thus, we do pre-emptively clean things up - error is not checked, as in the majority of cases, that would + // legitimately error (and that does not matter) + // See https://github.com/containerd/nerdctl/issues/3355 + _ = opts.cni.Remove(ctx, opts.fullID, "", namespaceOpts...) + + // Defer CNI configuration removal to ensure idempotency of oci-hook. + defer func() { + if err != nil { + log.L.Warn("Container failed starting. Removing allocated network configuration.") + _ = opts.cni.Remove(ctx, opts.fullID, nsPath, namespaceOpts...) + } + }() - cniResRaw := cniRes.Raw() - for i, cniName := range opts.cniNames { - hsMeta.Networks[cniName] = cniResRaw[i] + cniRes, err := opts.cni.Setup(ctx, opts.fullID, nsPath, namespaceOpts...) + if err != nil { + return fmt.Errorf("failed to call cni.Setup: %w", err) + } + + cniResRaw := cniRes.Raw() + for i, cniName := range opts.cniNames { + hsMeta.Networks[cniName] = cniResRaw[i] + } } b4nnEnabled, b4nnBindEnabled, err := bypass4netnsutil.IsBypass4netnsEnabled(opts.state.Annotations) @@ -708,14 +837,35 @@ func onPostStop(opts *handlerOpts) error { if err != nil { return err } - var namespaceOpts []cni.NamespaceOpts - namespaceOpts = append(namespaceOpts, portMapOpts...) - namespaceOpts = append(namespaceOpts, ipAddressOpts...) - namespaceOpts = append(namespaceOpts, macAddressOpts...) - namespaceOpts = append(namespaceOpts, ip6AddressOpts...) 
- if err := opts.cni.Remove(ctx, opts.fullID, "", namespaceOpts...); err != nil { - log.L.WithError(err).Errorf("failed to call cni.Remove") - return err + + if len(opts.ipPerNetwork) > 0 { + // Per-network cleanup: remove each network individually with its own IP + // and the correct interface name (ethN). + var capPortMappings []cni.PortMapping + if len(opts.ports) > 0 { + capPortMappings = opts.ports + } + for i, cniName := range opts.cniNames { + extraArgs := [][2]string{ + {"IgnoreUnknown", "1"}, + } + if ip, ok := opts.ipPerNetwork[cniName]; ok && ip != "" { + extraArgs = append(extraArgs, [2]string{"IP", ip}) + } + if delErr := perNetworkDel(ctx, opts, i, "", extraArgs, capPortMappings); delErr != nil { + log.L.WithError(delErr).Errorf("failed to call cni.Remove for network %s", cniName) + } + } + } else { + var namespaceOpts []cni.NamespaceOpts + namespaceOpts = append(namespaceOpts, portMapOpts...) + namespaceOpts = append(namespaceOpts, ipAddressOpts...) + namespaceOpts = append(namespaceOpts, macAddressOpts...) + namespaceOpts = append(namespaceOpts, ip6AddressOpts...) + if err := opts.cni.Remove(ctx, opts.fullID, "", namespaceOpts...); err != nil { + log.L.WithError(err).Errorf("failed to call cni.Remove") + return err + } } // opts.cni.Remove has trouble removing network configurations when netns is empty.