Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 65 additions & 39 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,52 +479,30 @@ func (c *Container) deleteExecFifo() {
// container cannot access the statedir (and the FIFO itself remains
// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
func (c *Container) includeExecFifo() error {
fifoName := filepath.Join(c.stateDir, execFifoFilename)
fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return err
}
c.fifo = fifo

cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
return nil
}

func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
comm, err := newProcessComm()
if err != nil {
return nil, err
}

// Make sure we use a new safe copy of /proc/self/exe binary each time, this
// is called to make sure that if a container manages to overwrite the file,
// it cannot affect other containers on the system. For runc, this code will
// only ever be called once, but libcontainer users might call this more than
// once.
p.closeClonedExes()
// After https://go-review.googlesource.com/c/go/+/728642 will be merged, we need to recreate
// cmd Object to retry it.
func (c *Container) createCmdObject(p *Process, comm *processComm, cgroupFD *os.File) *exec.Cmd {
var (
exePath string
safeExe *os.File
)
if exeseal.IsSelfExeCloned() {
// /proc/self/exe is already a cloned binary -- no need to do anything
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
// We don't need to use /proc/thread-self here because the exe mm of a
// thread-group is guaranteed to be the same for all threads by
// definition. This lets us avoid having to do runtime.LockOSThread.
exePath = "/proc/self/exe"
} else {
var err error
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
if err != nil {
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
}

if len(p.clonedExes) > 0 {
safeExe = p.clonedExes[0]
exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
p.clonedExes = append(p.clonedExes, safeExe)
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
} else {
exePath = "/proc/self/exe"
}

cmd := exec.Command(exePath, "init")
Expand Down Expand Up @@ -602,22 +580,70 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
}

if c.fifo != nil {
cmd.ExtraFiles = append(cmd.ExtraFiles, c.fifo)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
}

if p.Init {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
} else {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
}

if cgroupFD != nil {
cmd.SysProcAttr.UseCgroupFD = true
cmd.SysProcAttr.CgroupFD = int(cgroupFD.Fd())
}

return cmd
}

func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
comm, err := newProcessComm()
if err != nil {
return nil, err
}

// Make sure we use a new safe copy of /proc/self/exe binary each time, this
// is called to make sure that if a container manages to overwrite the file,
// it cannot affect other containers on the system. For runc, this code will
// only ever be called once, but libcontainer users might call this more than
// once.
p.closeClonedExes()
var safeExe *os.File
if exeseal.IsSelfExeCloned() {
// /proc/self/exe is already a cloned binary -- no need to do anything
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
// We don't need to use /proc/thread-self here because the exe mm of a
// thread-group is guaranteed to be the same for all threads by
// definition. This lets us avoid having to do runtime.LockOSThread.
} else {
var err error
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
if err != nil {
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
}
p.clonedExes = append(p.clonedExes, safeExe)
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
}

if p.Init {
// We only set up fifoFd if we're not doing a `runc exec`. The historic
// reason for this is that previously we would pass a dirfd that allowed
// for container rootfs escape (and not doing it in `runc exec` avoided
// that problem), but we no longer do that. However, there's no need to do
// this for `runc exec` so we just keep it this way to be safe.
if err := c.includeExecFifo(cmd); err != nil {
if err := c.includeExecFifo(); err != nil {
return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
}
return c.newInitProcess(p, cmd, comm)
return c.newInitProcess(p, comm)
}
return c.newSetnsProcess(p, cmd, comm)
return c.newSetnsProcess(p, comm)
}

func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
func (c *Container) newInitProcess(p *Process, comm *processComm) (*initProcess, error) {
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
if ns.Path != "" {
Expand All @@ -629,6 +655,8 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
return nil, err
}

cmd := c.createCmdObject(p, comm, nil)

init := &initProcess{
containerProcess: containerProcess{
cmd: cmd,
Expand All @@ -645,8 +673,7 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
return init, nil
}

func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
func (c *Container) newSetnsProcess(p *Process, comm *processComm) (*setnsProcess, error) {
state := c.currentState()
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
Expand All @@ -656,7 +683,6 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
}
proc := &setnsProcess{
containerProcess: containerProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
Expand Down
121 changes: 121 additions & 0 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@ import (
"strings"
"syscall"
"testing"
"time"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/systemd"
"github.com/opencontainers/runc/internal/linux"
"github.com/opencontainers/runc/internal/pathrs"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/internal/userns"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"

"golang.org/x/sys/unix"
Expand Down Expand Up @@ -230,6 +233,124 @@ func TestEnter(t *testing.T) {
}
}

const stateFilename = "state.json"

// We need to dump changed state into state file.
func saveState(stateDir string, s *libcontainer.State) (retErr error) {
tmpFile, err := os.CreateTemp(stateDir, "state-")
if err != nil {
return err
}

defer func() {
if retErr != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
}
}()

err = utils.WriteJSON(tmpFile, s)
if err != nil {
return err
}
err = tmpFile.Close()
if err != nil {
return err
}

stateFilePath := filepath.Join(stateDir, stateFilename)
return os.Rename(tmpFile.Name(), stateFilePath)
}

func TestCmdRetry(t *testing.T) {
if testing.Short() {
return
}

if !cgroups.IsCgroup2UnifiedMode() {
t.Skip("CLONE_INTO_CGROUP works only with cgroup v2")
}

// Create dir to "pseudo" cgroup path
pseudoPath := t.TempDir()

config := newTemplateConfig(t, nil)

name := strings.ReplaceAll(t.Name(), "/", "_") + strconv.FormatInt(-int64(time.Now().Nanosecond()), 35)
root := t.TempDir()

// Container State Dir
stateDir, err := securejoin.SecureJoin(root, name)
ok(t, err)

// Create Container
container, err := libcontainer.Create(root, name, config)

ok(t, err)
defer destroyContainer(container)

// Execute a first process in the container
stdinR, stdinW, err := os.Pipe()
ok(t, err)

var stdout, stdout2 bytes.Buffer

pconfig := libcontainer.Process{
Cwd: "/",
Args: []string{"sh", "-c", "cat"},
Env: standardEnvironment,
Stdin: stdinR,
Stdout: &stdout,
Init: true,
}

err = container.Run(&pconfig)
_ = stdinR.Close()
defer stdinW.Close()
ok(t, err)

state, err := container.State()
ok(t, err)

// Dump our "pseudo" cgroup path as dirPath
state.CgroupPaths[""] = pseudoPath
err = saveState(stateDir, state)
ok(t, err)

// Load container from dumped state
container2, err := libcontainer.Load(root, name)
ok(t, err)

// Execute another process in the container
stdinR2, stdinW2, err := os.Pipe()
ok(t, err)
pconfig2 := libcontainer.Process{
Cwd: "/",
Env: standardEnvironment,
}
pconfig2.Args = []string{"sh", "-c", "cat"}
pconfig2.Stdin = stdinR2
pconfig2.Stdout = &stdout2

// Turn on TestMode in cgroups library to not check path mode and create all necessary files.
cgroups.TestMode = true
defer func() {
cgroups.TestMode = false
}()

err = container2.Run(&pconfig2)
_ = stdinR2.Close()
defer stdinW2.Close()
ok(t, err)

// Wait processes
_ = stdinW2.Close()
waitProcess(&pconfig2, t)

_ = stdinW.Close()
waitProcess(&pconfig, t)
}

func TestProcessEnv(t *testing.T) {
if testing.Short() {
return
Expand Down
12 changes: 3 additions & 9 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"runtime"
"strconv"
"strings"
"syscall"
"time"

"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -357,12 +356,6 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
}

logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
if p.cmd.SysProcAttr == nil {
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
p.cmd.SysProcAttr.UseCgroupFD = true
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())

return fd, nil
}

Expand All @@ -380,11 +373,12 @@ func (p *setnsProcess) startWithCgroupFD() error {
defer fd.Close()
}

p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, fd)
err = p.startWithCPUAffinity()
if err != nil && p.cmd.SysProcAttr.UseCgroupFD {
// We need to recreate the exec.Cmd object
logrus.Debugf("exec with CLONE_INTO_CGROUP failed: %v; retrying without", err)
// SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
p.cmd.SysProcAttr.UseCgroupFD = false
p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, nil)
err = p.startWithCPUAffinity()
}

Expand Down