Skip to content

Commit 4cdc0d6

Browse files
committed
libct: setns recreate cmd object after the first call
Signed-off-by: Efim Verzakov <efimverzakov@gmail.com>
1 parent e0adafb commit 4cdc0d6

File tree

2 files changed

+70
-48
lines changed

2 files changed

+70
-48
lines changed

libcontainer/container_linux.go

Lines changed: 67 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -479,52 +479,30 @@ func (c *Container) deleteExecFifo() {
479479
// container cannot access the statedir (and the FIFO itself remains
480480
// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
481481
// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
482-
func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
482+
func (c *Container) includeExecFifo() error {
483483
fifoName := filepath.Join(c.stateDir, execFifoFilename)
484484
fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
485485
if err != nil {
486486
return err
487487
}
488488
c.fifo = fifo
489489

490-
cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
491-
cmd.Env = append(cmd.Env,
492-
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
493490
return nil
494491
}
495492

496-
func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
497-
comm, err := newProcessComm()
498-
if err != nil {
499-
return nil, err
500-
}
501-
502-
// Make sure we use a new safe copy of /proc/self/exe binary each time, this
503-
// is called to make sure that if a container manages to overwrite the file,
504-
// it cannot affect other containers on the system. For runc, this code will
505-
// only ever be called once, but libcontainer users might call this more than
506-
// once.
507-
p.closeClonedExes()
493+
// After https://go-review.googlesource.com/c/go/+/728642 will be merged, we need to recreate
494+
// cmd Object to retry it.
495+
func (c *Container) createCmdObject(p *Process, comm *processComm, cgroupFD *os.File) *exec.Cmd {
508496
var (
509497
exePath string
510498
safeExe *os.File
511499
)
512-
if exeseal.IsSelfExeCloned() {
513-
// /proc/self/exe is already a cloned binary -- no need to do anything
514-
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
515-
// We don't need to use /proc/thread-self here because the exe mm of a
516-
// thread-group is guaranteed to be the same for all threads by
517-
// definition. This lets us avoid having to do runtime.LockOSThread.
518-
exePath = "/proc/self/exe"
519-
} else {
520-
var err error
521-
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
522-
if err != nil {
523-
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
524-
}
500+
501+
if len(p.clonedExes) > 0 {
502+
safeExe = p.clonedExes[0]
525503
exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
526-
p.clonedExes = append(p.clonedExes, safeExe)
527-
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
504+
} else {
505+
exePath = "/proc/self/exe"
528506
}
529507

530508
cmd := exec.Command(exePath, "init")
@@ -602,22 +580,72 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
602580
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
603581
}
604582

583+
if c.fifo != nil {
584+
cmd.ExtraFiles = append(cmd.ExtraFiles, c.fifo)
585+
cmd.Env = append(cmd.Env,
586+
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
587+
}
588+
589+
if p.Init {
590+
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
591+
} else {
592+
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
593+
}
594+
595+
if cgroupFD != nil {
596+
cmd.SysProcAttr.UseCgroupFD = true
597+
cmd.SysProcAttr.CgroupFD = int(cgroupFD.Fd())
598+
}
599+
600+
return cmd
601+
}
602+
603+
func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
604+
comm, err := newProcessComm()
605+
if err != nil {
606+
return nil, err
607+
}
608+
609+
// Make sure we use a new safe copy of /proc/self/exe binary each time, this
610+
// is called to make sure that if a container manages to overwrite the file,
611+
// it cannot affect other containers on the system. For runc, this code will
612+
// only ever be called once, but libcontainer users might call this more than
613+
// once.
614+
p.closeClonedExes()
615+
var (
616+
safeExe *os.File
617+
)
618+
if exeseal.IsSelfExeCloned() {
619+
// /proc/self/exe is already a cloned binary -- no need to do anything
620+
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
621+
// We don't need to use /proc/thread-self here because the exe mm of a
622+
// thread-group is guaranteed to be the same for all threads by
623+
// definition. This lets us avoid having to do runtime.LockOSThread.
624+
} else {
625+
var err error
626+
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
627+
if err != nil {
628+
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
629+
}
630+
p.clonedExes = append(p.clonedExes, safeExe)
631+
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
632+
}
633+
605634
if p.Init {
606635
// We only set up fifoFd if we're not doing a `runc exec`. The historic
607636
// reason for this is that previously we would pass a dirfd that allowed
608637
// for container rootfs escape (and not doing it in `runc exec` avoided
609638
// that problem), but we no longer do that. However, there's no need to do
610639
// this for `runc exec` so we just keep it this way to be safe.
611-
if err := c.includeExecFifo(cmd); err != nil {
640+
if err := c.includeExecFifo(); err != nil {
612641
return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
613642
}
614-
return c.newInitProcess(p, cmd, comm)
643+
return c.newInitProcess(p, comm)
615644
}
616-
return c.newSetnsProcess(p, cmd, comm)
645+
return c.newSetnsProcess(p, comm)
617646
}
618647

619-
func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
620-
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
648+
func (c *Container) newInitProcess(p *Process, comm *processComm) (*initProcess, error) {
621649
nsMaps := make(map[configs.NamespaceType]string)
622650
for _, ns := range c.config.Namespaces {
623651
if ns.Path != "" {
@@ -629,6 +657,8 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
629657
return nil, err
630658
}
631659

660+
cmd := c.createCmdObject(p, comm, nil)
661+
632662
init := &initProcess{
633663
containerProcess: containerProcess{
634664
cmd: cmd,
@@ -645,8 +675,7 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
645675
return init, nil
646676
}
647677

648-
func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
649-
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
678+
func (c *Container) newSetnsProcess(p *Process, comm *processComm) (*setnsProcess, error) {
650679
state := c.currentState()
651680
// for setns process, we don't have to set cloneflags as the process namespaces
652681
// will only be set via setns syscall
@@ -656,7 +685,6 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
656685
}
657686
proc := &setnsProcess{
658687
containerProcess: containerProcess{
659-
cmd: cmd,
660688
comm: comm,
661689
manager: c.cgroupManager,
662690
config: c.newInitConfig(p),

libcontainer/process_linux.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"runtime"
1616
"strconv"
1717
"strings"
18-
"syscall"
1918
"time"
2019

2120
"github.com/opencontainers/runtime-spec/specs-go"
@@ -357,12 +356,6 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
357356
}
358357

359358
logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
360-
if p.cmd.SysProcAttr == nil {
361-
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
362-
}
363-
p.cmd.SysProcAttr.UseCgroupFD = true
364-
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())
365-
366359
return fd, nil
367360
}
368361

@@ -380,11 +373,12 @@ func (p *setnsProcess) startWithCgroupFD() error {
380373
defer fd.Close()
381374
}
382375

376+
p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, fd)
383377
err = p.startWithCPUAffinity()
384378
if err != nil && p.cmd.SysProcAttr.UseCgroupFD {
379+
// We need to recreate the exec.Cmd object
385380
logrus.Debugf("exec with CLONE_INTO_CGROUP failed: %v; retrying without", err)
386-
// SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
387-
p.cmd.SysProcAttr.UseCgroupFD = false
381+
p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, nil)
388382
err = p.startWithCPUAffinity()
389383
}
390384

0 commit comments

Comments
 (0)