@@ -479,52 +479,30 @@ func (c *Container) deleteExecFifo() {
479479// container cannot access the statedir (and the FIFO itself remains
480480// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
481481// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
482- func (c * Container ) includeExecFifo (cmd * exec. Cmd ) error {
482+ func (c * Container ) includeExecFifo () error {
483483 fifoName := filepath .Join (c .stateDir , execFifoFilename )
484484 fifo , err := os .OpenFile (fifoName , unix .O_PATH | unix .O_CLOEXEC , 0 )
485485 if err != nil {
486486 return err
487487 }
488488 c .fifo = fifo
489489
490- cmd .ExtraFiles = append (cmd .ExtraFiles , fifo )
491- cmd .Env = append (cmd .Env ,
492- "_LIBCONTAINER_FIFOFD=" + strconv .Itoa (stdioFdCount + len (cmd .ExtraFiles )- 1 ))
493490 return nil
494491}
495492
496- func (c * Container ) newParentProcess (p * Process ) (parentProcess , error ) {
497- comm , err := newProcessComm ()
498- if err != nil {
499- return nil , err
500- }
501-
502- // Make sure we use a new safe copy of /proc/self/exe binary each time, this
503- // is called to make sure that if a container manages to overwrite the file,
504- // it cannot affect other containers on the system. For runc, this code will
505- // only ever be called once, but libcontainer users might call this more than
506- // once.
507- p .closeClonedExes ()
493+ // After https://go-review.googlesource.com/c/go/+/728642 will be merged, we need to recreate
494+ // cmd Object to retry it.
495+ func (c * Container ) createCmdObject (p * Process , comm * processComm , cgroupFD * os.File ) * exec.Cmd {
508496 var (
509497 exePath string
510498 safeExe * os.File
511499 )
512- if exeseal .IsSelfExeCloned () {
513- // /proc/self/exe is already a cloned binary -- no need to do anything
514- logrus .Debug ("skipping binary cloning -- /proc/self/exe is already cloned!" )
515- // We don't need to use /proc/thread-self here because the exe mm of a
516- // thread-group is guaranteed to be the same for all threads by
517- // definition. This lets us avoid having to do runtime.LockOSThread.
518- exePath = "/proc/self/exe"
519- } else {
520- var err error
521- safeExe , err = exeseal .CloneSelfExe (c .stateDir )
522- if err != nil {
523- return nil , fmt .Errorf ("unable to create safe /proc/self/exe clone for runc init: %w" , err )
524- }
500+
501+ if len (p .clonedExes ) > 0 {
502+ safeExe = p .clonedExes [0 ]
525503 exePath = "/proc/self/fd/" + strconv .Itoa (int (safeExe .Fd ()))
526- p . clonedExes = append ( p . clonedExes , safeExe )
527- logrus . Debug ( "runc exeseal: using /proc/self/exe clone" ) // used for tests
504+ } else {
505+ exePath = " /proc/self/exe"
528506 }
529507
530508 cmd := exec .Command (exePath , "init" )
@@ -602,22 +580,72 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
602580 cmd .SysProcAttr .Pdeathsig = unix .Signal (c .config .ParentDeathSignal )
603581 }
604582
583+ if c .fifo != nil {
584+ cmd .ExtraFiles = append (cmd .ExtraFiles , c .fifo )
585+ cmd .Env = append (cmd .Env ,
586+ "_LIBCONTAINER_FIFOFD=" + strconv .Itoa (stdioFdCount + len (cmd .ExtraFiles )- 1 ))
587+ }
588+
589+ if p .Init {
590+ cmd .Env = append (cmd .Env , "_LIBCONTAINER_INITTYPE=" + string (initStandard ))
591+ } else {
592+ cmd .Env = append (cmd .Env , "_LIBCONTAINER_INITTYPE=" + string (initSetns ))
593+ }
594+
595+ if cgroupFD != nil {
596+ cmd .SysProcAttr .UseCgroupFD = true
597+ cmd .SysProcAttr .CgroupFD = int (cgroupFD .Fd ())
598+ }
599+
600+ return cmd
601+ }
602+
603+ func (c * Container ) newParentProcess (p * Process ) (parentProcess , error ) {
604+ comm , err := newProcessComm ()
605+ if err != nil {
606+ return nil , err
607+ }
608+
609+ // Make sure we use a new safe copy of /proc/self/exe binary each time, this
610+ // is called to make sure that if a container manages to overwrite the file,
611+ // it cannot affect other containers on the system. For runc, this code will
612+ // only ever be called once, but libcontainer users might call this more than
613+ // once.
614+ p .closeClonedExes ()
615+ var (
616+ safeExe * os.File
617+ )
618+ if exeseal .IsSelfExeCloned () {
619+ // /proc/self/exe is already a cloned binary -- no need to do anything
620+ logrus .Debug ("skipping binary cloning -- /proc/self/exe is already cloned!" )
621+ // We don't need to use /proc/thread-self here because the exe mm of a
622+ // thread-group is guaranteed to be the same for all threads by
623+ // definition. This lets us avoid having to do runtime.LockOSThread.
624+ } else {
625+ var err error
626+ safeExe , err = exeseal .CloneSelfExe (c .stateDir )
627+ if err != nil {
628+ return nil , fmt .Errorf ("unable to create safe /proc/self/exe clone for runc init: %w" , err )
629+ }
630+ p .clonedExes = append (p .clonedExes , safeExe )
631+ logrus .Debug ("runc exeseal: using /proc/self/exe clone" ) // used for tests
632+ }
633+
605634 if p .Init {
606635 // We only set up fifoFd if we're not doing a `runc exec`. The historic
607636 // reason for this is that previously we would pass a dirfd that allowed
608637 // for container rootfs escape (and not doing it in `runc exec` avoided
609638 // that problem), but we no longer do that. However, there's no need to do
610639 // this for `runc exec` so we just keep it this way to be safe.
611- if err := c .includeExecFifo (cmd ); err != nil {
640+ if err := c .includeExecFifo (); err != nil {
612641 return nil , fmt .Errorf ("unable to setup exec fifo: %w" , err )
613642 }
614- return c .newInitProcess (p , cmd , comm )
643+ return c .newInitProcess (p , comm )
615644 }
616- return c .newSetnsProcess (p , cmd , comm )
645+ return c .newSetnsProcess (p , comm )
617646}
618647
619- func (c * Container ) newInitProcess (p * Process , cmd * exec.Cmd , comm * processComm ) (* initProcess , error ) {
620- cmd .Env = append (cmd .Env , "_LIBCONTAINER_INITTYPE=" + string (initStandard ))
648+ func (c * Container ) newInitProcess (p * Process , comm * processComm ) (* initProcess , error ) {
621649 nsMaps := make (map [configs.NamespaceType ]string )
622650 for _ , ns := range c .config .Namespaces {
623651 if ns .Path != "" {
@@ -629,6 +657,8 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
629657 return nil , err
630658 }
631659
660+ cmd := c .createCmdObject (p , comm , nil )
661+
632662 init := & initProcess {
633663 containerProcess : containerProcess {
634664 cmd : cmd ,
@@ -645,8 +675,7 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
645675 return init , nil
646676}
647677
648- func (c * Container ) newSetnsProcess (p * Process , cmd * exec.Cmd , comm * processComm ) (* setnsProcess , error ) {
649- cmd .Env = append (cmd .Env , "_LIBCONTAINER_INITTYPE=" + string (initSetns ))
678+ func (c * Container ) newSetnsProcess (p * Process , comm * processComm ) (* setnsProcess , error ) {
650679 state := c .currentState ()
651680 // for setns process, we don't have to set cloneflags as the process namespaces
652681 // will only be set via setns syscall
@@ -656,7 +685,6 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
656685 }
657686 proc := & setnsProcess {
658687 containerProcess : containerProcess {
659- cmd : cmd ,
660688 comm : comm ,
661689 manager : c .cgroupManager ,
662690 config : c .newInitConfig (p ),
0 commit comments