Skip to content

Commit 0834450

Browse files
committed
DHCP lease maintenance should terminate when interface no longer exists.
This change is due to observations that the thread count can grow and the DHCP daemon uses an increasing amount of memory. This situation can happen organically when using, say, the bridge CNI, when the bridge has been removed outside of the bridge CNI lifecycle and an interface no longer exists on a pod. The existence check runs in a retry loop using the `backoffRetry()` method. Signed-off-by: dougbtv <dosmith@redhat.com>
1 parent e4ca66b commit 0834450

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

plugins/ipam/dhcp/lease.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ const (
5555
leaseStateRebinding
5656
)
5757

58+
// Timing for retrying the link existence check.
// linkCheckTotalTimeout is the timeout placed on the context that
// checkLinkExistsWithBackoff hands to backoffRetry. linkCheckRetryMax is
// passed to backoffRetry as its duration argument (presumably the cap on the
// delay between attempts -- confirm against backoffRetry).
59+
const (
60+
linkCheckRetryMax = 10 * time.Second
61+
linkCheckTotalTimeout = 30 * time.Second
62+
)
63+
5864
// This implementation uses 1 OS thread per lease. This is because
5965
// all the network operations have to be done in network namespace
6066
// of the interface. This can be improved by switching to the proper
@@ -292,6 +298,14 @@ func (l *DHCPLease) maintain() {
292298
for {
293299
var sleepDur time.Duration
294300

301+
linkCheckCtx, cancel := context.WithTimeoutCause(l.ctx, l.resendTimeout, errNoMoreTries)
302+
defer cancel()
303+
err := l.checkLinkExistsWithBackoff(linkCheckCtx)
304+
if err != nil {
305+
log.Printf("%v: interface %s no longer exists or check failed, terminating lease maintenance (last encountered: %v)", l.clientID, l.link.Attrs().Name, err)
306+
return
307+
}
308+
295309
switch state {
296310
case leaseStateBound:
297311
sleepDur = time.Until(l.renewalTime)
@@ -344,6 +358,23 @@ func (l *DHCPLease) maintain() {
344358
}
345359
}
346360

361+
// checkLinkExistsWithBackoff uses backoffRetry to check if a network link exists.
// The link is looked up by the name stored in l.link's attributes. The supplied
// ctx is wrapped with an additional linkCheckTotalTimeout deadline, so the check
// stops at whichever deadline (the caller's or this one) expires first.
// It returns whatever error backoffRetry reports (presumably nil once a lookup
// succeeds -- confirm against backoffRetry).
362+
func (l *DHCPLease) checkLinkExistsWithBackoff(ctx context.Context) error {
363+
checkFunc := func() (*nclient4.Lease, error) {
364+
// Returns the error to trigger a retry.
// The lease result is always nil; only the error is meaningful here. The
// signature presumably exists to match what backoffRetry accepts.
365+
if _, err := netlink.LinkByName(l.link.Attrs().Name); err != nil {
366+
return nil, err
367+
}
368+
return nil, nil
369+
}
370+
371+
// Bound the whole retry sequence, in addition to any deadline already on ctx.
ctx, cancel := context.WithTimeout(ctx, linkCheckTotalTimeout)
372+
defer cancel()
373+
_, err := backoffRetry(ctx, linkCheckRetryMax, checkFunc)
374+
return err
375+
}
376+
377+
347378
func (l *DHCPLease) downIface() {
348379
if err := netlink.LinkSetDown(l.link); err != nil {
349380
log.Printf("%v: failed to bring %v interface DOWN: %v", l.clientID, l.link.Attrs().Name, err)

0 commit comments

Comments
 (0)