mirror of
https://github.com/tailscale/tailscale.git
synced 2025-12-22 13:37:10 +00:00
ipn/ipnlocal: fix LocalBackend deadlock when packet arrives during profile switch (#18126)
If a packet arrives while WireGuard is being reconfigured with b.mu held, such as during a profile switch, calling back into (*LocalBackend).GetPeerAPIPort from (*Wrapper).filterPacketInboundFromWireGuard may deadlock when it tries to acquire b.mu. This occurs because a peer cannot be removed while an inbound packet is being processed. The reconfig and profile switch wait for (*Peer).RoutineSequentialReceiver to return, but it never finishes because GetPeerAPIPort needs b.mu, which the waiting goroutine already holds. In this PR, we make peerAPIPorts a new syncs.AtomicValue field that is written with b.mu held but can be read by GetPeerAPIPort without holding the mutex, which fixes the deadlock. There might be other long-term ways to address the issue, such as moving peer API listeners from LocalBackend to nodeBackend so they can be accessed without holding b.mu, but these changes are too large and risky at this stage in the v1.92 release cycle. Updates #18124 Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
@@ -245,6 +245,8 @@ type LocalBackend struct {
|
||||
// to prevent state changes while invoking callbacks.
|
||||
extHost *ExtensionHost
|
||||
|
||||
peerAPIPorts syncs.AtomicValue[map[netip.Addr]int] // can be read without b.mu held; TODO(nickkhyl): remove or move to nodeBackend?
|
||||
|
||||
// The mutex protects the following elements.
|
||||
mu syncs.Mutex
|
||||
|
||||
@@ -295,8 +297,8 @@ type LocalBackend struct {
|
||||
authActor ipnauth.Actor // an actor who called [LocalBackend.StartLoginInteractive] last, or nil; TODO(nickkhyl): move to nodeBackend
|
||||
egg bool
|
||||
prevIfState *netmon.State
|
||||
peerAPIServer *peerAPIServer // or nil
|
||||
peerAPIListeners []*peerAPIListener
|
||||
peerAPIServer *peerAPIServer // or nil
|
||||
peerAPIListeners []*peerAPIListener // TODO(nickkhyl): move to nodeBackend
|
||||
loginFlags controlclient.LoginFlags
|
||||
notifyWatchers map[string]*watchSession // by session ID
|
||||
lastStatusTime time.Time // status.AsOf value of the last processed status update
|
||||
@@ -4710,14 +4712,8 @@ func (b *LocalBackend) GetPeerAPIPort(ip netip.Addr) (port uint16, ok bool) {
|
||||
if !buildfeatures.HasPeerAPIServer {
|
||||
return 0, false
|
||||
}
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
for _, pln := range b.peerAPIListeners {
|
||||
if pln.ip == ip {
|
||||
return uint16(pln.port), true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
portInt, ok := b.peerAPIPorts.Load()[ip]
|
||||
return uint16(portInt), ok
|
||||
}
|
||||
|
||||
// handlePeerAPIConn serves an already-accepted connection c.
|
||||
@@ -5209,6 +5205,7 @@ func (b *LocalBackend) closePeerAPIListenersLocked() {
|
||||
pln.Close()
|
||||
}
|
||||
b.peerAPIListeners = nil
|
||||
b.peerAPIPorts.Store(nil)
|
||||
}
|
||||
|
||||
// peerAPIListenAsync is whether the operating system requires that we
|
||||
@@ -5281,6 +5278,7 @@ func (b *LocalBackend) initPeerAPIListenerLocked() {
|
||||
b.peerAPIServer = ps
|
||||
|
||||
isNetstack := b.sys.IsNetstack()
|
||||
peerAPIPorts := make(map[netip.Addr]int)
|
||||
for i, a := range addrs.All() {
|
||||
var ln net.Listener
|
||||
var err error
|
||||
@@ -5313,7 +5311,9 @@ func (b *LocalBackend) initPeerAPIListenerLocked() {
|
||||
b.logf("peerapi: serving on %s", pln.urlStr)
|
||||
go pln.serve()
|
||||
b.peerAPIListeners = append(b.peerAPIListeners, pln)
|
||||
peerAPIPorts[a.Addr()] = pln.port
|
||||
}
|
||||
b.peerAPIPorts.Store(peerAPIPorts)
|
||||
|
||||
b.goTracker.Go(b.doSetHostinfoFilterServices)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user