refactor(meshchat): improve RNS shutdown process and enhance RPC listener closure handling
Some checks failed
CI / test-backend (push) Successful in 16s
CI / lint (push) Failing after 44s
CI / build-frontend (push) Successful in 9m36s
Tests / test (push) Successful in 9m46s

This commit is contained in:
2026-01-02 20:54:22 -06:00
parent 43d1fcc91a
commit eeaabacf35

View File

@@ -7,6 +7,7 @@ import base64
import configparser import configparser
import copy import copy
import gc import gc
import hashlib
import io import io
import ipaddress import ipaddress
import json import json
@@ -25,7 +26,6 @@ import webbrowser
import zipfile import zipfile
from collections.abc import Callable from collections.abc import Callable
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
import hashlib
import bcrypt import bcrypt
import LXMF import LXMF
@@ -823,7 +823,14 @@ class ReticulumMeshChat:
# Give loops a moment to finish # Give loops a moment to finish
await asyncio.sleep(2) await asyncio.sleep(2)
# Aggressively close RNS interfaces to release sockets # Close RNS instance first to let it detach interfaces naturally
try:
# Use class method to ensure all instances are cleaned up if any
RNS.Reticulum.exit_handler()
except Exception as e:
print(f"Warning during RNS exit: {e}")
# Aggressively close RNS interfaces to release sockets if they didn't close
try: try:
interfaces = [] interfaces = []
if hasattr(RNS.Transport, "interfaces"): if hasattr(RNS.Transport, "interfaces"):
@@ -883,24 +890,25 @@ class ReticulumMeshChat:
# Close RPC listener if it exists on the instance # Close RPC listener if it exists on the instance
if old_reticulum: if old_reticulum:
if ( # Reticulum uses private attributes for the listener
hasattr(old_reticulum, "rpc_listener") rpc_listener_names = [
and old_reticulum.rpc_listener "rpc_listener",
): "_Reticulum__rpc_listener",
"_rpc_listener",
]
for attr_name in rpc_listener_names:
if hasattr(old_reticulum, attr_name):
listener = getattr(old_reticulum, attr_name)
if listener:
try: try:
# Listener.close() should close the underlying socket/pipe print(
old_reticulum.rpc_listener.close() f"Forcing closure of RPC listener in {attr_name}...",
# Clear it to be sure )
old_reticulum.rpc_listener = None if hasattr(listener, "close"):
listener.close()
setattr(old_reticulum, attr_name, None)
except Exception as e: except Exception as e:
print(f"Warning closing RPC listener: {e}") print(f"Warning closing RPC listener {attr_name}: {e}")
# Persist and close RNS instance
try:
# Use class method to ensure all instances are cleaned up if any
RNS.Reticulum.exit_handler()
except Exception as e:
print(f"Warning during RNS exit: {e}")
# Clear RNS singleton and internal state to allow re-initialization # Clear RNS singleton and internal state to allow re-initialization
try: try:
@@ -943,6 +951,9 @@ class ReticulumMeshChat:
# Wait another moment for sockets to definitely be released by OS # Wait another moment for sockets to definitely be released by OS
# Also give some time for the RPC listener port to settle # Also give some time for the RPC listener port to settle
print("Waiting for ports to settle...") print("Waiting for ports to settle...")
# We add a settle time here similar to Sideband's logic
await asyncio.sleep(4)
# Detect RPC type from reticulum instance if possible, otherwise default to both # Detect RPC type from reticulum instance if possible, otherwise default to both
rpc_addrs = [] rpc_addrs = []
if old_reticulum: if old_reticulum:
@@ -1001,7 +1012,6 @@ class ReticulumMeshChat:
rpc_addrs.append(("\0rns/default/rpc", "AF_UNIX")) rpc_addrs.append(("\0rns/default/rpc", "AF_UNIX"))
for i in range(15): for i in range(15):
await asyncio.sleep(1)
all_free = True all_free = True
for addr, family_str in rpc_addrs: for addr, family_str in rpc_addrs:
try: try:
@@ -1019,14 +1029,22 @@ class ReticulumMeshChat:
s.bind(addr) s.bind(addr)
s.close() s.close()
except OSError: except OSError:
addr_display = addr
if (
family == socket.AF_UNIX
and isinstance(addr, str)
and addr.startswith("\0")
):
addr_display = addr[1:] + " (abstract)"
print( print(
f"RPC addr {addr} still in use... (attempt {i + 1}/15)", f"RPC addr {addr_display} still in use... (attempt {i + 1}/15)",
) )
s.close() s.close()
all_free = False all_free = False
# If we are stuck, try to force close the connection manually # If we are stuck, try to force close the connection manually
if i > 5: if i > 1:
try: try:
current_process = psutil.Process() current_process = psutil.Process()
# We use kind='all' to catch both TCP and UNIX sockets # We use kind='all' to catch both TCP and UNIX sockets
@@ -1044,11 +1062,47 @@ class ReticulumMeshChat:
or addr[0] == "0.0.0.0" or addr[0] == "0.0.0.0"
): ):
match = True match = True
elif ( elif family_str == "AF_UNIX":
family_str == "AF_UNIX" # Match path for UNIX sockets, including abstract
and conn.laddr == addr # Psutil sometimes returns abstract addresses as strings or bytes,
# with or without the leading null byte.
laddr = conn.laddr
# Normalize both to bytes for comparison
target_addr = (
addr
if isinstance(addr, bytes)
else addr.encode()
if isinstance(addr, str)
else b""
)
current_laddr = (
laddr
if isinstance(laddr, bytes)
else laddr.encode()
if isinstance(laddr, str)
else b""
)
if current_laddr == target_addr or (
target_addr.startswith(b"\0")
and current_laddr
== target_addr[1:]
) or (
current_laddr.startswith(b"\0")
and target_addr
== current_laddr[1:]
):
match = True
elif (
target_addr in current_laddr
or current_laddr in target_addr
):
# Last resort: partial match
if (
len(target_addr) > 5
and len(current_laddr) > 5
): ):
# Match path for UNIX sockets
match = True match = True
if match: if match:
@@ -1088,6 +1142,8 @@ class ReticulumMeshChat:
print("All RNS ports/sockets are free.") print("All RNS ports/sockets are free.")
break break
await asyncio.sleep(1)
if not all_free: if not all_free:
# One last attempt with a very short sleep before failing # One last attempt with a very short sleep before failing
await asyncio.sleep(2) await asyncio.sleep(2)
@@ -1115,7 +1171,6 @@ class ReticulumMeshChat:
raise OSError( raise OSError(
"Timeout waiting for RNS ports to be released. Cannot restart.", "Timeout waiting for RNS ports to be released. Cannot restart.",
) )
else:
print("RNS ports finally free after last-second check.") print("RNS ports finally free after last-second check.")
# Final GC to ensure everything is released # Final GC to ensure everything is released