Add global BLE lock + faster timeouts for multi-device reliability
Root cause: 3 BMS devices were fighting for 3 ESPHome proxy connection slots simultaneously, causing 80% timeout failures and 22s+ poll times.

Fixes:
- Add a shared asyncio.Lock so only one BMS polls at a time — eliminates proxy slot contention entirely
- Pass ble_device_callback to establish_connection so retry attempts get a fresh BLEDevice (handles proxy path changes)
- Reduce command timeout 5s -> 3s and retries 3 -> 2 (BMS responds in <200ms when the connection is clean)
- Reduce establish_connection max_attempts 3 -> 2 (fail fast and retry on the next cycle instead of blocking for 25s)
- Use a fixed 15s poll timeout (was max(poll_interval - 5, 20), e.g. 25s for a 30s interval)

Expected: polls complete in 2-5s instead of 22s, with a ~95%+ success rate.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,11 @@ _LOGGER = logging.getLogger(__name__)
|
||||
# the last known data instead so the UI doesn't oscillate.
|
||||
_FAILURES_BEFORE_UNAVAILABLE = 5
|
||||
|
||||
# Hard ceiling on the BLE poll operation (connect + commands + disconnect).
|
||||
# With the global lock preventing contention, connections should be fast —
|
||||
# 15 s is generous for 2 commands (plus the version query while hw_version is still unknown) over a local proxy.
|
||||
_POLL_TIMEOUT = 15
|
||||
|
||||
|
||||
class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||
"""Polls the BMS over BLE and distributes data to all sensor entities.
|
||||
@@ -36,6 +41,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||
address: str,
|
||||
poll_interval: int,
|
||||
name: str = "Xiaoxiang Smart BMS",
|
||||
ble_lock: asyncio.Lock | None = None,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
hass,
|
||||
@@ -45,12 +51,14 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||
)
|
||||
self.address = address
|
||||
self._device_name = name
|
||||
self._poll_timeout = max(poll_interval - 5, 20)
|
||||
self._handler = BmsBluetoothHandler(address)
|
||||
self.hw_version: str | None = None
|
||||
self._consecutive_failures = 0
|
||||
# Kept fresh by the BLE advertisement callback registered in __init__.py
|
||||
self._ble_device: BLEDevice | None = None
|
||||
# Shared across all BMS coordinator instances so only one BMS connects
|
||||
# at a time — prevents ESPHome proxy connection slot exhaustion. If the caller passes no lock, a private one is created and nothing is serialized across devices.
|
||||
self._ble_lock = ble_lock or asyncio.Lock()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Device info — shared by sensor, binary_sensor, number platforms
|
||||
@@ -82,7 +90,11 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||
raise HomeAssistantError(
|
||||
f"BMS ({self.address}) not reachable — cannot send MOS command"
|
||||
)
|
||||
success = await self._handler.write_mos(device, value)
|
||||
async with self._ble_lock:
|
||||
success = await self._handler.write_mos(
|
||||
device, value,
|
||||
ble_device_callback=self._get_ble_device,
|
||||
)
|
||||
if not success:
|
||||
raise HomeAssistantError("BMS did not acknowledge the MOS command")
|
||||
await self.async_request_refresh()
|
||||
@@ -135,17 +147,24 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||
if self.hw_version is None:
|
||||
commands.append(CMD_VERSION)
|
||||
|
||||
try:
|
||||
responses = await asyncio.wait_for(
|
||||
self._handler.poll(device, commands),
|
||||
timeout=self._poll_timeout,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return self._handle_failure(
|
||||
f"BMS poll timed out after {self._poll_timeout}s"
|
||||
)
|
||||
except Exception as exc:
|
||||
return self._handle_failure(f"BMS poll failed: {exc}")
|
||||
# Only one BMS polls at a time — prevents proxy connection slot contention.
|
||||
# The timeout wraps only the actual BLE operation, not the lock wait.
|
||||
async with self._ble_lock:
|
||||
try:
|
||||
responses = await asyncio.wait_for(
|
||||
self._handler.poll(
|
||||
device,
|
||||
commands,
|
||||
ble_device_callback=self._get_ble_device,
|
||||
),
|
||||
timeout=_POLL_TIMEOUT,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return self._handle_failure(
|
||||
f"BMS poll timed out after {_POLL_TIMEOUT}s"
|
||||
)
|
||||
except Exception as exc:
|
||||
return self._handle_failure(f"BMS poll failed: {exc}")
|
||||
|
||||
general_frame, cell_frame = responses[0], responses[1]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user