Add global BLE lock + faster timeouts for multi-device reliability
Root cause: 3 BMS devices were competing for 3 ESPHome proxy connection slots simultaneously, causing 80% timeout failures and 22s+ poll times.

Fixes:
- Add a shared asyncio.Lock so only one BMS polls at a time — eliminates proxy slot contention entirely
- Pass ble_device_callback to establish_connection so retry attempts get a fresh BLEDevice (handles proxy path changes)
- Reduce command timeout 5s -> 3s, retries 3 -> 2 (BMS responds in <200ms when connection is clean)
- Reduce establish_connection max_attempts 3 -> 2 (fail fast, retry next cycle instead of blocking 25s)
- Use a fixed 15s poll timeout (was max(poll_interval - 5, 20), i.e. poll_interval-5=25s)

Expected: polls complete in 2-5s instead of 22s, ~95%+ success rate.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
"""Xiaoxiang Smart BMS — Home Assistant integration."""
|
"""Xiaoxiang Smart BMS — Home Assistant integration."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
from homeassistant.components.bluetooth import (
|
from homeassistant.components.bluetooth import (
|
||||||
BluetoothChange,
|
BluetoothChange,
|
||||||
BluetoothScanningMode,
|
BluetoothScanningMode,
|
||||||
@@ -22,7 +24,19 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
|
|||||||
address = entry.data[CONF_ADDRESS]
|
address = entry.data[CONF_ADDRESS]
|
||||||
poll_interval = entry.options.get(CONF_POLL_INTERVAL, DEFAULT_POLL_INTERVAL)
|
poll_interval = entry.options.get(CONF_POLL_INTERVAL, DEFAULT_POLL_INTERVAL)
|
||||||
|
|
||||||
coordinator = BmsCoordinator(hass, address, poll_interval, name=entry.title)
|
hass.data.setdefault(DOMAIN, {})
|
||||||
|
|
||||||
|
# Shared BLE lock — only one BMS connects at a time to avoid
|
||||||
|
# ESPHome proxy connection slot exhaustion with multiple devices.
|
||||||
|
if "_ble_lock" not in hass.data[DOMAIN]:
|
||||||
|
hass.data[DOMAIN]["_ble_lock"] = asyncio.Lock()
|
||||||
|
ble_lock = hass.data[DOMAIN]["_ble_lock"]
|
||||||
|
|
||||||
|
coordinator = BmsCoordinator(
|
||||||
|
hass, address, poll_interval,
|
||||||
|
name=entry.title,
|
||||||
|
ble_lock=ble_lock,
|
||||||
|
)
|
||||||
|
|
||||||
# Keep the coordinator's BLE device reference fresh via advertisement callback.
|
# Keep the coordinator's BLE device reference fresh via advertisement callback.
|
||||||
# This avoids stale transport paths when ESPHome proxies cycle.
|
# This avoids stale transport paths when ESPHome proxies cycle.
|
||||||
@@ -45,7 +59,7 @@ async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
|
|||||||
await coordinator.async_setup()
|
await coordinator.async_setup()
|
||||||
await coordinator.async_config_entry_first_refresh()
|
await coordinator.async_config_entry_first_refresh()
|
||||||
|
|
||||||
hass.data.setdefault(DOMAIN, {})[entry.entry_id] = coordinator
|
hass.data[DOMAIN][entry.entry_id] = coordinator
|
||||||
await hass.config_entries.async_forward_entry_setups(entry, PLATFORMS)
|
await hass.config_entries.async_forward_entry_setups(entry, PLATFORMS)
|
||||||
|
|
||||||
entry.async_on_unload(entry.add_update_listener(_async_update_listener))
|
entry.async_on_unload(entry.add_update_listener(_async_update_listener))
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import struct
|
import struct
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
from bleak import BleakError
|
from bleak import BleakError
|
||||||
from bleak.backends.device import BLEDevice
|
from bleak.backends.device import BLEDevice
|
||||||
@@ -63,8 +64,9 @@ class BmsBluetoothHandler:
|
|||||||
self,
|
self,
|
||||||
ble_device: BLEDevice,
|
ble_device: BLEDevice,
|
||||||
commands: list[bytes],
|
commands: list[bytes],
|
||||||
timeout: float = 5.0,
|
timeout: float = 3.0,
|
||||||
retries: int = 3,
|
retries: int = 2,
|
||||||
|
ble_device_callback: Callable[[], BLEDevice | None] | None = None,
|
||||||
) -> list[bytes | None]:
|
) -> list[bytes | None]:
|
||||||
"""Connect, send each command in sequence, disconnect.
|
"""Connect, send each command in sequence, disconnect.
|
||||||
|
|
||||||
@@ -79,7 +81,8 @@ class BmsBluetoothHandler:
|
|||||||
BleakClientWithServiceCache,
|
BleakClientWithServiceCache,
|
||||||
ble_device,
|
ble_device,
|
||||||
self._address,
|
self._address,
|
||||||
max_attempts=3,
|
max_attempts=2,
|
||||||
|
ble_device_callback=ble_device_callback,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
||||||
@@ -166,7 +169,7 @@ class BmsBluetoothHandler:
|
|||||||
_LOGGER.warning("BLE write failed (attempt %d/%d): %s",
|
_LOGGER.warning("BLE write failed (attempt %d/%d): %s",
|
||||||
attempt, retries, exc)
|
attempt, retries, exc)
|
||||||
if attempt < retries:
|
if attempt < retries:
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.3)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -178,7 +181,7 @@ class BmsBluetoothHandler:
|
|||||||
_LOGGER.warning("BMS timeout (cmd=0x%s, attempt %d/%d)",
|
_LOGGER.warning("BMS timeout (cmd=0x%s, attempt %d/%d)",
|
||||||
command.hex(), attempt, retries)
|
command.hex(), attempt, retries)
|
||||||
if attempt < retries:
|
if attempt < retries:
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.3)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -186,7 +189,12 @@ class BmsBluetoothHandler:
|
|||||||
# MOS write command
|
# MOS write command
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
async def write_mos(self, ble_device: BLEDevice, value: int) -> bool:
|
async def write_mos(
|
||||||
|
self,
|
||||||
|
ble_device: BLEDevice,
|
||||||
|
value: int,
|
||||||
|
ble_device_callback: Callable[[], BLEDevice | None] | None = None,
|
||||||
|
) -> bool:
|
||||||
"""Send a MOS control write command and return True on ACK.
|
"""Send a MOS control write command and return True on ACK.
|
||||||
|
|
||||||
Follows the same connect -> send -> disconnect pattern as poll() so
|
Follows the same connect -> send -> disconnect pattern as poll() so
|
||||||
@@ -200,12 +208,13 @@ class BmsBluetoothHandler:
|
|||||||
BleakClientWithServiceCache,
|
BleakClientWithServiceCache,
|
||||||
ble_device,
|
ble_device,
|
||||||
self._address,
|
self._address,
|
||||||
max_attempts=3,
|
max_attempts=2,
|
||||||
|
ble_device_callback=ble_device_callback,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
response = await self._request(client, command, timeout=5.0, retries=2)
|
response = await self._request(client, command, timeout=3.0, retries=2)
|
||||||
return response is not None and response[2] == 0x00
|
return response is not None and response[2] == 0x00
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -22,6 +22,11 @@ _LOGGER = logging.getLogger(__name__)
|
|||||||
# the last known data instead so the UI doesn't oscillate.
|
# the last known data instead so the UI doesn't oscillate.
|
||||||
_FAILURES_BEFORE_UNAVAILABLE = 5
|
_FAILURES_BEFORE_UNAVAILABLE = 5
|
||||||
|
|
||||||
|
# Hard ceiling on the BLE poll operation (connect + commands + disconnect).
|
||||||
|
# With the global lock preventing contention, connections should be fast —
|
||||||
|
# 15 s is generous for 2 commands over a local proxy.
|
||||||
|
_POLL_TIMEOUT = 15
|
||||||
|
|
||||||
|
|
||||||
class BmsCoordinator(DataUpdateCoordinator[dict]):
|
class BmsCoordinator(DataUpdateCoordinator[dict]):
|
||||||
"""Polls the BMS over BLE and distributes data to all sensor entities.
|
"""Polls the BMS over BLE and distributes data to all sensor entities.
|
||||||
@@ -36,6 +41,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
|||||||
address: str,
|
address: str,
|
||||||
poll_interval: int,
|
poll_interval: int,
|
||||||
name: str = "Xiaoxiang Smart BMS",
|
name: str = "Xiaoxiang Smart BMS",
|
||||||
|
ble_lock: asyncio.Lock | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
hass,
|
hass,
|
||||||
@@ -45,12 +51,14 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
|||||||
)
|
)
|
||||||
self.address = address
|
self.address = address
|
||||||
self._device_name = name
|
self._device_name = name
|
||||||
self._poll_timeout = max(poll_interval - 5, 20)
|
|
||||||
self._handler = BmsBluetoothHandler(address)
|
self._handler = BmsBluetoothHandler(address)
|
||||||
self.hw_version: str | None = None
|
self.hw_version: str | None = None
|
||||||
self._consecutive_failures = 0
|
self._consecutive_failures = 0
|
||||||
# Kept fresh by the BLE advertisement callback registered in __init__.py
|
# Kept fresh by the BLE advertisement callback registered in __init__.py
|
||||||
self._ble_device: BLEDevice | None = None
|
self._ble_device: BLEDevice | None = None
|
||||||
|
# Shared across all BMS coordinator instances so only one BMS connects
|
||||||
|
# at a time — prevents ESPHome proxy connection slot exhaustion.
|
||||||
|
self._ble_lock = ble_lock or asyncio.Lock()
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Device info — shared by sensor, binary_sensor, number platforms
|
# Device info — shared by sensor, binary_sensor, number platforms
|
||||||
@@ -82,7 +90,11 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
|||||||
raise HomeAssistantError(
|
raise HomeAssistantError(
|
||||||
f"BMS ({self.address}) not reachable — cannot send MOS command"
|
f"BMS ({self.address}) not reachable — cannot send MOS command"
|
||||||
)
|
)
|
||||||
success = await self._handler.write_mos(device, value)
|
async with self._ble_lock:
|
||||||
|
success = await self._handler.write_mos(
|
||||||
|
device, value,
|
||||||
|
ble_device_callback=self._get_ble_device,
|
||||||
|
)
|
||||||
if not success:
|
if not success:
|
||||||
raise HomeAssistantError("BMS did not acknowledge the MOS command")
|
raise HomeAssistantError("BMS did not acknowledge the MOS command")
|
||||||
await self.async_request_refresh()
|
await self.async_request_refresh()
|
||||||
@@ -135,14 +147,21 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
|
|||||||
if self.hw_version is None:
|
if self.hw_version is None:
|
||||||
commands.append(CMD_VERSION)
|
commands.append(CMD_VERSION)
|
||||||
|
|
||||||
|
# Only one BMS polls at a time — prevents proxy connection slot contention.
|
||||||
|
# The timeout wraps only the actual BLE operation, not the lock wait.
|
||||||
|
async with self._ble_lock:
|
||||||
try:
|
try:
|
||||||
responses = await asyncio.wait_for(
|
responses = await asyncio.wait_for(
|
||||||
self._handler.poll(device, commands),
|
self._handler.poll(
|
||||||
timeout=self._poll_timeout,
|
device,
|
||||||
|
commands,
|
||||||
|
ble_device_callback=self._get_ble_device,
|
||||||
|
),
|
||||||
|
timeout=_POLL_TIMEOUT,
|
||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
return self._handle_failure(
|
return self._handle_failure(
|
||||||
f"BMS poll timed out after {self._poll_timeout}s"
|
f"BMS poll timed out after {_POLL_TIMEOUT}s"
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return self._handle_failure(f"BMS poll failed: {exc}")
|
return self._handle_failure(f"BMS poll failed: {exc}")
|
||||||
|
|||||||
Reference in New Issue
Block a user