Refactor BLE layer for 24/7 reliability

- Replace raw BleakClient with establish_connection from
  bleak-retry-connector (retries, GATT service cache, proxy-aware)
- Replace fragile asyncio.Event with asyncio.Queue for response frames,
  drain stale data on each connection to prevent cross-cycle leakage
- Register BLE advertisement callback to keep BLEDevice reference fresh
  across ESPHome proxy path changes
- Remove asyncio.sleep(2) device lookup hack
- Increase poll timeout floor from 10s to 20s
- Increase failure tolerance from 3 to 5 consecutive misses
- Bump default poll interval to 30s, min to 15s (halves connection churn)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-12 09:36:08 +02:00
parent c00d9b66c2
commit 1520ed3c0f
4 changed files with 131 additions and 72 deletions
+23 -14
View File
@@ -5,6 +5,7 @@ import asyncio
import logging
from datetime import timedelta
from bleak.backends.device import BLEDevice
from homeassistant.components.bluetooth import async_ble_device_from_address
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import HomeAssistantError
@@ -19,13 +20,13 @@ _LOGGER = logging.getLogger(__name__)
# Only mark sensors unavailable after this many *consecutive* failed polls.
# Transient BLE misses (device not in cache, ESPHome proxy busy, etc.) return
# the last known data instead so the UI doesn't oscillate.
_FAILURES_BEFORE_UNAVAILABLE = 3
_FAILURES_BEFORE_UNAVAILABLE = 5
class BmsCoordinator(DataUpdateCoordinator[dict]):
"""Polls the BMS over BLE and distributes data to all sensor entities.
Uses a connect → read → disconnect pattern on every poll so the BMS's
Uses a connect -> read -> disconnect pattern on every poll so the BMS's
single BLE connection slot is free between updates (mobile app access).
"""
@@ -44,10 +45,12 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
)
self.address = address
self._device_name = name
self._poll_timeout = max(poll_interval - 3, 10)
self._poll_timeout = max(poll_interval - 5, 20)
self._handler = BmsBluetoothHandler(address)
self.hw_version: str | None = None
self._consecutive_failures = 0
# Kept fresh by the BLE advertisement callback registered in __init__.py
self._ble_device: BLEDevice | None = None
# ------------------------------------------------------------------
# Device info — shared by sensor, binary_sensor, number platforms
@@ -74,7 +77,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
async def async_write_mos(self, value: int) -> None:
"""Send a MOS control command to the BMS, then refresh sensor state."""
device = async_ble_device_from_address(self.hass, self.address, connectable=True)
device = self._get_ble_device()
if device is None:
raise HomeAssistantError(
f"BMS ({self.address}) not reachable — cannot send MOS command"
@@ -82,9 +85,22 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
success = await self._handler.write_mos(device, value)
if not success:
raise HomeAssistantError("BMS did not acknowledge the MOS command")
# Refresh immediately so sensors reflect the new MOS state
await self.async_request_refresh()
# ------------------------------------------------------------------
# BLE device lookup
# ------------------------------------------------------------------
def _get_ble_device(self) -> BLEDevice | None:
    """Pick the BLEDevice to use for the next connection attempt.

    The reference stored in ``_ble_device`` (maintained by the BLE
    advertisement callback, so it follows proxy path changes) is used
    whenever it is set. Otherwise the device is looked up by address
    among connectable devices, which may yield ``None``.
    """
    tracked = self._ble_device
    if tracked:
        return tracked
    # No advertisement-tracked reference: fall back to the scanner cache.
    return async_ble_device_from_address(
        self.hass, self.address, connectable=True
    )
# ------------------------------------------------------------------
# Poll
# ------------------------------------------------------------------
@@ -92,7 +108,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
def _handle_failure(self, reason: str) -> dict:
"""On a transient failure, return cached data up to the threshold.
Only raises UpdateFailed (→ sensors go unavailable) after
Only raises UpdateFailed (-> sensors go unavailable) after
_FAILURES_BEFORE_UNAVAILABLE consecutive misses.
"""
self._consecutive_failures += 1
@@ -109,14 +125,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
async def _async_update_data(self) -> dict:
"""Connect to the BMS, fetch all data, disconnect."""
# The BMS may not be in the scanner cache immediately after a disconnect.
# Wait up to 2 s for an advertisement before giving up.
device = async_ble_device_from_address(self.hass, self.address, connectable=True)
if device is None:
await asyncio.sleep(2.0)
device = async_ble_device_from_address(
self.hass, self.address, connectable=True
)
device = self._get_ble_device()
if device is None:
return self._handle_failure(
f"BMS ({self.address}) not reachable — check Bluetooth adapter / proxy"