Prevent unavailable oscillation with failure tolerance

- Track consecutive failures; return cached data for up to 3 misses in a row
  before marking sensors unavailable. A single transient BLE failure no longer
  causes the UI to flip to unavailable.
- Retry device lookup: if async_ble_device_from_address returns None (device
  not yet back in scanner cache after last disconnect), wait 2s and try once
  more before counting it as a failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-11 20:29:50 +02:00
parent e1e6d69d2c
commit b6c3e597f7
+40 -8
View File
@@ -15,6 +15,11 @@ from .const import CMD_CELL, CMD_GENERAL, CMD_VERSION, DOMAIN
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
# Number of *consecutive* failed polls that are tolerated before sensors are
# marked unavailable. While the failure count is at or below this value (and
# cached data exists), a failed poll returns the last known data instead, so
# transient BLE misses (device not in cache, ESPHome proxy busy, etc.) don't
# make the UI oscillate. The next failure after that raises UpdateFailed.
_FAILURES_BEFORE_UNAVAILABLE = 3
class BmsCoordinator(DataUpdateCoordinator[dict]): class BmsCoordinator(DataUpdateCoordinator[dict]):
"""Polls the BMS over BLE and distributes data to all sensor entities. """Polls the BMS over BLE and distributes data to all sensor entities.
@@ -38,9 +43,10 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
) )
self.address = address self.address = address
self._device_name = name self._device_name = name
self._poll_timeout = max(poll_interval - 3, 10) # hard cap, leaves 3s slack self._poll_timeout = max(poll_interval - 3, 10)
self._handler = BmsBluetoothHandler(address) self._handler = BmsBluetoothHandler(address)
self.hw_version: str | None = None self.hw_version: str | None = None
self._consecutive_failures = 0
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Device info — shared by sensor, binary_sensor, number platforms # Device info — shared by sensor, binary_sensor, number platforms
@@ -69,15 +75,39 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
# Poll # Poll
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _handle_failure(self, reason: str) -> dict:
    """Record one failed poll and decide whether it can be masked.

    The consecutive-failure counter is incremented on every call. While the
    counter has not exceeded _FAILURES_BEFORE_UNAVAILABLE and previously
    fetched data is available, that cached data is returned so the failure
    stays invisible to the entities.

    Args:
        reason: Human-readable description of why the poll failed; used for
            the debug log entry and as the UpdateFailed message.

    Returns:
        The coordinator's last known data (``self.data``).

    Raises:
        UpdateFailed: When the tolerance is exhausted or there is no cached
            data to fall back on (→ sensors go unavailable).
    """
    self._consecutive_failures += 1
    failure_count = self._consecutive_failures

    # Guard clause: tolerance used up, or nothing cached to serve instead.
    if failure_count > _FAILURES_BEFORE_UNAVAILABLE or not self.data:
        raise UpdateFailed(reason)

    _LOGGER.debug(
        "BMS poll failed (%d/%d), keeping last known data: %s",
        failure_count,
        _FAILURES_BEFORE_UNAVAILABLE,
        reason,
    )
    return self.data
async def _async_update_data(self) -> dict: async def _async_update_data(self) -> dict:
"""Connect to the BMS, fetch all data, disconnect.""" """Connect to the BMS, fetch all data, disconnect."""
# The BMS may not be in the scanner cache immediately after a disconnect.
# Sleep 2 s and retry the lookup once before counting the poll as a failure.
device = async_ble_device_from_address(self.hass, self.address, connectable=True) device = async_ble_device_from_address(self.hass, self.address, connectable=True)
if device is None: if device is None:
raise UpdateFailed( await asyncio.sleep(2.0)
device = async_ble_device_from_address(
self.hass, self.address, connectable=True
)
if device is None:
return self._handle_failure(
f"BMS ({self.address}) not reachable — check Bluetooth adapter / proxy" f"BMS ({self.address}) not reachable — check Bluetooth adapter / proxy"
) )
# Fetch hardware version once; skip on subsequent polls
commands = [CMD_GENERAL, CMD_CELL] commands = [CMD_GENERAL, CMD_CELL]
if self.hw_version is None: if self.hw_version is None:
commands.append(CMD_VERSION) commands.append(CMD_VERSION)
@@ -88,18 +118,21 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
timeout=self._poll_timeout, timeout=self._poll_timeout,
) )
except asyncio.TimeoutError: except asyncio.TimeoutError:
raise UpdateFailed( return self._handle_failure(
f"BMS poll timed out after {self._poll_timeout}s" f"BMS poll timed out after {self._poll_timeout}s"
) )
except Exception as exc: except Exception as exc:
raise UpdateFailed(f"BMS poll failed: {exc}") from exc return self._handle_failure(f"BMS poll failed: {exc}")
general_frame, cell_frame = responses[0], responses[1] general_frame, cell_frame = responses[0], responses[1]
if general_frame is None: if general_frame is None:
raise UpdateFailed("No response to general info request (0x03)") return self._handle_failure("No response to general info request (0x03)")
if cell_frame is None: if cell_frame is None:
raise UpdateFailed("No response to cell info request (0x04)") return self._handle_failure("No response to cell info request (0x04)")
# Successful poll — reset failure counter
self._consecutive_failures = 0
if self.hw_version is None and len(responses) > 2 and responses[2]: if self.hw_version is None and len(responses) > 2 and responses[2]:
self.hw_version = BmsBluetoothHandler.parse_version(responses[2]) self.hw_version = BmsBluetoothHandler.parse_version(responses[2])
@@ -108,7 +141,6 @@ class BmsCoordinator(DataUpdateCoordinator[dict]):
data = BmsBluetoothHandler.parse_general_info(general_frame) data = BmsBluetoothHandler.parse_general_info(general_frame)
data.update(BmsBluetoothHandler.parse_cell_info(cell_frame)) data.update(BmsBluetoothHandler.parse_cell_info(cell_frame))
# Derived fields
data["power"] = round(data["voltage"] * data["current"], 2) data["power"] = round(data["voltage"] * data["current"], 2)
data["energy_stored"] = round(data["voltage"] * data["residual_capacity"] / 1000, 3) data["energy_stored"] = round(data["voltage"] * data["residual_capacity"] / 1000, 3)