dcc528b96a
Root cause: 3 BMS devices fighting for 3 ESPHome proxy connection slots simultaneously, causing 80% timeout failures and 22s+ poll times. Fixes: - Add shared asyncio.Lock so only one BMS polls at a time — eliminates proxy slot contention entirely - Pass ble_device_callback to establish_connection so retry attempts get a fresh BLEDevice (handles proxy path changes) - Reduce command timeout 5s -> 3s, retries 3 -> 2 (BMS responds in <200ms when connection is clean) - Reduce establish_connection max_attempts 3 -> 2 (fail fast, retry next cycle instead of blocking 25s) - Fixed poll timeout to 15s (was poll_interval-5=25s) Expected: polls complete in 2-5s instead of 22s, ~95%+ success rate. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
203 lines
7.8 KiB
Python
203 lines
7.8 KiB
Python
"""DataUpdateCoordinator for the Xiaoxiang Smart BMS."""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from datetime import timedelta
|
|
|
|
from bleak.backends.device import BLEDevice
|
|
from homeassistant.components.bluetooth import async_ble_device_from_address
|
|
from homeassistant.core import HomeAssistant
|
|
from homeassistant.exceptions import HomeAssistantError
|
|
from homeassistant.helpers.device_registry import DeviceInfo
|
|
from homeassistant.helpers.update_coordinator import DataUpdateCoordinator, UpdateFailed
|
|
|
|
from .bluetooth_handler import BmsBluetoothHandler
|
|
from .const import CMD_CELL, CMD_GENERAL, CMD_VERSION, DOMAIN
|
|
|
|
_LOGGER = logging.getLogger(__name__)
|
|
|
|
# Only mark sensors unavailable after this many *consecutive* failed polls.
|
|
# Transient BLE misses (device not in cache, ESPHome proxy busy, etc.) return
|
|
# the last known data instead so the UI doesn't oscillate.
|
|
_FAILURES_BEFORE_UNAVAILABLE = 5
|
|
|
|
# Hard ceiling on the BLE poll operation (connect + commands + disconnect).
|
|
# With the global lock preventing contention, connections should be fast —
|
|
# 15 s is generous for 2-3 commands (the version request is only sent until the hardware version is known) over a local proxy.
|
|
_POLL_TIMEOUT = 15
|
|
|
|
|
|
class BmsCoordinator(DataUpdateCoordinator[dict]):
    """Polls the BMS over BLE and distributes data to all sensor entities.

    Uses a connect -> read -> disconnect pattern on every poll so the BMS's
    single BLE connection slot is free between updates (mobile app access).
    """

    def __init__(
        self,
        hass: HomeAssistant,
        address: str,
        poll_interval: int,
        name: str = "Xiaoxiang Smart BMS",
        ble_lock: asyncio.Lock | None = None,
    ) -> None:
        """Create a coordinator for one BMS.

        Args:
            hass: The Home Assistant instance.
            address: Bluetooth address of the BMS; also used as the device
                registry identifier.
            poll_interval: Seconds between scheduled polls.
            name: Display name reported in the device info.
            ble_lock: Lock serialising BLE access.  Pass the same lock to every
                coordinator that should never connect concurrently; when
                omitted a private lock is created.
        """
        super().__init__(
            hass,
            _LOGGER,
            name=DOMAIN,
            update_interval=timedelta(seconds=poll_interval),
        )
        self.address = address
        self._device_name = name
        self._handler = BmsBluetoothHandler(address)
        self.hw_version: str | None = None
        self._consecutive_failures = 0
        # Kept fresh by the BLE advertisement callback registered in __init__.py
        self._ble_device: BLEDevice | None = None
        # Shared across all BMS coordinator instances so only one BMS connects
        # at a time — prevents ESPHome proxy connection slot exhaustion.
        self._ble_lock = ble_lock if ble_lock is not None else asyncio.Lock()

    # ------------------------------------------------------------------
    # Device info — shared by sensor, binary_sensor, number platforms
    # ------------------------------------------------------------------

    @property
    def device_info(self) -> DeviceInfo:
        """Return the device registry entry describing this BMS.

        The model is the hardware version once it has been read, and the
        generic "Smart BMS" until then.
        """
        return DeviceInfo(
            identifiers={(DOMAIN, self.address)},
            name=self._device_name,
            manufacturer="Xiaoxiang",
            model=self.hw_version or "Smart BMS",
        )

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def async_setup(self) -> None:
        """No-op — no persistent connection to establish."""

    async def async_teardown(self) -> None:
        """No-op — each poll disconnects itself."""

    async def async_write_mos(self, value: int) -> None:
        """Send a MOS control command to the BMS, then refresh sensor state.

        Args:
            value: MOS control value forwarded unchanged to the handler.

        Raises:
            HomeAssistantError: If no BLE device reference is available, the
                write does not finish within ``_POLL_TIMEOUT`` seconds, or the
                handler reports that the command was not acknowledged.
        """
        device = self._get_ble_device()
        if device is None:
            raise HomeAssistantError(
                f"BMS ({self.address}) not reachable — cannot send MOS command"
            )

        async with self._ble_lock:
            # Same hard ceiling as the poll path: the lock is shared, so an
            # unbounded write would stall every coordinator waiting on it.
            try:
                success = await asyncio.wait_for(
                    self._handler.write_mos(
                        device,
                        value,
                        ble_device_callback=self._get_ble_device,
                    ),
                    timeout=_POLL_TIMEOUT,
                )
            except asyncio.TimeoutError as exc:
                raise HomeAssistantError(
                    f"BMS MOS command timed out after {_POLL_TIMEOUT}s"
                ) from exc

        if not success:
            raise HomeAssistantError("BMS did not acknowledge the MOS command")

        # Requested only after the lock is released: _async_update_data
        # acquires the same (non-reentrant) lock.
        await self.async_request_refresh()

    # ------------------------------------------------------------------
    # BLE device lookup
    # ------------------------------------------------------------------

    def _get_ble_device(self) -> BLEDevice | None:
        """Return the freshest available BLEDevice reference.

        Prefers the advertisement-callback reference (_ble_device) because it
        tracks proxy transport path changes. Falls back to the scanner cache.
        Returns None when neither source knows the device.
        """
        return self._ble_device or async_ble_device_from_address(
            self.hass, self.address, connectable=True
        )

    # ------------------------------------------------------------------
    # Poll
    # ------------------------------------------------------------------

    def _handle_failure(self, reason: str) -> dict:
        """On a transient failure, return cached data up to the threshold.

        Increments the consecutive-failure counter on every call.

        Args:
            reason: Human-readable description of what went wrong.

        Returns:
            The last known data, while the counter is within
            _FAILURES_BEFORE_UNAVAILABLE and cached data exists.

        Raises:
            UpdateFailed: Once the threshold is exceeded, or when there is no
                cached data to fall back on (-> sensors go unavailable).
        """
        self._consecutive_failures += 1
        if self._consecutive_failures <= _FAILURES_BEFORE_UNAVAILABLE and self.data:
            _LOGGER.debug(
                "BMS poll failed (%d/%d), keeping last known data: %s",
                self._consecutive_failures,
                _FAILURES_BEFORE_UNAVAILABLE,
                reason,
            )
            return self.data
        raise UpdateFailed(reason)

    async def _async_update_data(self) -> dict:
        """Connect to the BMS, fetch all data, disconnect.

        Returns:
            The parsed general and cell info merged into one dict, extended
            with the derived "power", "energy_stored" and "cell_delta" keys.
            On a tolerated failure the previously cached data is returned
            instead (see _handle_failure).

        Raises:
            UpdateFailed: Via _handle_failure, when failures are no longer
                tolerated.
        """
        device = self._get_ble_device()
        if device is None:
            return self._handle_failure(
                f"BMS ({self.address}) not reachable — check Bluetooth adapter / proxy"
            )

        commands = [CMD_GENERAL, CMD_CELL]
        # The hardware version is only requested until it has been read once.
        if self.hw_version is None:
            commands.append(CMD_VERSION)

        # Only one BMS polls at a time — prevents proxy connection slot contention.
        # The timeout wraps only the actual BLE operation, not the lock wait.
        async with self._ble_lock:
            try:
                responses = await asyncio.wait_for(
                    self._handler.poll(
                        device,
                        commands,
                        ble_device_callback=self._get_ble_device,
                    ),
                    timeout=_POLL_TIMEOUT,
                )
            except asyncio.TimeoutError:
                return self._handle_failure(
                    f"BMS poll timed out after {_POLL_TIMEOUT}s"
                )
            except Exception as exc:  # noqa: BLE001 - any BLE error is a failed poll
                return self._handle_failure(f"BMS poll failed: {exc}")

        # Guard the positional unpacking below: a missing or short result is a
        # failed poll, not an unexpected crash.
        if not responses or len(responses) < 2:
            return self._handle_failure(
                "BMS poll returned an incomplete set of responses"
            )

        general_frame, cell_frame = responses[0], responses[1]

        if general_frame is None:
            return self._handle_failure("No response to general info request (0x03)")
        if cell_frame is None:
            return self._handle_failure("No response to cell info request (0x04)")

        # Successful poll — reset failure counter
        self._consecutive_failures = 0

        if self.hw_version is None and len(responses) > 2 and responses[2]:
            self.hw_version = BmsBluetoothHandler.parse_version(responses[2])
            _LOGGER.debug("BMS hardware version: %s", self.hw_version)

        data = BmsBluetoothHandler.parse_general_info(general_frame)
        data.update(BmsBluetoothHandler.parse_cell_info(cell_frame))

        # Derived values (units as reported in the debug line below: V, A, Ah, kWh).
        data["power"] = round(data["voltage"] * data["current"], 2)
        data["energy_stored"] = round(
            data["voltage"] * data["residual_capacity"] / 1000, 3
        )

        if data["cell_voltages"]:
            v_max = max(data["cell_voltages"])
            v_min = min(data["cell_voltages"])
            # Spread between highest and lowest cell, scaled by 1000
            # (presumably V -> mV; confirm against parse_cell_info).
            data["cell_delta"] = round((v_max - v_min) * 1000, 1)
        else:
            data["cell_delta"] = None

        _LOGGER.debug(
            "BMS data: %.2fV %.2fA %d%% %.2fAh %.3fkWh %d cells",
            data["voltage"], data["current"], data["state_of_charge"],
            data["residual_capacity"], data["energy_stored"],
            len(data["cell_voltages"]),
        )
        return data
|