diff --git a/custom_components/xiaoxiang_bms/bluetooth_handler.py b/custom_components/xiaoxiang_bms/bluetooth_handler.py index fc45f1d..f14122d 100644 --- a/custom_components/xiaoxiang_bms/bluetooth_handler.py +++ b/custom_components/xiaoxiang_bms/bluetooth_handler.py @@ -29,15 +29,72 @@ _TRAILER_LEN = 3 class BmsBluetoothHandler: """Protocol framing and parsing for a Xiaoxiang BMS device. - Designed for a connect -> poll -> disconnect pattern: the BMS only allows - one simultaneous BLE connection, so we hold it only for the duration of - a single data fetch and release it immediately after. + Holds a **persistent** BLE connection. The connection is established on + the first poll and kept alive across cycles. If it drops, the next poll + automatically reconnects. This avoids the massive overhead of + connect/disconnect every cycle through ESPHome BLE proxies. """ def __init__(self, address: str) -> None: self._address = address self._buffer = bytearray() self._response_queue: asyncio.Queue[bytes] = asyncio.Queue() + self._client: BleakClientWithServiceCache | None = None + + # ------------------------------------------------------------------ + # Connection management + # ------------------------------------------------------------------ + + @property + def is_connected(self) -> bool: + return self._client is not None and self._client.is_connected + + def _on_disconnect(self, _client) -> None: + """Called by bleak when the connection drops unexpectedly.""" + _LOGGER.debug("BMS %s disconnected", self._address) + self._client = None + + async def _ensure_connected( + self, + ble_device: BLEDevice, + ble_device_callback: Callable[[], BLEDevice | None] | None = None, + ) -> None: + """Connect if not already connected.""" + if self.is_connected: + return + + # Clean up stale client + if self._client is not None: + try: + await self._client.disconnect() + except BleakError: + pass + self._client = None + + self._reset() + + _LOGGER.debug("Connecting to BMS %s", self._address) + client = await establish_connection( + BleakClientWithServiceCache, + ble_device, + self._address, + disconnected_callback=self._on_disconnect, + max_attempts=2, + ble_device_callback=ble_device_callback, + ) + await client.start_notify(RX_CHAR_UUID, self._on_notify) + await asyncio.sleep(0.3) + self._client = client + _LOGGER.info("BMS %s connected", self._address) + + async def disconnect(self) -> None: + """Explicitly close the connection (used during teardown).""" + if self._client is not None: + try: + await self._client.disconnect() + except BleakError: + pass + self._client = None # ------------------------------------------------------------------ # Internal helpers @@ -52,12 +109,12 @@ class BmsBluetoothHandler: break def _reset(self) -> None: - """Clear all transient state before a new connection.""" + """Clear all transient state.""" self._buffer.clear() self._drain_queue() # ------------------------------------------------------------------ - # High-level poll — the only entry point the coordinator needs + # High-level poll # ------------------------------------------------------------------ async def poll( @@ -68,46 +125,26 @@ class BmsBluetoothHandler: retries: int = 2, ble_device_callback: Callable[[], BLEDevice | None] | None = None, ) -> list[bytes | None]: - """Connect, send each command in sequence, disconnect. + """Send each command over the persistent connection, return responses. - The BMS only supports a single BLE connection at a time. By connecting - only during the active read window and disconnecting immediately after, - the mobile app (or any other client) can connect freely between polls. + Connects automatically if not already connected. If the connection + drops mid-poll, raises BleakError so the coordinator can handle it. """ - self._reset() - _LOGGER.debug("Polling BMS at %s", self._address) + self._drain_queue() - client = await establish_connection( - BleakClientWithServiceCache, - ble_device, - self._address, - max_attempts=2, - ble_device_callback=ble_device_callback, - ) - try: - await client.start_notify(RX_CHAR_UUID, self._on_notify) - await asyncio.sleep(0.3) - return [ - await self._request(client, cmd, timeout, retries) - for cmd in commands - ] - finally: - try: - await client.disconnect() - except BleakError: - pass + await self._ensure_connected(ble_device, ble_device_callback) + + return [ + await self._request(self._client, cmd, timeout, retries) + for cmd in commands + ] # ------------------------------------------------------------------ # Frame reception # ------------------------------------------------------------------ def _on_notify(self, _char, data: bytearray) -> None: - """Accumulate BLE notification chunks into complete protocol frames. - - BLE max payload is 20 bytes (default MTU), so a single BMS frame - (up to ~50 bytes for 16 cells) arrives across several notifications. - We buffer until we can calculate and verify the expected frame length. - """ + """Accumulate BLE notification chunks into complete protocol frames.""" self._buffer.extend(data) # Discard leading garbage until we see a frame start byte @@ -140,7 +177,7 @@ class BmsBluetoothHandler: self._response_queue.put_nowait(frame) # ------------------------------------------------------------------ - # Request / response (private — used inside poll()) + # Request / response # ------------------------------------------------------------------ async def _request( @@ -150,19 +187,22 @@ class BmsBluetoothHandler: timeout: float, retries: int, ) -> bytes | None: - """Send one command and wait for the response frame, with retries. - - Tries Write With Response first; falls back to Write Without Response - if the characteristic rejects it — covers both BMS firmware variants. - """ + """Send one command and wait for the response frame, with retries.""" for attempt in range(1, retries + 1): - # Drain any stale frames before sending a new command + if not client.is_connected: + _LOGGER.warning("BMS %s connection lost during request", self._address) + self._client = None + return None + self._drain_queue() self._buffer.clear() try: await client.write_gatt_char(TX_CHAR_UUID, command, response=True) except BleakError: + if not client.is_connected: + self._client = None + return None try: await client.write_gatt_char(TX_CHAR_UUID, command, response=False) except BleakError as exc: @@ -195,32 +235,15 @@ class BmsBluetoothHandler: value: int, ble_device_callback: Callable[[], BLEDevice | None] | None = None, ) -> bool: - """Send a MOS control write command and return True on ACK. - - Follows the same connect -> send -> disconnect pattern as poll() so - it doesn't interfere with the normal poll cycle. - """ - self._reset() + """Send a MOS control write command over the persistent connection.""" + self._drain_queue() command = self._build_mos_command(value) _LOGGER.debug("Writing MOS value 0x%02X to BMS at %s", value, self._address) - client = await establish_connection( - BleakClientWithServiceCache, - ble_device, - self._address, - max_attempts=2, - ble_device_callback=ble_device_callback, - ) - try: - await client.start_notify(RX_CHAR_UUID, self._on_notify) - await asyncio.sleep(0.3) - response = await self._request(client, command, timeout=3.0, retries=2) - return response is not None and response[2] == 0x00 - finally: - try: - await client.disconnect() - except BleakError: - pass + await self._ensure_connected(ble_device, ble_device_callback) + + response = await self._request(self._client, command, timeout=3.0, retries=2) + return response is not None and response[2] == 0x00 @staticmethod def _build_mos_command(value: int) -> bytes: @@ -230,9 +253,6 @@ class BmsBluetoothHandler: Checked bytes (per spec): command_code + length + data bytes = 0xE1 + 0x02 + 0x00 + XX Checksum = two's complement of sum, high byte first. - - Verified against spec example: - XX=0x02 -> sum=0xE5 -> ~0xE5+1=0xFF1B -> CHK FF 1B """ checked = [0xE1, 0x02, 0x00, value & 0xFF] checksum = (~sum(checked) + 1) & 0xFFFF @@ -252,7 +272,7 @@ class BmsBluetoothHandler: Payload byte offsets (frame[4] is payload[0]): 0-1 Total voltage uint16 BE /100 -> V - 2-3 Current int16 BE /100 -> A (positive = charging, negative = discharging) + 2-3 Current int16 BE /100 -> A (positive = charging) 4-5 Residual capacity uint16 BE /100 -> Ah 6-7 Nominal capacity uint16 BE /100 -> Ah 8-9 Cycle count uint16 BE @@ -287,12 +307,9 @@ class BmsBluetoothHandler: "state_of_charge": p[19], "cell_count": p[21], "temperatures": temperatures, - # MOS status "mos_charge_enabled": bool(mos & 0x01), "mos_discharge_enabled": bool(mos & 0x02), - # Cell balancing (any cell currently balancing) "balance_active": balance != 0, - # Protection flags (bit per event, True = protection triggered) "prot_cell_overvolt": bool(prot & (1 << 0)), "prot_cell_undervolt": bool(prot & (1 << 1)), "prot_pack_overvolt": bool(prot & (1 << 2)), @@ -319,10 +336,8 @@ class BmsBluetoothHandler: """Parse a 0x04 cell voltage response frame. Per spec: frame[3] (the header length byte) = cell_count x 2. - The payload contains ONLY the voltage bytes — no count byte. - 0+ Cell voltages uint16 BE each unit mV /1000 -> V """ - count = frame[3] // 2 # header length byte = N_cells x 2 + count = frame[3] // 2 p = frame[_HEADER_LEN:-_TRAILER_LEN] voltages: list[float] = [] for i in range(count): diff --git a/custom_components/xiaoxiang_bms/coordinator.py b/custom_components/xiaoxiang_bms/coordinator.py index 3350548..b2fc4d7 100644 --- a/custom_components/xiaoxiang_bms/coordinator.py +++ b/custom_components/xiaoxiang_bms/coordinator.py @@ -5,6 +5,7 @@ import asyncio import logging from datetime import timedelta +from bleak import BleakError from bleak.backends.device import BLEDevice from homeassistant.components.bluetooth import async_ble_device_from_address from homeassistant.core import HomeAssistant @@ -18,21 +19,19 @@ from .const import CMD_CELL, CMD_GENERAL, CMD_VERSION, DOMAIN _LOGGER = logging.getLogger(__name__) # Only mark sensors unavailable after this many *consecutive* failed polls. -# Transient BLE misses (device not in cache, ESPHome proxy busy, etc.) return -# the last known data instead so the UI doesn't oscillate. _FAILURES_BEFORE_UNAVAILABLE = 5 -# Hard ceiling on the BLE poll operation (connect + commands + disconnect). -# With the global lock preventing contention, connections should be fast — -# 15 s is generous for 2 commands over a local proxy. +# Hard ceiling on a single poll (commands only — no connection overhead +# when already connected). Reconnection adds ~5 s on top. _POLL_TIMEOUT = 15 class BmsCoordinator(DataUpdateCoordinator[dict]): """Polls the BMS over BLE and distributes data to all sensor entities. - Uses a connect -> read -> disconnect pattern on every poll so the BMS's - single BLE connection slot is free between updates (mobile app access). + Holds a **persistent** BLE connection per device. The connection is + established on the first poll and kept alive across cycles. If it + drops, the next poll automatically reconnects. """ def __init__( @@ -56,12 +55,12 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): self._consecutive_failures = 0 # Kept fresh by the BLE advertisement callback registered in __init__.py self._ble_device: BLEDevice | None = None - # Shared across all BMS coordinator instances so only one BMS connects - # at a time — prevents ESPHome proxy connection slot exhaustion. + # Shared across all BMS coordinator instances — serialises BLE + # operations so multiple devices don't fight for proxy slots. self._ble_lock = ble_lock or asyncio.Lock() # ------------------------------------------------------------------ - # Device info — shared by sensor, binary_sensor, number platforms + # Device info # ------------------------------------------------------------------ @property @@ -78,10 +77,11 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): # ------------------------------------------------------------------ async def async_setup(self) -> None: - """No-op — no persistent connection to establish.""" + """No-op — connection is established lazily on first poll.""" async def async_teardown(self) -> None: - """No-op — each poll disconnects itself.""" + """Disconnect the persistent BLE connection.""" + await self._handler.disconnect() async def async_write_mos(self, value: int) -> None: """Send a MOS control command to the BMS, then refresh sensor state.""" @@ -91,10 +91,13 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): f"BMS ({self.address}) not reachable — cannot send MOS command" ) async with self._ble_lock: - success = await self._handler.write_mos( - device, value, - ble_device_callback=self._get_ble_device, - ) + try: + success = await self._handler.write_mos( + device, value, + ble_device_callback=self._get_ble_device, + ) + except (BleakError, asyncio.TimeoutError) as exc: + raise HomeAssistantError(f"MOS command failed: {exc}") from exc if not success: raise HomeAssistantError("BMS did not acknowledge the MOS command") await self.async_request_refresh() @@ -104,25 +107,22 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): # ------------------------------------------------------------------ def _get_ble_device(self) -> BLEDevice | None: - """Return the freshest available BLEDevice reference. - - Prefers the advertisement-callback reference (_ble_device) because it - tracks proxy transport path changes. Falls back to the scanner cache. - """ - return self._ble_device or async_ble_device_from_address( + """Return the freshest available BLEDevice reference.""" + if self._ble_device is not None: + return self._ble_device + device = async_ble_device_from_address( self.hass, self.address, connectable=True ) + if device is not None: + _LOGGER.debug("BMS %s found via scanner cache", self.address) + return device # ------------------------------------------------------------------ # Poll # ------------------------------------------------------------------ def _handle_failure(self, reason: str) -> dict: - """On a transient failure, return cached data up to the threshold. - - Only raises UpdateFailed (-> sensors go unavailable) after - _FAILURES_BEFORE_UNAVAILABLE consecutive misses. - """ + """Return cached data up to the threshold, then go unavailable.""" self._consecutive_failures += 1 if self._consecutive_failures <= _FAILURES_BEFORE_UNAVAILABLE and self.data: _LOGGER.debug( @@ -135,9 +135,17 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): raise UpdateFailed(reason) async def _async_update_data(self) -> dict: - """Connect to the BMS, fetch all data, disconnect.""" + """Poll the BMS over the persistent connection.""" device = self._get_ble_device() + if device is None: + # Wait up to 5 s for an advertisement (after HA restart / proxy reconnect) + _LOGGER.debug("BMS %s not discovered yet, waiting…", self.address) + for _ in range(5): + await asyncio.sleep(1.0) + device = self._get_ble_device() + if device is not None: + break if device is None: return self._handle_failure( f"BMS ({self.address}) not reachable — check Bluetooth adapter / proxy" @@ -147,8 +155,7 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): if self.hw_version is None: commands.append(CMD_VERSION) - # Only one BMS polls at a time — prevents proxy connection slot contention. - # The timeout wraps only the actual BLE operation, not the lock wait. + # Serialise BLE operations across all BMS instances. async with self._ble_lock: try: responses = await asyncio.wait_for( @@ -160,10 +167,12 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): timeout=_POLL_TIMEOUT, ) except asyncio.TimeoutError: + # Connection might be dead — force reconnect next cycle + await self._handler.disconnect() return self._handle_failure( f"BMS poll timed out after {_POLL_TIMEOUT}s" ) - except Exception as exc: + except (BleakError, Exception) as exc: return self._handle_failure(f"BMS poll failed: {exc}") general_frame, cell_frame = responses[0], responses[1] @@ -194,9 +203,10 @@ class BmsCoordinator(DataUpdateCoordinator[dict]): data["cell_delta"] = None _LOGGER.debug( - "BMS data: %.2fV %.2fA %d%% %.2fAh %.3fkWh %d cells", + "BMS data: %.2fV %.2fA %d%% %.2fAh %.3fkWh %d cells (connected: %s)", data["voltage"], data["current"], data["state_of_charge"], data["residual_capacity"], data["energy_stored"], len(data["cell_voltages"]), + self._handler.is_connected, ) return data