Refactor BLE layer for 24/7 reliability
- Replace raw BleakClient with establish_connection from bleak-retry-connector (retries, GATT service cache, proxy-aware)
- Replace fragile asyncio.Event with asyncio.Queue for response frames, drain stale data on each connection to prevent cross-cycle leakage
- Register BLE advertisement callback to keep BLEDevice reference fresh across ESPHome proxy path changes
- Remove asyncio.sleep(2) device lookup hack
- Increase poll timeout floor from 10s to 20s
- Increase failure tolerance from 3 to 5 consecutive misses
- Bump default poll interval to 30s, min to 15s (halves connection churn)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,8 +5,9 @@ import asyncio
|
||||
import logging
|
||||
import struct
|
||||
|
||||
from bleak import BleakClient, BleakError
|
||||
from bleak import BleakError
|
||||
from bleak.backends.device import BLEDevice
|
||||
from bleak_retry_connector import establish_connection, BleakClientWithServiceCache
|
||||
|
||||
from .const import (
|
||||
FRAME_END,
|
||||
@@ -19,7 +20,7 @@ _LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# Full frame layout:
|
||||
# [0xDD] [CMD] [STATUS] [PAYLOAD_LEN] [PAYLOAD...] [CHK_HI] [CHK_LO] [0x77]
|
||||
# Header = 4 bytes, trailer = 3 bytes (checksum × 2 + end marker)
|
||||
# Header = 4 bytes, trailer = 3 bytes (checksum x 2 + end marker)
|
||||
_HEADER_LEN = 4
|
||||
_TRAILER_LEN = 3
|
||||
|
||||
@@ -27,7 +28,7 @@ _TRAILER_LEN = 3
|
||||
class BmsBluetoothHandler:
|
||||
"""Protocol framing and parsing for a Xiaoxiang BMS device.
|
||||
|
||||
Designed for a connect → poll → disconnect pattern: the BMS only allows
|
||||
Designed for a connect -> poll -> disconnect pattern: the BMS only allows
|
||||
one simultaneous BLE connection, so we hold it only for the duration of
|
||||
a single data fetch and release it immediately after.
|
||||
"""
|
||||
@@ -35,9 +36,24 @@ class BmsBluetoothHandler:
|
||||
def __init__(self, address: str) -> None:
|
||||
self._address = address
|
||||
self._buffer = bytearray()
|
||||
self._response_event = asyncio.Event()
|
||||
self._response_data: bytes | None = None
|
||||
self._lock = asyncio.Lock()
|
||||
self._response_queue: asyncio.Queue[bytes] = asyncio.Queue()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _drain_queue(self) -> None:
|
||||
"""Discard any stale frames left in the queue from a prior cycle."""
|
||||
while not self._response_queue.empty():
|
||||
try:
|
||||
self._response_queue.get_nowait()
|
||||
except asyncio.QueueEmpty:
|
||||
break
|
||||
|
||||
def _reset(self) -> None:
|
||||
"""Clear all transient state before a new connection."""
|
||||
self._buffer.clear()
|
||||
self._drain_queue()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# High-level poll — the only entry point the coordinator needs
|
||||
@@ -47,7 +63,7 @@ class BmsBluetoothHandler:
|
||||
self,
|
||||
ble_device: BLEDevice,
|
||||
commands: list[bytes],
|
||||
timeout: float = 3.0,
|
||||
timeout: float = 5.0,
|
||||
retries: int = 3,
|
||||
) -> list[bytes | None]:
|
||||
"""Connect, send each command in sequence, disconnect.
|
||||
@@ -56,13 +72,17 @@ class BmsBluetoothHandler:
|
||||
only during the active read window and disconnecting immediately after,
|
||||
the mobile app (or any other client) can connect freely between polls.
|
||||
"""
|
||||
self._reset()
|
||||
_LOGGER.debug("Polling BMS at %s", self._address)
|
||||
client = BleakClient(ble_device)
|
||||
|
||||
client = await establish_connection(
|
||||
BleakClientWithServiceCache,
|
||||
ble_device,
|
||||
self._address,
|
||||
max_attempts=3,
|
||||
)
|
||||
try:
|
||||
await client.connect()
|
||||
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
||||
# Give the BMS a moment to register the subscription before
|
||||
# we start sending commands
|
||||
await asyncio.sleep(0.3)
|
||||
return [
|
||||
await self._request(client, cmd, timeout, retries)
|
||||
@@ -71,9 +91,8 @@ class BmsBluetoothHandler:
|
||||
finally:
|
||||
try:
|
||||
await client.disconnect()
|
||||
except Exception:
|
||||
except BleakError:
|
||||
pass
|
||||
self._buffer.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Frame reception
|
||||
@@ -115,8 +134,7 @@ class BmsBluetoothHandler:
|
||||
return
|
||||
|
||||
_LOGGER.debug("BMS frame received (cmd=0x%02X, len=%d)", frame[1], payload_len)
|
||||
self._response_data = frame
|
||||
self._response_event.set()
|
||||
self._response_queue.put_nowait(frame)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Request / response (private — used inside poll())
|
||||
@@ -124,7 +142,7 @@ class BmsBluetoothHandler:
|
||||
|
||||
async def _request(
|
||||
self,
|
||||
client: BleakClient,
|
||||
client: BleakClientWithServiceCache,
|
||||
command: bytes,
|
||||
timeout: float,
|
||||
retries: int,
|
||||
@@ -134,30 +152,33 @@ class BmsBluetoothHandler:
|
||||
Tries Write With Response first; falls back to Write Without Response
|
||||
if the characteristic rejects it — covers both BMS firmware variants.
|
||||
"""
|
||||
async with self._lock:
|
||||
for attempt in range(1, retries + 1):
|
||||
self._response_event.clear()
|
||||
self._response_data = None
|
||||
try:
|
||||
await client.write_gatt_char(TX_CHAR_UUID, command, response=True)
|
||||
except BleakError:
|
||||
try:
|
||||
await client.write_gatt_char(TX_CHAR_UUID, command, response=False)
|
||||
except BleakError as exc:
|
||||
_LOGGER.error("BLE write failed (attempt %d/%d): %s",
|
||||
attempt, retries, exc)
|
||||
if attempt < retries:
|
||||
await asyncio.sleep(0.3)
|
||||
continue
|
||||
for attempt in range(1, retries + 1):
|
||||
# Drain any stale frames before sending a new command
|
||||
self._drain_queue()
|
||||
self._buffer.clear()
|
||||
|
||||
try:
|
||||
await client.write_gatt_char(TX_CHAR_UUID, command, response=True)
|
||||
except BleakError:
|
||||
try:
|
||||
await asyncio.wait_for(self._response_event.wait(), timeout)
|
||||
return self._response_data
|
||||
except asyncio.TimeoutError:
|
||||
_LOGGER.warning("BMS timeout (cmd=0x%s, attempt %d/%d)",
|
||||
command.hex(), attempt, retries)
|
||||
await client.write_gatt_char(TX_CHAR_UUID, command, response=False)
|
||||
except BleakError as exc:
|
||||
_LOGGER.warning("BLE write failed (attempt %d/%d): %s",
|
||||
attempt, retries, exc)
|
||||
if attempt < retries:
|
||||
await asyncio.sleep(0.3)
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
|
||||
try:
|
||||
frame = await asyncio.wait_for(
|
||||
self._response_queue.get(), timeout
|
||||
)
|
||||
return frame
|
||||
except asyncio.TimeoutError:
|
||||
_LOGGER.warning("BMS timeout (cmd=0x%s, attempt %d/%d)",
|
||||
command.hex(), attempt, retries)
|
||||
if attempt < retries:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
return None
|
||||
|
||||
@@ -168,25 +189,29 @@ class BmsBluetoothHandler:
|
||||
async def write_mos(self, ble_device: BLEDevice, value: int) -> bool:
|
||||
"""Send a MOS control write command and return True on ACK.
|
||||
|
||||
Follows the same connect → send → disconnect pattern as poll() so
|
||||
Follows the same connect -> send -> disconnect pattern as poll() so
|
||||
it doesn't interfere with the normal poll cycle.
|
||||
"""
|
||||
self._reset()
|
||||
command = self._build_mos_command(value)
|
||||
_LOGGER.debug("Writing MOS value 0x%02X to BMS at %s", value, self._address)
|
||||
client = BleakClient(ble_device)
|
||||
|
||||
client = await establish_connection(
|
||||
BleakClientWithServiceCache,
|
||||
ble_device,
|
||||
self._address,
|
||||
max_attempts=3,
|
||||
)
|
||||
try:
|
||||
await client.connect()
|
||||
await client.start_notify(RX_CHAR_UUID, self._on_notify)
|
||||
await asyncio.sleep(0.5)
|
||||
response = await self._request(client, command, timeout=3.0, retries=2)
|
||||
# Response: DD E1 00 00 CHK_H CHK_L 77 (status byte 0x00 = OK)
|
||||
await asyncio.sleep(0.3)
|
||||
response = await self._request(client, command, timeout=5.0, retries=2)
|
||||
return response is not None and response[2] == 0x00
|
||||
finally:
|
||||
try:
|
||||
await client.disconnect()
|
||||
except Exception:
|
||||
except BleakError:
|
||||
pass
|
||||
self._buffer.clear()
|
||||
|
||||
@staticmethod
|
||||
def _build_mos_command(value: int) -> bytes:
|
||||
@@ -198,7 +223,7 @@ class BmsBluetoothHandler:
|
||||
Checksum = two's complement of sum, high byte first.
|
||||
|
||||
Verified against spec example:
|
||||
XX=0x02 → sum=0xE5 → ~0xE5+1=0xFF1B → CHK FF 1B ✓
|
||||
XX=0x02 -> sum=0xE5 -> ~0xE5+1=0xFF1B -> CHK FF 1B
|
||||
"""
|
||||
checked = [0xE1, 0x02, 0x00, value & 0xFF]
|
||||
checksum = (~sum(checked) + 1) & 0xFFFF
|
||||
@@ -217,10 +242,10 @@ class BmsBluetoothHandler:
|
||||
"""Parse a 0x03 general info response frame.
|
||||
|
||||
Payload byte offsets (frame[4] is payload[0]):
|
||||
0-1 Total voltage uint16 BE ÷100 → V
|
||||
2-3 Current int16 BE ÷100 → A (positive = charging, negative = discharging)
|
||||
4-5 Residual capacity uint16 BE ÷100 → Ah
|
||||
6-7 Nominal capacity uint16 BE ÷100 → Ah
|
||||
0-1 Total voltage uint16 BE /100 -> V
|
||||
2-3 Current int16 BE /100 -> A (positive = charging, negative = discharging)
|
||||
4-5 Residual capacity uint16 BE /100 -> Ah
|
||||
6-7 Nominal capacity uint16 BE /100 -> Ah
|
||||
8-9 Cycle count uint16 BE
|
||||
10-11 Production date (ignored)
|
||||
12-15 Balance status (ignored)
|
||||
@@ -230,7 +255,7 @@ class BmsBluetoothHandler:
|
||||
20 MOS status uint8
|
||||
21 Cell count uint8
|
||||
22 Temp probe count uint8
|
||||
23+ Temperatures uint16 BE each (raw − 2731) ÷ 10 → °C
|
||||
23+ Temperatures uint16 BE each (raw - 2731) / 10 -> C
|
||||
"""
|
||||
p = frame[_HEADER_LEN:-_TRAILER_LEN]
|
||||
|
||||
@@ -284,11 +309,11 @@ class BmsBluetoothHandler:
|
||||
def parse_cell_info(frame: bytes) -> dict:
|
||||
"""Parse a 0x04 cell voltage response frame.
|
||||
|
||||
Per spec: frame[3] (the header length byte) = cell_count × 2.
|
||||
Per spec: frame[3] (the header length byte) = cell_count x 2.
|
||||
The payload contains ONLY the voltage bytes — no count byte.
|
||||
0+ Cell voltages uint16 BE each unit mV ÷1000 → V
|
||||
0+ Cell voltages uint16 BE each unit mV /1000 -> V
|
||||
"""
|
||||
count = frame[3] // 2 # header length byte = N_cells × 2
|
||||
count = frame[3] // 2 # header length byte = N_cells x 2
|
||||
p = frame[_HEADER_LEN:-_TRAILER_LEN]
|
||||
voltages: list[float] = []
|
||||
for i in range(count):
|
||||
|
||||
Reference in New Issue
Block a user