Branch data Line data Source code
1 : : #include "ConnectionHealthMonitor.h"
2 : :
3 : : #include <QLoggingCategory>
4 : : #include <QNetworkInformation>
5 : : #include <QDateTime>
6 : :
7 : : #include "service/ImapService.h"
8 : :
9 : : #ifdef QT_GUI_LIB
10 : : #include <QGuiApplication>
11 : : #endif
12 : :
13 [ + + + - : 552 : Q_LOGGING_CATEGORY(lcHealth, "mailjd.health")
+ - - - ]
14 : :
15 : 111 : ConnectionHealthMonitor::ConnectionHealthMonitor(bool systemWatchEnabled,
16 : 111 : QObject *parent)
17 [ + - + - : 111 : : QObject(parent), m_systemWatchEnabled(systemWatchEnabled) {
+ - ]
18 [ + - ]: 111 : m_probeTimer.setSingleShot(true);
19 [ + - ]: 111 : m_probeTimer.setInterval(PROBE_DEFAULT_MS);
20 : 111 : connect(&m_probeTimer, &QTimer::timeout, this,
21 [ + - ]: 111 : &ConnectionHealthMonitor::onProbeTimer);
22 : :
23 [ + - ]: 111 : m_reconnectTimer.setSingleShot(true);
24 : 111 : connect(&m_reconnectTimer, &QTimer::timeout, this,
25 [ + - ]: 111 : &ConnectionHealthMonitor::onReconnect);
26 : :
27 : : // System-wide hooks (suspend + network change) are owned by a single
28 : : // "primary" monitor only — otherwise one OS event would fan out into N
29 : : // simultaneous forceReconnect() calls (one per secondary connection).
30 [ + + ]: 111 : if (!m_systemWatchEnabled) {
31 [ + - + - : 82 : qCInfo(lcHealth) << "Secondary monitor — system-watch hooks disabled";
+ - + + ]
32 : 41 : return;
33 : : }
34 : :
35 : : // T-72.7: Suspend/resume hook. applicationStateChanged is emitted by
36 : : // the platform plugin (Windows, macOS, mobile). On Linux desktops it is
37 : : // unreliable — Inactive simply means "window lost focus", not suspend —
38 : : // so we do not wire it there at all. The timer-skew fallback in
39 : : // onProbeTimer() is the resume detector on Linux.
40 : : #if defined(QT_GUI_LIB) && !defined(Q_OS_LINUX)
41 : : if (auto *app = qobject_cast<QGuiApplication *>(QCoreApplication::instance())) {
42 : : connect(app, &QGuiApplication::applicationStateChanged, this,
43 : : [this](Qt::ApplicationState s) { handleApplicationState(s); });
44 : : m_lastAppState = app->applicationState();
45 : : }
46 : : #endif
47 : :
48 [ + - ]: 70 : setupNetworkHooks();
49 : 0 : }
50 : :
51 : 84 : ConnectionHealthMonitor::~ConnectionHealthMonitor() {
52 : 49 : stopAllTimers();
53 : 49 : detach();
54 : 84 : }
55 : :
56 : 70 : void ConnectionHealthMonitor::setupNetworkHooks() {
57 : : // T-72.7: QNetworkInformation is part of Qt Network (already linked as
58 : : // Qt6::Network per the sprint plan). Try the default backend; if none
59 : : // is available (minimal container), skip gracefully — probes + timer
60 : : // skew still detect dead sockets.
61 [ + - - + ]: 70 : if (QNetworkInformation::availableBackends().isEmpty()) {
62 [ # # # # : 0 : qCInfo(lcHealth) << "No QNetworkInformation backend — relying on probes"
# # # # ]
63 [ # # ]: 0 : << "and timer-skew fallback for resume detection";
64 : 0 : return;
65 : : }
66 [ - + ]: 70 : if (!QNetworkInformation::loadDefaultBackend()) {
67 [ # # # # : 0 : qCWarning(lcHealth) << "QNetworkInformation::loadDefaultBackend() failed";
# # # # ]
68 : 0 : return;
69 : : }
70 : 70 : m_netInfo = QNetworkInformation::instance();
71 [ - + ]: 70 : if (!m_netInfo) {
72 [ # # # # : 0 : qCWarning(lcHealth) << "QNetworkInformation::instance() returned null";
# # # # ]
73 : 0 : return;
74 : : }
75 : 70 : m_lastReachability = m_netInfo->reachability();
76 : 70 : m_lastTransportMedium = m_netInfo->transportMedium();
77 : 70 : connect(m_netInfo, &QNetworkInformation::reachabilityChanged, this,
78 [ + - ]: 70 : [this](QNetworkInformation::Reachability r) {
79 : 0 : const auto previous = m_lastReachability;
80 : 0 : m_lastReachability = r;
81 [ # # ]: 0 : if (!m_active)
82 : 0 : return;
83 : : // Only Disconnected/Unknown → reachable is actionable. Some Qt
84 : : // backends emit initial or duplicate reachable signals shortly
85 : : // after startup; those must not tear down a healthy IMAP session.
86 [ # # ]: 0 : if (isReachabilityReconnectTransition(previous, r)) {
87 [ # # # # : 0 : qCInfo(lcHealth) << "Network reachability:" << previous << "→"
# # # # #
# # # ]
88 [ # # # # ]: 0 : << r << "— forcing reconnect";
89 [ # # ]: 0 : forceReconnect(QStringLiteral("network changed (reachable)"));
90 : : }
91 : : });
92 : 70 : connect(m_netInfo, &QNetworkInformation::transportMediumChanged, this,
93 [ + - ]: 70 : [this](QNetworkInformation::TransportMedium m) {
94 : 0 : const auto previous = m_lastTransportMedium;
95 : 0 : m_lastTransportMedium = m;
96 [ # # ]: 0 : if (!m_active)
97 : 0 : return;
98 : : // Wi-Fi ↔ cellular/carrier changes almost always mean a NAT reset.
99 : : // Ignore Unknown and initial backend-settling signals.
100 [ # # ]: 0 : if (isTransportReconnectTransition(previous, m)) {
101 [ # # # # : 0 : qCInfo(lcHealth) << "Transport medium changed (" << previous
# # # # #
# ]
102 [ # # # # : 0 : << "→" << m << ") — forcing reconnect";
# # ]
103 [ # # ]: 0 : forceReconnect(QStringLiteral("network changed (transport)"));
104 : : }
105 : : });
106 [ + - + - : 140 : qCInfo(lcHealth) << "Network-change hooks active via"
+ - + + ]
107 [ + - + - ]: 70 : << m_netInfo->backendName();
108 : : }
109 : :
110 : 109 : void ConnectionHealthMonitor::attach(ImapService *imap) {
111 [ - + ]: 109 : if (m_imap == imap)
112 : 0 : return;
113 : 109 : detach();
114 : 109 : m_imap = imap;
115 [ - + ]: 109 : if (!m_imap)
116 : 0 : return;
117 : : // Wire through a Qt::QueuedConnection-friendly int slot so the monitor
118 : : // does not depend on the ImapService enum meta-type at connect time.
119 : 109 : connect(m_imap, &ImapService::stateChanged, this,
120 [ + - ]: 597 : [this](ImapService::State s) { onStateChanged(static_cast<int>(s)); });
121 : 109 : connect(m_imap, &ImapService::errorOccurred, this,
122 [ + - ]: 218 : &ConnectionHealthMonitor::onErrorOccurred);
123 : : }
124 : :
125 : 197 : void ConnectionHealthMonitor::detach() {
126 [ + + ]: 197 : if (m_imap)
127 : 47 : QObject::disconnect(m_imap, nullptr, this, nullptr);
128 : 197 : m_imap = nullptr;
129 : 197 : stopAllTimers();
130 : 197 : }
131 : :
132 : 207 : void ConnectionHealthMonitor::setReconnectConfig(const ImapConfig &cfg) {
133 : 207 : m_config = cfg;
134 : 207 : }
135 : :
136 : 258 : void ConnectionHealthMonitor::setActive(bool active) {
137 [ + + ]: 258 : if (m_active == active)
138 : 120 : return;
139 : 138 : m_active = active;
140 [ + - + - : 276 : qCInfo(lcHealth) << (active ? "Monitor activated" : "Monitor deactivated");
+ + + - +
+ ]
141 [ + + ]: 138 : if (!active) {
142 : 34 : stopAllTimers();
143 : 34 : m_hadOutage = false;
144 [ + - ]: 104 : } else if (m_imap) {
145 : : // Re-arm the probe timer if currently in a probeable state.
146 : 104 : ImapService::State s = m_imap->state();
147 [ + + + - ]: 104 : if (s == ImapService::State::Authenticated ||
148 [ - + ]: 103 : s == ImapService::State::Selected ||
149 : : s == ImapService::State::Idling) {
150 : 1 : m_probeTimer.start();
151 : : }
152 : : }
153 : : }
154 : :
155 : 1 : void ConnectionHealthMonitor::setProbeInterval(int ms) {
156 : 1 : m_probeTimer.setInterval(ms);
157 : 1 : }
158 : :
159 : 2 : void ConnectionHealthMonitor::forceReconnect(const QString &reason) {
160 [ - + ]: 2 : if (!m_imap) {
161 [ # # # # : 0 : qCWarning(lcHealth) << "forceReconnect requested but no ImapService"
# # # # ]
162 [ # # # # ]: 0 : << "attached — reason:" << reason;
163 : 0 : return;
164 : : }
165 [ + - + - : 4 : qCInfo(lcHealth) << "forceReconnect:" << reason;
+ - + - +
+ ]
166 : : // Reset backoff so resume yields near-immediate recovery rather than
167 : : // waiting for a probe timeout. Set the outage flag so the next
168 : : // Authenticated emits connectionRestored().
169 : 2 : m_reconnectAttempts = 0;
170 : 2 : m_reconnectTimer.stop();
171 : 2 : m_hadOutage = true;
172 [ + - ]: 2 : if (m_active)
173 : 2 : m_imap->abortForReconnect(reason);
174 : : }
175 : :
176 : 488 : void ConnectionHealthMonitor::onStateChanged(int newStateInt) {
177 [ - + ]: 488 : if (!m_imap)
178 : 0 : return;
179 : 488 : auto newState = static_cast<ImapService::State>(newStateInt);
180 : :
181 [ + + + + ]: 488 : switch (newState) {
182 : 101 : case ImapService::State::Authenticated:
183 : : case ImapService::State::Selected:
184 : : // (Re)start the probe timer — these are the states we probe.
185 [ + + ]: 101 : if (m_active) {
186 : 99 : m_reconnectAttempts = 0;
187 : 99 : m_reconnectTimer.stop();
188 : 99 : m_probeTimer.start(m_probeTimer.interval());
189 [ + + ]: 99 : if (m_hadOutage) {
190 [ + - + - : 4 : qCInfo(lcHealth) << "Connection restored after outage";
+ - + + ]
191 : 2 : m_hadOutage = false;
192 : 2 : emit connectionRestored();
193 : : }
194 : : }
195 : 101 : break;
196 : 83 : case ImapService::State::Idling:
197 : : // Still healthy — keep probing (probe will issue IDLE DONE/OK).
198 [ + - ]: 83 : if (m_active)
199 : 83 : m_probeTimer.start(m_probeTimer.interval());
200 : 83 : break;
201 : 133 : case ImapService::State::Error:
202 : : case ImapService::State::Disconnected:
203 : 133 : m_probeTimer.stop();
204 : 133 : scheduleReconnect();
205 : 133 : break;
206 : 171 : default:
207 : : // Connecting/Connected/Greeting/Capability/StartingTLS/Authenticating:
208 : : // no probe needed, no reconnect scheduled.
209 : 171 : break;
210 : : }
211 : : }
212 : :
213 : 122 : void ConnectionHealthMonitor::onErrorOccurred(const QString &error) {
214 [ + - + - : 244 : qCWarning(lcHealth) << "IMAP error:" << error;
+ - + - +
+ ]
215 : : // Idempotent — if stateChanged(Error) already armed the timer, this is
216 : : // a no-op. We DO NOT reset backoff here; only Authenticated does that.
217 : 122 : scheduleReconnect();
218 : 122 : }
219 : :
220 : 45 : void ConnectionHealthMonitor::onProbeTimer() {
221 [ + - - + ]: 45 : if (!m_imap || !m_active)
222 : 0 : return;
223 : :
224 : : // T-72.7: timer-skew fallback. applicationStateChanged is unreliable
225 : : // on Linux desktops; the kernel stops dispatching timers during
226 : : // suspend, so the probe fires very late when the laptop wakes up.
227 : 45 : const auto now = QDateTime::currentMSecsSinceEpoch();
228 [ + + ]: 45 : if (m_lastProbeMsecsSinceEpoch > 0) {
229 : 38 : const qint64 elapsed = now - m_lastProbeMsecsSinceEpoch;
230 [ - + ]: 38 : if (detectTimerSkew(elapsed, m_probeTimer.interval())) {
231 [ # # # # : 0 : qCInfo(lcHealth) << "Probe fired" << elapsed / 1000
# # # # #
# ]
232 [ # # ]: 0 : << "s late — treating as resume";
233 : 0 : m_lastProbeMsecsSinceEpoch = now;
234 [ # # ]: 0 : forceReconnect(QStringLiteral("timer skew — likely resumed"));
235 : 0 : return;
236 : : }
237 : : }
238 : 45 : m_lastProbeMsecsSinceEpoch = now;
239 : :
240 : : // Ask ImapService for a protocol-aware probe (NOOP or IDLE DONE/OK).
241 : : // The service's own 15 s watchdog is the second half of the detection
242 : : // budget; if it fires, failConnection() transitions to Error and our
243 : : // stateChanged arm schedules the reconnect.
244 [ + - ]: 90 : m_imap->requestLivenessProbe(QStringLiteral("periodic health check"));
245 : :
246 : : // Restart for the next cycle (only meaningful in probeable states).
247 : 45 : ImapService::State s = m_imap->state();
248 [ + + + + ]: 45 : if (s == ImapService::State::Authenticated ||
249 [ + - ]: 6 : s == ImapService::State::Selected ||
250 : : s == ImapService::State::Idling) {
251 : 45 : m_probeTimer.start(m_probeTimer.interval());
252 : : }
253 : : }
254 : :
255 : 33 : void ConnectionHealthMonitor::onReconnect() {
256 [ + - - + ]: 33 : if (!m_active || !m_imap) {
257 [ # # # # : 0 : qCDebug(lcHealth) << "onReconnect: inactive or no ImapService — skip";
# # # # ]
258 : 0 : return;
259 : : }
260 [ - + ]: 33 : if (m_config.host.isEmpty()) {
261 [ # # # # : 0 : qCWarning(lcHealth) << "onReconnect: empty config — skip";
# # # # ]
262 : 0 : return;
263 : : }
264 [ + - + - : 66 : qCInfo(lcHealth) << "Reconnecting (attempt" << (m_reconnectAttempts + 1)
+ - + - +
+ ]
265 [ + - ]: 33 : << ")…";
266 [ + - ]: 33 : emit statusMessage(QStringLiteral("Reconnecting…"));
267 : : // ImapService::connectToServer() validates that state is Disconnected
268 : : // or Error before connecting. abortForReconnect()/failConnection() has
269 : : // already transitioned us there.
270 : 33 : m_imap->connectToServer(m_config);
271 : : // Increment AFTER the attempt so the first delay is the base 5 s,
272 : : // not 10 s (the off-by-one that SettingsSyncService had).
273 : 33 : ++m_reconnectAttempts;
274 : : }
275 : :
276 : 255 : void ConnectionHealthMonitor::scheduleReconnect() {
277 [ + + ]: 255 : if (!m_active) {
278 [ + - + - : 14 : qCDebug(lcHealth) << "scheduleReconnect: inactive — skip";
+ - + + ]
279 : 7 : return;
280 : : }
281 [ + + ]: 248 : if (m_config.host.isEmpty()) {
282 [ + - + - : 60 : qCDebug(lcHealth) << "scheduleReconnect: empty config — skip";
+ - + + ]
283 : 30 : return;
284 : : }
285 : : // Idempotent: a second error while the timer is already armed does
286 : : // NOT shorten or restart the pending reconnect. This avoids the
287 : : // "staircase" of overlapping single-shots the previous implementations
288 : : // risked.
289 [ + + ]: 218 : if (m_reconnectTimer.isActive())
290 : 112 : return;
291 : :
292 : 106 : m_hadOutage = true;
293 : 106 : const int delay = backoffDelaySeconds();
294 [ + - + - : 212 : qCInfo(lcHealth) << "Scheduling reconnect in" << delay
+ - + - +
+ ]
295 [ + - + - : 106 : << "s (attempt" << (m_reconnectAttempts + 1) << ")";
+ - ]
296 [ + - + - ]: 212 : emit statusMessage(QStringLiteral("Reconnect in %1s…").arg(delay));
297 : 106 : emit reconnectScheduled(delay);
298 : 106 : m_reconnectTimer.start(delay * 1000);
299 : : }
300 : :
301 : 117 : int ConnectionHealthMonitor::backoffDelaySeconds() const {
302 : : // Corrected from SettingsSyncService: NO pre-increment, so the first
303 : : // delay is the base 5 s. delay = 5 * 2^min(attemptIndex, 20) capped at
304 : : // 300 s → 5, 10, 20, 40, 80, 160, 300, 300, …
305 : : // Bug-24/T-405: cap the shift to 20 to prevent integer overflow UB
306 : : // when m_reconnectAttempts >= 31.
307 : 117 : const int capped = std::min(m_reconnectAttempts, 20);
308 : 117 : return std::min(5 * (1 << capped), MAX_RECONNECT_DELAY);
309 : : }
310 : :
311 : 280 : void ConnectionHealthMonitor::stopAllTimers() {
312 : 280 : m_probeTimer.stop();
313 : 280 : m_reconnectTimer.stop();
314 : 280 : }
315 : :
316 : 5 : void ConnectionHealthMonitor::handleApplicationState(
317 : : Qt::ApplicationState current) {
318 [ + + ]: 5 : if (!m_active)
319 : 1 : return;
320 : : // Suspended → Active = "the system just woke up". A network roaming
321 : : // event almost certainly accompanied the resume, so force a reconnect
322 : : // instead of waiting for a probe timeout.
323 : : //
324 : : // Sprint 72 post-review fix: ApplicationInactive must NOT be treated as
325 : : // "suspended". On Linux desktops (XCB/Wayland) and even on Windows/macOS
326 : : // when another window takes focus, Inactive simply means "not the active
327 : : // window". Treating it as resume tore down healthy IMAP connections on
328 : : // every focus change. Only the genuine Suspended state (mobile
329 : : // backgrounding / system sleep) is actionable.
330 : 4 : const bool wasSuspended = m_lastAppState == Qt::ApplicationSuspended;
331 : 4 : const bool nowActive = current == Qt::ApplicationActive;
332 [ + + + - ]: 4 : if (wasSuspended && nowActive) {
333 [ + - + - : 3 : qCInfo(lcHealth) << "Application resumed (state"
+ - + + ]
334 [ + - + - : 3 : << m_lastAppState << "→" << current
+ - ]
335 [ + - ]: 1 : << ") — forcing reconnect";
336 [ + - ]: 1 : forceReconnect(QStringLiteral("resumed from suspend"));
337 : : }
338 : 4 : m_lastAppState = current;
339 : : }
|