From b9b8662bad161b49b6da2b883807e1c53d2ca789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phan=20Sainl=C3=A9ger?= Date: Mon, 15 Jun 2026 16:10:09 +0200 Subject: [PATCH] [IMP] maintenance_service_http_monitoring: add double check on HTTP errors to reduce "noise" from transient HTTP errors --- maintenance_service_http_monitoring/README.md | 3 + .../models/service_instance.py | 47 ++++++-- .../tests/test_http_monitoring.py | 102 ++++++++++++++---- 3 files changed, 121 insertions(+), 31 deletions(-) diff --git a/maintenance_service_http_monitoring/README.md b/maintenance_service_http_monitoring/README.md index 63afdf4..603b247 100644 --- a/maintenance_service_http_monitoring/README.md +++ b/maintenance_service_http_monitoring/README.md @@ -102,6 +102,9 @@ When a service fails HTTP checks: - The request is linked to the equipment - Only one request per equipment per day is created - The request description lists all failing services +- A **double-check** is performed before creating the request: the service is retested + after 2 seconds. A maintenance request is only created if the service fails **both** + checks, reducing noise from transient HTTP errors ## Webhook notifications diff --git a/maintenance_service_http_monitoring/models/service_instance.py b/maintenance_service_http_monitoring/models/service_instance.py index 3024801..72ecd5c 100644 --- a/maintenance_service_http_monitoring/models/service_instance.py +++ b/maintenance_service_http_monitoring/models/service_instance.py @@ -1,4 +1,5 @@ import logging +import time from odoo import api, fields, models @@ -10,6 +11,7 @@ except ImportError: _logger = logging.getLogger(__name__) HTTP_CHECK_TIMEOUT = 10 # seconds +HTTP_RETRY_DELAY = 2 # seconds between pass 1 and pass 2 class ServiceInstance(models.Model): @@ -31,11 +33,18 @@ class ServiceInstance(models.Model): ) def check_http_status(self): + """ + Perform HTTP check for each record and return the KO recordset. + + Writes last_http_status_code, last_http_check_date and http_status_ok on every + checked record. Does NOT create maintenance.request — that decision belongs to + the caller (cron) after optional retry logic. + """ + ko_records = self.browse() for rec in self: if not rec.service_url or not rec.equipment_id: continue - equipment = rec.equipment_id - if getattr(equipment, "maintenance_mode", False): + if rec.equipment_id.maintenance_mode: continue status_ok = False status_code = -1 @@ -57,20 +66,36 @@ class ServiceInstance(models.Model): } ) if not status_ok: - # Delegate maintenance.request creation to equipment - if hasattr(equipment, "create_http_maintenance_request"): - equipment.create_http_maintenance_request([rec]) + ko_records |= rec + return ko_records @api.model def cron_check_http_services(self): + """ + Check all active services with a URL, with one retry on failure. + + Pass 1: test every eligible service. + If any fail, wait HTTP_RETRY_DELAY seconds then retest only the KO ones. + maintenance.request is created only for services that fail both passes, + reducing noise from transient HTTP errors. + """ domain = [ ("active", "=", True), ("service_url", "!=", False), ("equipment_id", "!=", False), ] - services = self.search(domain) - for service in services: - equipment = service.equipment_id - if getattr(equipment, "maintenance_mode", False): - continue - service.check_http_status() + services = self.search(domain).filtered( + lambda s: not s.equipment_id.maintenance_mode + ) + + ko_after_pass1 = services.check_http_status() + + if not ko_after_pass1: + return + + time.sleep(HTTP_RETRY_DELAY) + + ko_confirmed = ko_after_pass1.check_http_status() + + for service in ko_confirmed: + service.equipment_id.create_http_maintenance_request([service]) diff --git a/maintenance_service_http_monitoring/tests/test_http_monitoring.py b/maintenance_service_http_monitoring/tests/test_http_monitoring.py index 0a9098f..2040f56 100644 --- a/maintenance_service_http_monitoring/tests/test_http_monitoring.py +++ b/maintenance_service_http_monitoring/tests/test_http_monitoring.py @@ -11,6 +11,9 @@ EQUIPMENT_HTTP_REQUESTS = ( "odoo.addons.maintenance_service_http_monitoring" ".models.maintenance_equipment.http_requests" ) +SERVICE_INSTANCE_SLEEP = ( + "odoo.addons.maintenance_service_http_monitoring.models.service_instance.time.sleep" +) def _mock_response(status_code): @@ -39,7 +42,7 @@ class TestHttpMonitoring(TransactionCase): ) # ------------------------------------------------------------------ - # Test 1 — HTTP 200 → service marked OK + # Test 1 -- HTTP 200 -> service marked OK # ------------------------------------------------------------------ def test_http_200_sets_status_ok(self): with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: @@ -52,13 +55,16 @@ class TestHttpMonitoring(TransactionCase): self.assertIsNotNone(self.service_instance.last_http_check_date) # ------------------------------------------------------------------ - # Test 2 — HTTP 500 → service KO + maintenance.request created + # Test 2 -- Two KO passes -> maintenance.request created # ------------------------------------------------------------------ def test_http_500_creates_maintenance_request(self): - with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: + with ( + patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), + ): mock_requests.get.return_value = _mock_response(500) mock_requests.exceptions.RequestException = Exception - self.service_instance.check_http_status() + self.env["service.instance"].cron_check_http_services() self.assertFalse(self.service_instance.http_status_ok) self.assertEqual(self.service_instance.last_http_status_code, 500) @@ -70,7 +76,7 @@ class TestHttpMonitoring(TransactionCase): self.assertEqual(request.maintenance_type, "corrective") # ------------------------------------------------------------------ - # Test 3 — Network error → KO with code -1 + # Test 3 -- Network error -> KO with code -1 # ------------------------------------------------------------------ def test_network_error_sets_status_ko_and_minus_one(self): with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: @@ -82,21 +88,27 @@ class TestHttpMonitoring(TransactionCase): self.assertEqual(self.service_instance.last_http_status_code, -1) # ------------------------------------------------------------------ - # Test 4 — Two consecutive failures → no duplicate maintenance.request + # Test 4 -- Two consecutive cron runs KO -> no duplicate request # ------------------------------------------------------------------ def test_no_duplicate_request_on_repeated_failure(self): - with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: + with ( + patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), + ): mock_requests.get.return_value = _mock_response(500) mock_requests.exceptions.RequestException = Exception - self.service_instance.check_http_status() + self.env["service.instance"].cron_check_http_services() request_1 = self.equipment.http_maintenance_request self.assertTrue(request_1) - with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: + with ( + patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), + ): mock_requests.get.return_value = _mock_response(500) mock_requests.exceptions.RequestException = Exception - self.service_instance.check_http_status() + self.env["service.instance"].cron_check_http_services() self.assertEqual(self.equipment.http_maintenance_request, request_1) self.assertEqual( @@ -107,7 +119,7 @@ class TestHttpMonitoring(TransactionCase): ) # ------------------------------------------------------------------ - # Test 5 — Equipment in maintenance mode → cron skips it + # Test 5 -- Equipment in maintenance mode -> cron skips it # ------------------------------------------------------------------ def test_maintenance_mode_skips_http_check(self): self.equipment.write({"maintenance_mode": True}) @@ -118,11 +130,10 @@ class TestHttpMonitoring(TransactionCase): self.env["service.instance"].cron_check_http_services() mock_requests.get.assert_not_called() - # last_http_check_date must remain unset (never checked) self.assertFalse(self.service_instance.last_http_check_date) # ------------------------------------------------------------------ - # Test 6 — Expired maintenance mode → cron deactivates it + # Test 6 -- Expired maintenance mode -> cron deactivates it # ------------------------------------------------------------------ def test_maintenance_mode_auto_expiry(self): past = fields.Datetime.now() - timedelta(hours=1) @@ -142,7 +153,7 @@ class TestHttpMonitoring(TransactionCase): self.assertFalse(self.equipment.maintenance_mode_end) # ------------------------------------------------------------------ - # Test 7 — Service without URL → ignored by cron + # Test 7 -- Service without URL -> ignored by cron # ------------------------------------------------------------------ def test_service_without_url_is_ignored(self): self.service_instance.write({"service_url": False}) @@ -156,7 +167,7 @@ class TestHttpMonitoring(TransactionCase): self.assertFalse(self.service_instance.last_http_check_date) # ------------------------------------------------------------------ - # Test 8 — HTTP 404 (non-exception) → KO with correct code + # Test 8 -- HTTP 404 (non-exception) -> KO with correct code # ------------------------------------------------------------------ def test_http_non_200_non_exception(self): with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests: @@ -168,7 +179,7 @@ class TestHttpMonitoring(TransactionCase): self.assertEqual(self.service_instance.last_http_status_code, 404) # ------------------------------------------------------------------ - # Test 9 — Webhook called when a new maintenance.request is created + # Test 9 -- Webhook called when a new maintenance.request is created # ------------------------------------------------------------------ def test_webhook_called_on_new_request(self): self.env["ir.config_parameter"].sudo().set_param( @@ -177,11 +188,12 @@ class TestHttpMonitoring(TransactionCase): ) with ( patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), patch(EQUIPMENT_HTTP_REQUESTS) as mock_http, ): mock_requests.get.return_value = _mock_response(500) mock_requests.exceptions.RequestException = Exception - self.service_instance.check_http_status() + self.env["service.instance"].cron_check_http_services() mock_http.post.assert_called_once() call_kwargs = mock_http.post.call_args @@ -189,7 +201,7 @@ class TestHttpMonitoring(TransactionCase): self.assertEqual(payload["equipment"], self.equipment.name) # ------------------------------------------------------------------ - # Test 10 — Webhook skipped when no URL configured + # Test 10 -- Webhook skipped when no URL configured # ------------------------------------------------------------------ def test_webhook_skipped_when_no_url(self): self.env["ir.config_parameter"].sudo().set_param( @@ -197,16 +209,17 @@ class TestHttpMonitoring(TransactionCase): ) with ( patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), patch(EQUIPMENT_HTTP_REQUESTS) as mock_http, ): mock_requests.get.return_value = _mock_response(500) mock_requests.exceptions.RequestException = Exception - self.service_instance.check_http_status() + self.env["service.instance"].cron_check_http_services() mock_http.post.assert_not_called() # ------------------------------------------------------------------ - # Test 11 — Service without equipment → check_http_status skips it + # Test 11 -- Service without equipment -> check_http_status skips it # ------------------------------------------------------------------ def test_service_without_equipment_is_ignored(self): self.service_instance.write({"equipment_id": False}) @@ -218,3 +231,52 @@ class TestHttpMonitoring(TransactionCase): mock_requests.get.assert_not_called() self.assertFalse(self.service_instance.last_http_check_date) + + # ------------------------------------------------------------------ + # Test 12 -- Transient failure (KO pass 1, OK pass 2) -> no request created + # ------------------------------------------------------------------ + def test_transient_failure_no_request_created(self): + with ( + patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP), + ): + mock_requests.get.side_effect = [ + _mock_response(500), # pass 1: KO + _mock_response(200), # pass 2 (retry): OK + ] + mock_requests.exceptions.RequestException = Exception + self.env["service.instance"].cron_check_http_services() + + # Final status reflects pass 2 result + self.assertTrue(self.service_instance.http_status_ok) + self.assertEqual(self.service_instance.last_http_status_code, 200) + # No maintenance.request must have been created + self.assertFalse(self.equipment.http_maintenance_request) + self.assertEqual( + self.env["maintenance.request"].search_count( + [("equipment_id", "=", self.equipment.id)] + ), + 0, + ) + + # ------------------------------------------------------------------ + # Test 13 -- Confirmed failure (KO pass 1 and 2) -> request created + # ------------------------------------------------------------------ + def test_confirmed_failure_creates_request(self): + with ( + patch(SERVICE_INSTANCE_REQUESTS) as mock_requests, + patch(SERVICE_INSTANCE_SLEEP) as mock_sleep, + ): + mock_requests.get.return_value = _mock_response(503) + mock_requests.exceptions.RequestException = Exception + self.env["service.instance"].cron_check_http_services() + + # sleep must have been called between the two passes + mock_sleep.assert_called_once_with(2) + # requests.get must have been called twice (pass 1 + pass 2) + self.assertEqual(mock_requests.get.call_count, 2) + # Final status is KO + self.assertFalse(self.service_instance.http_status_ok) + self.assertEqual(self.service_instance.last_http_status_code, 503) + # maintenance.request created + self.assertTrue(self.equipment.http_maintenance_request)