[IMP] maintenance_service_http_monitoring: add double check on HTTP errors

to reduce "noise" from transient HTTP errors
This commit is contained in:
Stéphan Sainléger
2026-06-15 16:10:09 +02:00
parent cb3ed485b8
commit b9b8662bad
3 changed files with 121 additions and 31 deletions

View File

@@ -102,6 +102,9 @@ When a service fails HTTP checks:
- The request is linked to the equipment
- Only one request per equipment per day is created
- The request description lists all failing services
- A **double-check** is performed before creating the request: the service is retested
after 2 seconds. A maintenance request is only created if the service fails **both**
checks, reducing noise from transient HTTP errors
## Webhook notifications

View File

@@ -1,4 +1,5 @@
import logging
import time
from odoo import api, fields, models
@@ -10,6 +11,7 @@ except ImportError:
_logger = logging.getLogger(__name__)
HTTP_CHECK_TIMEOUT = 10 # seconds
HTTP_RETRY_DELAY = 2 # seconds between pass 1 and pass 2
class ServiceInstance(models.Model):
@@ -31,11 +33,18 @@ class ServiceInstance(models.Model):
)
def check_http_status(self):
"""
Perform HTTP check for each record and return the KO recordset.
Writes last_http_status_code, last_http_check_date and http_status_ok on every
checked record. Does NOT create maintenance.request — that decision belongs to
the caller (cron) after optional retry logic.
"""
ko_records = self.browse()
for rec in self:
if not rec.service_url or not rec.equipment_id:
continue
equipment = rec.equipment_id
if getattr(equipment, "maintenance_mode", False):
if rec.equipment_id.maintenance_mode:
continue
status_ok = False
status_code = -1
@@ -57,20 +66,36 @@ class ServiceInstance(models.Model):
}
)
if not status_ok:
# Delegate maintenance.request creation to equipment
if hasattr(equipment, "create_http_maintenance_request"):
equipment.create_http_maintenance_request([rec])
ko_records |= rec
return ko_records
@api.model
def cron_check_http_services(self):
"""
Check all active services with a URL, with one retry on failure.
Pass 1: test every eligible service.
If any fail, wait HTTP_RETRY_DELAY seconds then retest only the KO ones.
maintenance.request is created only for services that fail both passes,
reducing noise from transient HTTP errors.
"""
domain = [
("active", "=", True),
("service_url", "!=", False),
("equipment_id", "!=", False),
]
services = self.search(domain)
for service in services:
equipment = service.equipment_id
if getattr(equipment, "maintenance_mode", False):
continue
service.check_http_status()
services = self.search(domain).filtered(
lambda s: not s.equipment_id.maintenance_mode
)
ko_after_pass1 = services.check_http_status()
if not ko_after_pass1:
return
time.sleep(HTTP_RETRY_DELAY)
ko_confirmed = ko_after_pass1.check_http_status()
for service in ko_confirmed:
service.equipment_id.create_http_maintenance_request([service])

View File

@@ -11,6 +11,9 @@ EQUIPMENT_HTTP_REQUESTS = (
"odoo.addons.maintenance_service_http_monitoring"
".models.maintenance_equipment.http_requests"
)
SERVICE_INSTANCE_SLEEP = (
"odoo.addons.maintenance_service_http_monitoring.models.service_instance.time.sleep"
)
def _mock_response(status_code):
@@ -39,7 +42,7 @@ class TestHttpMonitoring(TransactionCase):
)
# ------------------------------------------------------------------
# Test 1 HTTP 200 service marked OK
# Test 1 -- HTTP 200 -> service marked OK
# ------------------------------------------------------------------
def test_http_200_sets_status_ok(self):
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
@@ -52,13 +55,16 @@ class TestHttpMonitoring(TransactionCase):
self.assertIsNotNone(self.service_instance.last_http_check_date)
# ------------------------------------------------------------------
# Test 2 — HTTP 500 → service KO + maintenance.request created
# Test 2 -- Two KO passes -> maintenance.request created
# ------------------------------------------------------------------
def test_http_500_creates_maintenance_request(self):
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
):
mock_requests.get.return_value = _mock_response(500)
mock_requests.exceptions.RequestException = Exception
self.service_instance.check_http_status()
self.env["service.instance"].cron_check_http_services()
self.assertFalse(self.service_instance.http_status_ok)
self.assertEqual(self.service_instance.last_http_status_code, 500)
@@ -70,7 +76,7 @@ class TestHttpMonitoring(TransactionCase):
self.assertEqual(request.maintenance_type, "corrective")
# ------------------------------------------------------------------
# Test 3 Network error KO with code -1
# Test 3 -- Network error -> KO with code -1
# ------------------------------------------------------------------
def test_network_error_sets_status_ko_and_minus_one(self):
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
@@ -82,21 +88,27 @@ class TestHttpMonitoring(TransactionCase):
self.assertEqual(self.service_instance.last_http_status_code, -1)
# ------------------------------------------------------------------
# Test 4 Two consecutive failures → no duplicate maintenance.request
# Test 4 -- Two consecutive cron runs KO -> no duplicate request
# ------------------------------------------------------------------
def test_no_duplicate_request_on_repeated_failure(self):
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
):
mock_requests.get.return_value = _mock_response(500)
mock_requests.exceptions.RequestException = Exception
self.service_instance.check_http_status()
self.env["service.instance"].cron_check_http_services()
request_1 = self.equipment.http_maintenance_request
self.assertTrue(request_1)
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
):
mock_requests.get.return_value = _mock_response(500)
mock_requests.exceptions.RequestException = Exception
self.service_instance.check_http_status()
self.env["service.instance"].cron_check_http_services()
self.assertEqual(self.equipment.http_maintenance_request, request_1)
self.assertEqual(
@@ -107,7 +119,7 @@ class TestHttpMonitoring(TransactionCase):
)
# ------------------------------------------------------------------
# Test 5 Equipment in maintenance mode cron skips it
# Test 5 -- Equipment in maintenance mode -> cron skips it
# ------------------------------------------------------------------
def test_maintenance_mode_skips_http_check(self):
self.equipment.write({"maintenance_mode": True})
@@ -118,11 +130,10 @@ class TestHttpMonitoring(TransactionCase):
self.env["service.instance"].cron_check_http_services()
mock_requests.get.assert_not_called()
# last_http_check_date must remain unset (never checked)
self.assertFalse(self.service_instance.last_http_check_date)
# ------------------------------------------------------------------
# Test 6 Expired maintenance mode cron deactivates it
# Test 6 -- Expired maintenance mode -> cron deactivates it
# ------------------------------------------------------------------
def test_maintenance_mode_auto_expiry(self):
past = fields.Datetime.now() - timedelta(hours=1)
@@ -142,7 +153,7 @@ class TestHttpMonitoring(TransactionCase):
self.assertFalse(self.equipment.maintenance_mode_end)
# ------------------------------------------------------------------
# Test 7 Service without URL ignored by cron
# Test 7 -- Service without URL -> ignored by cron
# ------------------------------------------------------------------
def test_service_without_url_is_ignored(self):
self.service_instance.write({"service_url": False})
@@ -156,7 +167,7 @@ class TestHttpMonitoring(TransactionCase):
self.assertFalse(self.service_instance.last_http_check_date)
# ------------------------------------------------------------------
# Test 8 HTTP 404 (non-exception) KO with correct code
# Test 8 -- HTTP 404 (non-exception) -> KO with correct code
# ------------------------------------------------------------------
def test_http_non_200_non_exception(self):
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
@@ -168,7 +179,7 @@ class TestHttpMonitoring(TransactionCase):
self.assertEqual(self.service_instance.last_http_status_code, 404)
# ------------------------------------------------------------------
# Test 9 Webhook called when a new maintenance.request is created
# Test 9 -- Webhook called when a new maintenance.request is created
# ------------------------------------------------------------------
def test_webhook_called_on_new_request(self):
self.env["ir.config_parameter"].sudo().set_param(
@@ -177,11 +188,12 @@ class TestHttpMonitoring(TransactionCase):
)
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
):
mock_requests.get.return_value = _mock_response(500)
mock_requests.exceptions.RequestException = Exception
self.service_instance.check_http_status()
self.env["service.instance"].cron_check_http_services()
mock_http.post.assert_called_once()
call_kwargs = mock_http.post.call_args
@@ -189,7 +201,7 @@ class TestHttpMonitoring(TransactionCase):
self.assertEqual(payload["equipment"], self.equipment.name)
# ------------------------------------------------------------------
# Test 10 Webhook skipped when no URL configured
# Test 10 -- Webhook skipped when no URL configured
# ------------------------------------------------------------------
def test_webhook_skipped_when_no_url(self):
self.env["ir.config_parameter"].sudo().set_param(
@@ -197,16 +209,17 @@ class TestHttpMonitoring(TransactionCase):
)
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
):
mock_requests.get.return_value = _mock_response(500)
mock_requests.exceptions.RequestException = Exception
self.service_instance.check_http_status()
self.env["service.instance"].cron_check_http_services()
mock_http.post.assert_not_called()
# ------------------------------------------------------------------
# Test 11 Service without equipment check_http_status skips it
# Test 11 -- Service without equipment -> check_http_status skips it
# ------------------------------------------------------------------
def test_service_without_equipment_is_ignored(self):
self.service_instance.write({"equipment_id": False})
@@ -218,3 +231,52 @@ class TestHttpMonitoring(TransactionCase):
mock_requests.get.assert_not_called()
self.assertFalse(self.service_instance.last_http_check_date)
# ------------------------------------------------------------------
# Test 12 -- Transient failure (KO pass 1, OK pass 2) -> no request created
# ------------------------------------------------------------------
def test_transient_failure_no_request_created(self):
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP),
):
mock_requests.get.side_effect = [
_mock_response(500), # pass 1: KO
_mock_response(200), # pass 2 (retry): OK
]
mock_requests.exceptions.RequestException = Exception
self.env["service.instance"].cron_check_http_services()
# Final status reflects pass 2 result
self.assertTrue(self.service_instance.http_status_ok)
self.assertEqual(self.service_instance.last_http_status_code, 200)
# No maintenance.request must have been created
self.assertFalse(self.equipment.http_maintenance_request)
self.assertEqual(
self.env["maintenance.request"].search_count(
[("equipment_id", "=", self.equipment.id)]
),
0,
)
# ------------------------------------------------------------------
# Test 13 -- Confirmed failure (KO pass 1 and 2) -> request created
# ------------------------------------------------------------------
def test_confirmed_failure_creates_request(self):
with (
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
patch(SERVICE_INSTANCE_SLEEP) as mock_sleep,
):
mock_requests.get.return_value = _mock_response(503)
mock_requests.exceptions.RequestException = Exception
self.env["service.instance"].cron_check_http_services()
# sleep must have been called between the two passes
mock_sleep.assert_called_once_with(2)
# requests.get must have been called twice (pass 1 + pass 2)
self.assertEqual(mock_requests.get.call_count, 2)
# Final status is KO
self.assertFalse(self.service_instance.http_status_ok)
self.assertEqual(self.service_instance.last_http_status_code, 503)
# maintenance.request created
self.assertTrue(self.equipment.http_maintenance_request)