[IMP] maintenance_service_http_monitoring: add double check on HTTP errors
to reduce "noise" from transient HTTP errors
This commit is contained in:
@@ -102,6 +102,9 @@ When a service fails HTTP checks:
|
|||||||
- The request is linked to the equipment
|
- The request is linked to the equipment
|
||||||
- Only one request per equipment per day is created
|
- Only one request per equipment per day is created
|
||||||
- The request description lists all failing services
|
- The request description lists all failing services
|
||||||
|
- A **double-check** is performed before creating the request: the service is retested
|
||||||
|
after 2 seconds. A maintenance request is only created if the service fails **both**
|
||||||
|
checks, reducing noise from transient HTTP errors
|
||||||
|
|
||||||
## Webhook notifications
|
## Webhook notifications
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
from odoo import api, fields, models
|
from odoo import api, fields, models
|
||||||
|
|
||||||
@@ -10,6 +11,7 @@ except ImportError:
|
|||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
HTTP_CHECK_TIMEOUT = 10 # seconds
|
HTTP_CHECK_TIMEOUT = 10 # seconds
|
||||||
|
HTTP_RETRY_DELAY = 2 # seconds between pass 1 and pass 2
|
||||||
|
|
||||||
|
|
||||||
class ServiceInstance(models.Model):
|
class ServiceInstance(models.Model):
|
||||||
@@ -31,11 +33,18 @@ class ServiceInstance(models.Model):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def check_http_status(self):
|
def check_http_status(self):
|
||||||
|
"""
|
||||||
|
Perform HTTP check for each record and return the KO recordset.
|
||||||
|
|
||||||
|
Writes last_http_status_code, last_http_check_date and http_status_ok on every
|
||||||
|
checked record. Does NOT create maintenance.request — that decision belongs to
|
||||||
|
the caller (cron) after optional retry logic.
|
||||||
|
"""
|
||||||
|
ko_records = self.browse()
|
||||||
for rec in self:
|
for rec in self:
|
||||||
if not rec.service_url or not rec.equipment_id:
|
if not rec.service_url or not rec.equipment_id:
|
||||||
continue
|
continue
|
||||||
equipment = rec.equipment_id
|
if rec.equipment_id.maintenance_mode:
|
||||||
if getattr(equipment, "maintenance_mode", False):
|
|
||||||
continue
|
continue
|
||||||
status_ok = False
|
status_ok = False
|
||||||
status_code = -1
|
status_code = -1
|
||||||
@@ -57,20 +66,36 @@ class ServiceInstance(models.Model):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
if not status_ok:
|
if not status_ok:
|
||||||
# Delegate maintenance.request creation to equipment
|
ko_records |= rec
|
||||||
if hasattr(equipment, "create_http_maintenance_request"):
|
return ko_records
|
||||||
equipment.create_http_maintenance_request([rec])
|
|
||||||
|
|
||||||
@api.model
|
@api.model
|
||||||
def cron_check_http_services(self):
|
def cron_check_http_services(self):
|
||||||
|
"""
|
||||||
|
Check all active services with a URL, with one retry on failure.
|
||||||
|
|
||||||
|
Pass 1: test every eligible service.
|
||||||
|
If any fail, wait HTTP_RETRY_DELAY seconds then retest only the KO ones.
|
||||||
|
maintenance.request is created only for services that fail both passes,
|
||||||
|
reducing noise from transient HTTP errors.
|
||||||
|
"""
|
||||||
domain = [
|
domain = [
|
||||||
("active", "=", True),
|
("active", "=", True),
|
||||||
("service_url", "!=", False),
|
("service_url", "!=", False),
|
||||||
("equipment_id", "!=", False),
|
("equipment_id", "!=", False),
|
||||||
]
|
]
|
||||||
services = self.search(domain)
|
services = self.search(domain).filtered(
|
||||||
for service in services:
|
lambda s: not s.equipment_id.maintenance_mode
|
||||||
equipment = service.equipment_id
|
)
|
||||||
if getattr(equipment, "maintenance_mode", False):
|
|
||||||
continue
|
ko_after_pass1 = services.check_http_status()
|
||||||
service.check_http_status()
|
|
||||||
|
if not ko_after_pass1:
|
||||||
|
return
|
||||||
|
|
||||||
|
time.sleep(HTTP_RETRY_DELAY)
|
||||||
|
|
||||||
|
ko_confirmed = ko_after_pass1.check_http_status()
|
||||||
|
|
||||||
|
for service in ko_confirmed:
|
||||||
|
service.equipment_id.create_http_maintenance_request([service])
|
||||||
|
|||||||
@@ -11,6 +11,9 @@ EQUIPMENT_HTTP_REQUESTS = (
|
|||||||
"odoo.addons.maintenance_service_http_monitoring"
|
"odoo.addons.maintenance_service_http_monitoring"
|
||||||
".models.maintenance_equipment.http_requests"
|
".models.maintenance_equipment.http_requests"
|
||||||
)
|
)
|
||||||
|
SERVICE_INSTANCE_SLEEP = (
|
||||||
|
"odoo.addons.maintenance_service_http_monitoring.models.service_instance.time.sleep"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _mock_response(status_code):
|
def _mock_response(status_code):
|
||||||
@@ -39,7 +42,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 1 — HTTP 200 → service marked OK
|
# Test 1 -- HTTP 200 -> service marked OK
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_http_200_sets_status_ok(self):
|
def test_http_200_sets_status_ok(self):
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
||||||
@@ -52,13 +55,16 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertIsNotNone(self.service_instance.last_http_check_date)
|
self.assertIsNotNone(self.service_instance.last_http_check_date)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 2 — HTTP 500 → service KO + maintenance.request created
|
# Test 2 -- Two KO passes -> maintenance.request created
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_http_500_creates_maintenance_request(self):
|
def test_http_500_creates_maintenance_request(self):
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with (
|
||||||
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
|
):
|
||||||
mock_requests.get.return_value = _mock_response(500)
|
mock_requests.get.return_value = _mock_response(500)
|
||||||
mock_requests.exceptions.RequestException = Exception
|
mock_requests.exceptions.RequestException = Exception
|
||||||
self.service_instance.check_http_status()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
self.assertFalse(self.service_instance.http_status_ok)
|
self.assertFalse(self.service_instance.http_status_ok)
|
||||||
self.assertEqual(self.service_instance.last_http_status_code, 500)
|
self.assertEqual(self.service_instance.last_http_status_code, 500)
|
||||||
@@ -70,7 +76,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertEqual(request.maintenance_type, "corrective")
|
self.assertEqual(request.maintenance_type, "corrective")
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 3 — Network error → KO with code -1
|
# Test 3 -- Network error -> KO with code -1
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_network_error_sets_status_ko_and_minus_one(self):
|
def test_network_error_sets_status_ko_and_minus_one(self):
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
||||||
@@ -82,21 +88,27 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertEqual(self.service_instance.last_http_status_code, -1)
|
self.assertEqual(self.service_instance.last_http_status_code, -1)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 4 — Two consecutive failures → no duplicate maintenance.request
|
# Test 4 -- Two consecutive cron runs KO -> no duplicate request
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_no_duplicate_request_on_repeated_failure(self):
|
def test_no_duplicate_request_on_repeated_failure(self):
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with (
|
||||||
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
|
):
|
||||||
mock_requests.get.return_value = _mock_response(500)
|
mock_requests.get.return_value = _mock_response(500)
|
||||||
mock_requests.exceptions.RequestException = Exception
|
mock_requests.exceptions.RequestException = Exception
|
||||||
self.service_instance.check_http_status()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
request_1 = self.equipment.http_maintenance_request
|
request_1 = self.equipment.http_maintenance_request
|
||||||
self.assertTrue(request_1)
|
self.assertTrue(request_1)
|
||||||
|
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with (
|
||||||
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
|
):
|
||||||
mock_requests.get.return_value = _mock_response(500)
|
mock_requests.get.return_value = _mock_response(500)
|
||||||
mock_requests.exceptions.RequestException = Exception
|
mock_requests.exceptions.RequestException = Exception
|
||||||
self.service_instance.check_http_status()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
self.assertEqual(self.equipment.http_maintenance_request, request_1)
|
self.assertEqual(self.equipment.http_maintenance_request, request_1)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -107,7 +119,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 5 — Equipment in maintenance mode → cron skips it
|
# Test 5 -- Equipment in maintenance mode -> cron skips it
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_maintenance_mode_skips_http_check(self):
|
def test_maintenance_mode_skips_http_check(self):
|
||||||
self.equipment.write({"maintenance_mode": True})
|
self.equipment.write({"maintenance_mode": True})
|
||||||
@@ -118,11 +130,10 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.env["service.instance"].cron_check_http_services()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
mock_requests.get.assert_not_called()
|
mock_requests.get.assert_not_called()
|
||||||
# last_http_check_date must remain unset (never checked)
|
|
||||||
self.assertFalse(self.service_instance.last_http_check_date)
|
self.assertFalse(self.service_instance.last_http_check_date)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 6 — Expired maintenance mode → cron deactivates it
|
# Test 6 -- Expired maintenance mode -> cron deactivates it
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_maintenance_mode_auto_expiry(self):
|
def test_maintenance_mode_auto_expiry(self):
|
||||||
past = fields.Datetime.now() - timedelta(hours=1)
|
past = fields.Datetime.now() - timedelta(hours=1)
|
||||||
@@ -142,7 +153,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertFalse(self.equipment.maintenance_mode_end)
|
self.assertFalse(self.equipment.maintenance_mode_end)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 7 — Service without URL → ignored by cron
|
# Test 7 -- Service without URL -> ignored by cron
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_service_without_url_is_ignored(self):
|
def test_service_without_url_is_ignored(self):
|
||||||
self.service_instance.write({"service_url": False})
|
self.service_instance.write({"service_url": False})
|
||||||
@@ -156,7 +167,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertFalse(self.service_instance.last_http_check_date)
|
self.assertFalse(self.service_instance.last_http_check_date)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 8 — HTTP 404 (non-exception) → KO with correct code
|
# Test 8 -- HTTP 404 (non-exception) -> KO with correct code
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_http_non_200_non_exception(self):
|
def test_http_non_200_non_exception(self):
|
||||||
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
with patch(SERVICE_INSTANCE_REQUESTS) as mock_requests:
|
||||||
@@ -168,7 +179,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertEqual(self.service_instance.last_http_status_code, 404)
|
self.assertEqual(self.service_instance.last_http_status_code, 404)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 9 — Webhook called when a new maintenance.request is created
|
# Test 9 -- Webhook called when a new maintenance.request is created
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_webhook_called_on_new_request(self):
|
def test_webhook_called_on_new_request(self):
|
||||||
self.env["ir.config_parameter"].sudo().set_param(
|
self.env["ir.config_parameter"].sudo().set_param(
|
||||||
@@ -177,11 +188,12 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
)
|
)
|
||||||
with (
|
with (
|
||||||
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
|
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
|
||||||
):
|
):
|
||||||
mock_requests.get.return_value = _mock_response(500)
|
mock_requests.get.return_value = _mock_response(500)
|
||||||
mock_requests.exceptions.RequestException = Exception
|
mock_requests.exceptions.RequestException = Exception
|
||||||
self.service_instance.check_http_status()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
mock_http.post.assert_called_once()
|
mock_http.post.assert_called_once()
|
||||||
call_kwargs = mock_http.post.call_args
|
call_kwargs = mock_http.post.call_args
|
||||||
@@ -189,7 +201,7 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
self.assertEqual(payload["equipment"], self.equipment.name)
|
self.assertEqual(payload["equipment"], self.equipment.name)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 10 — Webhook skipped when no URL configured
|
# Test 10 -- Webhook skipped when no URL configured
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_webhook_skipped_when_no_url(self):
|
def test_webhook_skipped_when_no_url(self):
|
||||||
self.env["ir.config_parameter"].sudo().set_param(
|
self.env["ir.config_parameter"].sudo().set_param(
|
||||||
@@ -197,16 +209,17 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
)
|
)
|
||||||
with (
|
with (
|
||||||
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
|
patch(EQUIPMENT_HTTP_REQUESTS) as mock_http,
|
||||||
):
|
):
|
||||||
mock_requests.get.return_value = _mock_response(500)
|
mock_requests.get.return_value = _mock_response(500)
|
||||||
mock_requests.exceptions.RequestException = Exception
|
mock_requests.exceptions.RequestException = Exception
|
||||||
self.service_instance.check_http_status()
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
mock_http.post.assert_not_called()
|
mock_http.post.assert_not_called()
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Test 11 — Service without equipment → check_http_status skips it
|
# Test 11 -- Service without equipment -> check_http_status skips it
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def test_service_without_equipment_is_ignored(self):
|
def test_service_without_equipment_is_ignored(self):
|
||||||
self.service_instance.write({"equipment_id": False})
|
self.service_instance.write({"equipment_id": False})
|
||||||
@@ -218,3 +231,52 @@ class TestHttpMonitoring(TransactionCase):
|
|||||||
|
|
||||||
mock_requests.get.assert_not_called()
|
mock_requests.get.assert_not_called()
|
||||||
self.assertFalse(self.service_instance.last_http_check_date)
|
self.assertFalse(self.service_instance.last_http_check_date)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Test 12 -- Transient failure (KO pass 1, OK pass 2) -> no request created
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def test_transient_failure_no_request_created(self):
|
||||||
|
with (
|
||||||
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP),
|
||||||
|
):
|
||||||
|
mock_requests.get.side_effect = [
|
||||||
|
_mock_response(500), # pass 1: KO
|
||||||
|
_mock_response(200), # pass 2 (retry): OK
|
||||||
|
]
|
||||||
|
mock_requests.exceptions.RequestException = Exception
|
||||||
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
|
# Final status reflects pass 2 result
|
||||||
|
self.assertTrue(self.service_instance.http_status_ok)
|
||||||
|
self.assertEqual(self.service_instance.last_http_status_code, 200)
|
||||||
|
# No maintenance.request must have been created
|
||||||
|
self.assertFalse(self.equipment.http_maintenance_request)
|
||||||
|
self.assertEqual(
|
||||||
|
self.env["maintenance.request"].search_count(
|
||||||
|
[("equipment_id", "=", self.equipment.id)]
|
||||||
|
),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Test 13 -- Confirmed failure (KO pass 1 and 2) -> request created
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def test_confirmed_failure_creates_request(self):
|
||||||
|
with (
|
||||||
|
patch(SERVICE_INSTANCE_REQUESTS) as mock_requests,
|
||||||
|
patch(SERVICE_INSTANCE_SLEEP) as mock_sleep,
|
||||||
|
):
|
||||||
|
mock_requests.get.return_value = _mock_response(503)
|
||||||
|
mock_requests.exceptions.RequestException = Exception
|
||||||
|
self.env["service.instance"].cron_check_http_services()
|
||||||
|
|
||||||
|
# sleep must have been called between the two passes
|
||||||
|
mock_sleep.assert_called_once_with(2)
|
||||||
|
# requests.get must have been called twice (pass 1 + pass 2)
|
||||||
|
self.assertEqual(mock_requests.get.call_count, 2)
|
||||||
|
# Final status is KO
|
||||||
|
self.assertFalse(self.service_instance.http_status_ok)
|
||||||
|
self.assertEqual(self.service_instance.last_http_status_code, 503)
|
||||||
|
# maintenance.request created
|
||||||
|
self.assertTrue(self.equipment.http_maintenance_request)
|
||||||
|
|||||||
Reference in New Issue
Block a user