|
114 | 114 | * After creating a new database or installing queue_job on an |
115 | 115 | existing database, Odoo must be restarted for the runner to detect it. |
116 | 116 |
|
117 | | -* When Odoo shuts down normally, it waits for running jobs to finish. |
118 | | - However, when the Odoo server crashes or is otherwise force-stopped, |
119 | | - running jobs are interrupted while the runner has no chance to know |
120 | | - they have been aborted. In such situations, jobs may remain in |
121 | | - ``started`` or ``enqueued`` state after the Odoo server is halted. |
122 | | - Since the runner has no way to know if they are actually running or |
123 | | - not, and does not know for sure if it is safe to restart the jobs, |
124 | | - it does not attempt to restart them automatically. Such stale jobs |
125 | | - therefore fill the running queue and prevent other jobs to start. |
126 | | - You must therefore requeue them manually, either from the Jobs view, |
127 | | - or by running the following SQL statement *before starting Odoo*: |
128 | | -
|
129 | | -.. code-block:: sql |
130 | | -
|
131 | | - update queue_job set state='pending' where state in ('started', 'enqueued') |
132 | | -
|
133 | 117 | .. rubric:: Footnotes |
134 | 118 |
|
135 | 119 | .. [1] From a security standpoint, it is safe to have an anonymous HTTP |
@@ -343,6 +327,62 @@ def set_job_enqueued(self, uuid): |
343 | 327 | (ENQUEUED, uuid), |
344 | 328 | ) |
345 | 329 |
|
def requeue_dead_jobs(self):
    """Set stale ``enqueued``/``started`` jobs that are not locked to pending.

    A job is locked while it is being executed; when a job is killed, it
    releases the lock.  The query therefore selects the rows of
    ``queue_job_locks`` that can still be locked (``FOR UPDATE SKIP
    LOCKED``) and that belong to jobs in state ``enqueued`` or
    ``started``, and resets those jobs to ``pending``.  Jobs that were in
    state ``started`` additionally get their ``retry`` counter
    incremented by one.

    A buffer is applied on ``date_enqueued``: only jobs enqueued more than
    10 seconds ago are considered, so that jobs are not requeued before
    they have actually started.

    When Odoo shuts down normally, it waits for running jobs to finish.
    However, when the Odoo server crashes or is otherwise force-stopped,
    running jobs are interrupted while the runner has no chance to know
    they have been aborted.

    A warning is logged for each requeued job.  Nothing is returned.
    """
    query = """
        UPDATE
            queue_job
        SET
            state='pending',
            retry=(CASE WHEN state='started' THEN retry+1 ELSE retry END)
        WHERE
            id in (
                SELECT
                    id
                FROM
                    queue_job_locks
                WHERE
                    id in (
                        SELECT
                            id
                        FROM
                            queue_job
                        WHERE
                            state IN ('enqueued','started')
                            AND date_enqueued <
                                (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
                    )
                FOR UPDATE SKIP LOCKED
            )
        RETURNING uuid
    """

    with closing(self.conn.cursor()) as cr:
        cr.execute(query)
        # RETURNING yields one single-column row per job that was reset.
        requeued_rows = cr.fetchall()

    # Iterating an empty result is a no-op, so no emptiness guard is needed;
    # the uuid is passed as a lazy logging argument ("%s" stringifies it).
    for row in requeued_rows:
        _logger.warning(
            "Re-queued job with uuid: %s",
            row[0],
        )
346 | 386 |
|
347 | 387 | class QueueJobRunner(object): |
348 | 388 | def __init__( |
@@ -424,6 +464,11 @@ def initialize_databases(self): |
424 | 464 | self.channel_manager.notify(db_name, *job_data) |
425 | 465 | _logger.info("queue job runner ready for db %s", db_name) |
426 | 466 |
|
def requeue_dead_jobs(self):
    """Ask every queue_job-enabled database to requeue its dead jobs.

    Walks the values of ``self.db_by_name`` and calls
    ``requeue_dead_jobs()`` on each database object whose
    ``has_queue_job`` attribute is truthy; the others are skipped.
    """
    for database in self.db_by_name.values():
        if not database.has_queue_job:
            continue
        database.requeue_dead_jobs()
| 471 | + |
427 | 472 | def run_jobs(self): |
428 | 473 | now = _odoo_now() |
429 | 474 | for job in self.channel_manager.get_jobs_to_run(now): |
@@ -516,6 +561,7 @@ def run(self): |
516 | 561 | _logger.info("database connections ready") |
517 | 562 | # inner loop does the normal processing |
518 | 563 | while not self._stop: |
| 564 | + self.requeue_dead_jobs() |
519 | 565 | self.process_notifications() |
520 | 566 | self.run_jobs() |
521 | 567 | self.wait_notification() |
|
0 commit comments