Skip to content

Bug: reserve_jobs=True fails when dj.config.jobs.add_job_metadata = True and dj.config.jobs.keep_completed = True #1379

@arturoptophys

Description

@arturoptophys

Bug Report

Description

While testing the job metadata functionality, I encountered the following problem:
When setting

dj.config.jobs.add_job_metadata = True
dj.config.jobs.keep_completed = True

I can't run populate with reserve_jobs=True.

Reproducibility

# Minimal reproduction: with both job settings below enabled, the final
# populate(reserve_jobs=True) call raises DataJointError.
import datajoint as dj
# The two settings under which the failure was observed.
dj.config.jobs.add_job_metadata = True
dj.config.jobs.keep_completed = True
dj.conn() # using datajoint.json
schema = dj.Schema("TEST_")

# Upstream table: single attribute `number`, with five rows listed in `contents`.
@schema
class Numbers(dj.Lookup):
    definition = """
    number: int64
    """
    contents = [(0,), (1,), (2,), (4,), (8,)]

# Downstream table: references Numbers (`-> Numbers`) and adds the attribute
# `squared`.
@schema
class NumbersSquared(dj.Computed):
    definition = """
    -> Numbers
    ---
    squared: int64
    """

    def make(self, key):
        # Insert the incoming key together with the square of its `number` value.
        self.insert1({**key, "squared": key["number"] ** 2})

# Fails here. Per the reported traceback, the error is raised inside
# Job.refresh, in the `if config.jobs.keep_completed:` branch, at
# `self.completed.restrict(key_source, semantic_check=False) - self._target`,
# with "Cannot join on attribute `number`: different lineages".
NumbersSquared.populate(reserve_jobs=True)

Fails with:

---------------------------------------------------------------------------
DataJointError                            Traceback (most recent call last)
Cell In[2], line 1
----> 1 NumbersSquared.populate(reserve_jobs=True)

File ~/datajoint-python/src/datajoint/autopopulate.py:366, in AutoPopulate.populate(self, suppress_errors, return_exception_objects, reserve_jobs, max_calls, display_progress, processes, make_kwargs, priority, refresh, *restrictions)
    363     raise DataJointError("Populate cannot be called during a transaction.")
    365 if reserve_jobs:
--> 366     return self._populate_distributed(
    367         *restrictions,
    368         suppress_errors=suppress_errors,
    369         return_exception_objects=return_exception_objects,
    370         max_calls=max_calls,
    371         display_progress=display_progress,
    372         processes=processes,
    373         make_kwargs=make_kwargs,
    374         priority=priority,
    375         refresh=refresh,
    376     )
    377 else:
    378     return self._populate_direct(
    379         *restrictions,
    380         suppress_errors=suppress_errors,
   (...)    385         make_kwargs=make_kwargs,
    386     )

File ~/datajoint-python/src/datajoint/autopopulate.py:490, in AutoPopulate._populate_distributed(self, suppress_errors, return_exception_objects, max_calls, display_progress, processes, make_kwargs, priority, refresh, *restrictions)
    486     refresh = config.jobs.auto_refresh
    487 if refresh:
    488     # Use delay=-1 to ensure jobs are immediately schedulable
    489     # (avoids race condition with scheduled_time <= CURRENT_TIMESTAMP(3) check)
--> 490     self.jobs.refresh(*restrictions, priority=priority, delay=-1)
    492 # Fetch pending jobs ordered by priority (use CURRENT_TIMESTAMP(3) for datetime(3) precision)
    493 pending_query = self.jobs.pending & "scheduled_time <= CURRENT_TIMESTAMP(3)"

File ~/datajoint-python/src/datajoint/jobs.py:398, in Job.refresh(self, delay, priority, stale_timeout, orphan_timeout, *restrictions)
    394 # 2. Re-pend success jobs if keep_completed=True
    395 if config.jobs.keep_completed:
    396     # Success jobs whose keys are in key_source but not in target
    397     # Disable semantic_check for Job table operations
--> 398     success_to_repend = self.completed.restrict(key_source, semantic_check=False) - self._target
    399     repend_keys = success_to_repend.keys()
    400     for key in repend_keys:

File ~/datajoint-python/src/datajoint/expression.py:282, in QueryExpression.__sub__(self, restriction)
    276 def __sub__(self, restriction):
    277     """
    278     Inverted restriction e.g. ``q1 - q2``.
    279     :return: a restricted copy of the input argument
    280     See QueryExpression.restrict for more detail.
    281     """
--> 282     return self.restrict(Not(restriction))

File ~/datajoint-python/src/datajoint/expression.py:234, in QueryExpression.restrict(self, restriction, semantic_check)
    232         result._top = restriction
    233     return result
--> 234 new_condition = make_condition(self, restriction, attributes, semantic_check=semantic_check)
    235 if new_condition is True:
    236     return self  # restriction has no effect, return the same object

File ~/datajoint-python/src/datajoint/condition.py:403, in make_condition(query_expression, condition, columns, semantic_check)
    401 # restrict by another expression
    402 if isinstance(condition, QueryExpression):
--> 403     assert_join_compatibility(query_expression, condition, semantic_check=semantic_check)
    404     # Match on all non-hidden namesakes (hidden attributes excluded)
    405     common_attributes = [q for q in condition.heading.names if q in query_expression.heading.names]

File ~/datajoint-python/src/datajoint/condition.py:256, in assert_join_compatibility(expr1, expr2, semantic_check)
    254 # Semantic match requires both lineages to be non-None and equal
    255 if lineage1 is None or lineage2 is None or lineage1 != lineage2:
--> 256     raise DataJointError(
    257         f"Cannot join on attribute `{name}`: "
    258         f"different lineages ({lineage1} vs {lineage2}). "
    259         f"Use .proj() to rename one of the attributes."
    260     )

DataJointError: Cannot join on attribute `number`: different lineages (TEST_.~~numbers_squared.number vs TEST_.#numbers.number). Use .proj() to rename one of the attributes.

Ubuntu 24.04, python 3.12.11
MySQL - remotely accessed mysql-latest Docker container (MySQL Server 9.6.0-1.el9)
DataJoint Version : DataJoint 2.0.0a27

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Indicates an unexpected problem or unintended behavior
    triage: Indicates issues, pull requests, or discussions need to be reviewed for the first time

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions