Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
12db02b
create me a Compatibility class
dale-wahl Jun 15, 2026
bfaa455
test it on some processors
dale-wahl Jun 15, 2026
5eb8add
processor: clear default behavior
dale-wahl Jun 15, 2026
d7cd873
convert more type processors to use Compatibility
dale-wahl Jun 15, 2026
97ba63a
update type is_compatible_with checks
dale-wahl Jun 16, 2026
76533fb
base_twitter_stats: compatibility with abstract class, cannot overwri…
dale-wahl Jun 16, 2026
f19adf9
is compatible w twitter stats subclasses
dale-wahl Jun 16, 2026
0ddfdde
move compatibility, fold in followups
dale-wahl Jun 16, 2026
500d6d1
fold in exclude processor followups into compatibility
dale-wahl Jun 16, 2026
6004dba
compatibility: convert extension type checks
dale-wahl Jun 16, 2026
91658ae
base_filter: abstract class compatibility
dale-wahl Jun 16, 2026
1168256
compatibility: convert top_dataset checks plus extension check
dale-wahl Jun 16, 2026
7b29184
Merge branch 'master' into compatibility
dale-wahl Jun 16, 2026
309e132
compatibility: multi type checks
dale-wahl Jun 16, 2026
863f085
compatibility: fix executable check (pass function) to settings check
dale-wahl Jun 16, 2026
d71f6ce
compatibility: figure out ffmpeg -> ffprobe connection and generalize.
dale-wahl Jun 16, 2026
f8d6408
compatibility: add a short circuit! do not check every requirement. a…
dale-wahl Jun 16, 2026
6f8e605
compatibility datasources
dale-wahl Jun 17, 2026
6ee0600
compatibility: add is_rankable and handle ranking multiple items True…
dale-wahl Jun 17, 2026
5384e92
compatibility media_types
dale-wahl Jun 17, 2026
27c8e2f
compatibility: required_settings
dale-wahl Jun 17, 2026
e18f264
compatibility: clarify the dataset-required separation and make a helper
dale-wahl Jun 17, 2026
345dd88
compatibility: excluded_types, is_collector, child_only axes
dale-wahl Jun 17, 2026
6f6421a
compatibility: base downloaders
dale-wahl Jun 17, 2026
e466c3f
compatibility: keep `is_compatible_with` overrides (for credentials),…
dale-wahl Jun 17, 2026
7483df1
compatibility: couple more with overrides
dale-wahl Jun 17, 2026
3c0da2d
compatibility: requires ANY column (in addition to requires all columns)
dale-wahl Jun 17, 2026
20dd2ff
video_hasher: easy compatibility
dale-wahl Jun 17, 2026
248622f
compatibilities w/ overrides
dale-wahl Jun 17, 2026
3302f48
compatibility cleanup
dale-wahl Jun 17, 2026
0fc03f7
clean up hasattr is_compatible_with checks
dale-wahl Jun 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 54 additions & 14 deletions backend/lib/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from backend.lib.worker import BasicWorker
from common.lib.dataset import DataSet, StatusType
from common.lib.compatibility import Compatibility
from common.lib.fourcat_module import FourcatModule
from common.lib.helpers import get_software_commit, remove_nuls, send_email, hash_to_md5
from common.lib.exceptions import (WorkerInterruptedException, ProcessorInterruptedException, ProcessorException,
Expand All @@ -37,10 +38,18 @@ class BasicProcessor(FourcatModule, BasicWorker, metaclass=abc.ABCMeta):
be used as input for another processor (though whether and when this is
useful is another question).

To determine whether a processor can process a given dataset, you can
define a `is_compatible_with(FourcatModule module=None, config=None):) -> bool` class
method which takes a dataset as argument and returns a bool that determines
if this processor is considered compatible with that dataset. For example:
To determine whether a processor can process a given dataset, declare a
Compatibility specification as the `compatibility` class attribute. The
default `is_compatible_with` is evaluated from it. For example:

.. code-block:: python

compatibility = Compatibility(types={"linguistic-features"})

Processors with genuinely dynamic requirements (e.g. ones that must inspect
a dataset's genealogy) may instead override `is_compatible_with(cls,
module=None, config=None) -> bool` directly; an override takes precedence
over the `compatibility` attribute. For example:

.. code-block:: python

Expand Down Expand Up @@ -97,6 +106,11 @@ def is_compatible_with(cls, module=None, config=None):
#: `remove_disposable_files()` method will be called.
for_cleanup = None

#: A common.lib.compatibility.Compatibility object describing which datasets
#: this processor accepts. When set, the default is_compatible_with() is
#: evaluated from it; when left as None, that default falls back to accepting
#: only top-level datasets. Its `excluded_followups` field is also consulted
#: by the default exclude_followup_processors().
compatibility = None

def work(self):
"""
Process a dataset
Expand Down Expand Up @@ -973,6 +987,33 @@ def _validate_map_item_post_run(self):
except Exception:
pass

@classmethod
def is_compatible_with(cls, module=None, config=None):
    """
    Check whether this processor can be run on the given module.

    The decision is delegated to a Compatibility specification. If the
    processor declares one through its `compatibility` class attribute,
    that specification is used. Processors whose requirements cannot be
    captured declaratively (for example, ones that need to inspect a
    dataset's ancestry) can override this method; such an override is used
    instead of the attribute.

    If the processor declares no specification and does not override this
    method, a top-dataset-only specification is used, which keeps the
    historical default of accepting only datasets without a parent.

    :param module:  Dataset (normally) or processor to check against
    :param ConfigManager|None config:  Context-aware configuration reader
    :return bool:
    """
    spec = cls.compatibility
    if spec is None:
        # Legacy default: without a declared specification the processor
        # runs on collected data only, not on the output of other
        # processors.
        spec = Compatibility(top_dataset_only=True)

    return spec.is_compatible_with(module, config=config)

@classmethod
def is_filter(cls):
"""
Expand Down Expand Up @@ -1075,19 +1116,18 @@ def is_rankable(cls, multiple_items=True):
@classmethod
def exclude_followup_processors(cls, processor_type=None):
    """
    Tell whether a given follow-up processor must not be offered.

    The processor types to exclude after this processor are read from the
    `excluded_followups` field of the `compatibility` specification. When
    no specification is declared, nothing is excluded. Processors that need
    dynamic exclusion logic may override this method instead.

    :param str processor_type:  Processor type to check
    :return bool:  True if the follow-up should be excluded, False otherwise
    """
    spec = cls.compatibility
    return spec is not None and processor_type in spec.excluded_followups

@abc.abstractmethod
Expand Down
Loading
Loading