diff --git a/beets/config_default.yaml b/beets/config_default.yaml index 8df7f61e61..62c8982414 100644 --- a/beets/config_default.yaml +++ b/beets/config_default.yaml @@ -52,6 +52,8 @@ import: item: artist title duplicate_action: ask duplicate_verbose_prompt: no + duplicate_track_resolution: no + duplicate_track_action: '' bell: no set_fields: {} ignored_alias_types: [] diff --git a/beets/importer/session.py b/beets/importer/session.py index 328de6ea1d..46910bd89f 100644 --- a/beets/importer/session.py +++ b/beets/importer/session.py @@ -183,6 +183,14 @@ def choose_match(self, task: ImportTask): def resolve_duplicate(self, task: ImportTask, found_duplicates): raise NotImplementedError + def resolve_track_duplicates(self, task: ImportTask, duplicates) -> str: + """Decide what to do with album tracks that already exist in the + library. Return ``"s"`` (skip the duplicate tracks and fold the + remaining new tracks into the existing album), ``"k"`` (keep all) or + ``"r"`` (remove the old items). + """ + raise NotImplementedError + def choose_item(self, task: ImportTask): raise NotImplementedError @@ -205,6 +213,10 @@ def run(self): # Split directory tasks into one task for each album. stages += [stagefuncs.group_albums(self)] + # Optionally drop or replace album tracks that already exist in + # the library before the autotag lookup runs. + stages += [stagefuncs.resolve_track_duplicates(self)] + # These stages either talk to the user to get a decision or, # in the case of a non-autotagged import, just choose to # import everything as-is. In *both* cases, these stages diff --git a/beets/importer/stages.py b/beets/importer/stages.py index 820fb312e4..879f376dab 100644 --- a/beets/importer/stages.py +++ b/beets/importer/stages.py @@ -19,8 +19,9 @@ import logging from typing import TYPE_CHECKING -from beets import config, plugins +from beets import config, dbcore, plugins from beets.util import MoveOperation, displayable_path, pipeline +from beets.util.color import colorize from .tasks import ( Action, @@ -127,6 +128,99 @@ def group(item): task = pipeline.multiple(tasks) +@pipeline.mutator_stage +def resolve_track_duplicates(session: ImportSession, task: ImportTask): + """Resolve tracks of an album that already exist in the library. + + When ``import.duplicate_track_resolution`` is enabled, each item of an + album import is checked against the library using + ``import.duplicate_keys.item``. Matched tracks are resolved according to + ``import.duplicate_track_action`` (which falls back to + ``import.duplicate_action`` when unset): + + * ``skip`` drops the duplicate tracks and adds the remaining new tracks to + the existing album they belong to (if every track is a duplicate, the + whole album is skipped); + * ``remove`` removes the matching old library items; + * ``keep`` (and ``merge``) import everything as-is; + * ``ask`` prompts the session for one of the above. + + This runs before :func:`lookup_candidates` so that dropped tracks are + excluded from the autotag match. Singleton imports are handled by the + regular duplicate resolution and are ignored here. + """ + if ( + task.skip + or not task.is_album + or not task.items + or not config["import"]["duplicate_track_resolution"].get(bool) + ): + return + + keys = config["import"]["duplicate_keys"]["item"].as_str_seq() + if not keys: + return + + # Map each incoming item to the existing library items it duplicates. + duplicates: dict[library.Item, list[library.Item]] = {} + for item in task.items: + if not any(item.get(k) for k in keys): + continue + matches = _find_track_duplicates(session.lib, item, keys) + if matches: + duplicates[item] = matches + + if not duplicates: + return + + action = _track_duplicate_action() + if action == "a": + action = session.resolve_track_duplicates(task, duplicates) + + if action == "s": + for item in duplicates: + log.info( + colorize("text_warning", "Skipping duplicate track: {}"), + displayable_path(item.path), + ) + task.items.remove(item) + if not task.items: + # Every track was a duplicate: skip the whole album. + log.info( + colorize( + "text_warning", + "Skipping album, all tracks are duplicates: {}", + ), + next(iter(duplicates)).album, + ) + task.set_choice(Action.SKIP) + return + # Only some tracks were duplicates; we have already dropped them, so + # don't let the album-level check skip the rest. + task.duplicate_tracks_resolved = True + # Fold the remaining new tracks into the existing album, if the + # matched duplicates all belong to a single one. + album_ids = { + match.album_id + for matches in duplicates.values() + for match in matches + if match.album_id is not None + } + if len(album_ids) == 1: + task.fold_into_album_id = album_ids.pop() + else: + log.warning( + "cannot fold tracks into a single existing album; " + "importing them as a new album" + ) + elif action == "r": + for matches in duplicates.values(): + task.duplicate_track_items_to_remove.extend(matches) + task.duplicate_tracks_resolved = True + # "k" (keep) and "m" (merge) leave the incoming tracks untouched; whole + # album duplicates are still handled by the regular resolution stage. + + @pipeline.mutator_stage def lookup_candidates(session: ImportSession, task: ImportTask): """A coroutine for performing the initial MusicBrainz lookup for an @@ -283,6 +377,9 @@ def manipulate_files(session: ImportSession, task: ImportTask): if task.should_remove_duplicates: task.remove_duplicates(session.lib) + if task.duplicate_track_items_to_remove: + task.remove_duplicate_track_items(session.lib) + if session.config["move"]: operation = MoveOperation.MOVE elif session.config["copy"]: @@ -333,10 +430,54 @@ def _apply_choice(session: ImportSession, task: ImportTask): task.set_fields(session.lib) +def _track_duplicate_action() -> str: + """Return the single-letter action for per-track duplicate resolution. + + Uses ``import.duplicate_track_action`` when set, otherwise falls back to + ``import.duplicate_action``. + """ + choices = { + "skip": "s", + "keep": "k", + "remove": "r", + "merge": "m", + "ask": "a", + } + cfg = config["import"] + view = ( + cfg["duplicate_track_action"] + if cfg["duplicate_track_action"].get() + else cfg["duplicate_action"] + ) + return view.as_choice(choices) + + +def _find_track_duplicates( + lib: library.Library, item: library.Item, keys: list[str] +) -> list[library.Item]: + """Return library items matching `item` on all `keys`, excluding the + item itself (so re-imports do not match their own files). + + Unlike :meth:`Item.duplicates_query`, this matches *every* library item, + including tracks that belong to an album -- not just singletons -- so a + track is caught regardless of how it was originally imported. + """ + query = dbcore.AndQuery( + [item.field_query(k, item.get(k), dbcore.MatchQuery) for k in keys] + ) + return [other for other in lib.items(query) if other.path != item.path] + + def _resolve_duplicates(session: ImportSession, task: ImportTask): """Check if a task conflicts with items or albums already imported and ask the session to resolve this. """ + if task.duplicate_tracks_resolved: + # Per-track duplicate resolution already pruned (or recorded for + # removal) the tracks of this album that exist in the library; the + # rest are new and should be imported without a whole-album skip. + return + if task.choice_flag in (Action.ASIS, Action.APPLY, Action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: diff --git a/beets/importer/tasks.py b/beets/importer/tasks.py index 976331aaf7..3e3687fbcd 100644 --- a/beets/importer/tasks.py +++ b/beets/importer/tasks.py @@ -72,6 +72,23 @@ log = logging.getLogger("beets") +def _remove_duplicate_item( + lib: library.Library, item: library.Item, with_album: bool = True +): + """Remove ``item`` from ``lib`` and delete its file when it lives inside + the library directory, pruning any newly-empty parent directories. + """ + item.remove(with_album=with_album) + if lib.directory in util.ancestry(item.path): + log.debug("deleting duplicate {.filepath}", item) + util.remove(item.path) + util.prune_dirs( + os.path.dirname(item.path), + lib.directory, + clutter=config["clutter"].as_str_seq(), + ) + + class ImportAbortError(Exception): """Raised when the user aborts the tagging operation.""" @@ -177,6 +194,16 @@ def __init__( super().__init__(toppath, paths, items) self.should_remove_duplicates = False self.should_merge_duplicates = False + # Existing library items to remove because individual tracks of this + # album duplicate them (see ``duplicate_track_resolution``). + self.duplicate_track_items_to_remove: list[library.Item] = [] + # Set once per-track duplicate resolution has handled this task, so the + # album-level duplicate check does not then skip the remaining tracks. + self.duplicate_tracks_resolved = False + # Id of an existing album to fold the imported items into (instead of + # creating a new album), set when skipping per-track duplicates leaves + # new tracks belonging to an existing album. + self.fold_into_album_id: int | None = None self.is_album = True def set_choice(self, choice: Action | AlbumMatch | TrackMatch): @@ -272,15 +299,7 @@ def remove_duplicates(self, lib: library.Library): artpath = album.artpath for item in album.items(): - item.remove(with_album=False) - if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {.filepath}", item) - util.remove(item.path) - util.prune_dirs( - os.path.dirname(item.path), - lib.directory, - clutter=config["clutter"].as_str_seq(), - ) + _remove_duplicate_item(lib, item, with_album=False) album.remove(with_items=False) @@ -293,6 +312,17 @@ def remove_duplicates(self, lib: library.Library): clutter=config["clutter"].as_str_seq(), ) + def remove_duplicate_track_items(self, lib: library.Library): + """Remove the old library items that individual tracks of this album + duplicate, as recorded in ``duplicate_track_items_to_remove``. + """ + seen: set[int] = set() + for item in self.duplicate_track_items_to_remove: + if item.id in seen: + continue + seen.add(item.id) + _remove_duplicate_item(lib, item) + def set_fields(self, lib: library.Library): """Sets the fields given at CLI or configuration to the specified values, for both the album and all its items. @@ -515,6 +545,23 @@ def add(self, lib: library.Library): self.record_replaced(lib) self.remove_replaced(lib) + fold_album = ( + lib.get_album(self.fold_into_album_id) + if self.fold_into_album_id is not None + else None + ) + if fold_album is not None: + # Fold the imported items into an existing album rather than + # creating a new one. + self.album = fold_album + for item in self.imported_items(): + item.album_id = self.album.id + if item.id is None: + item.add(lib) + else: + item.store() + return + self.album = lib.add_album(self.imported_items()) if self.choice_flag == Action.APPLY and isinstance( self.match, AlbumMatch @@ -731,15 +778,7 @@ def remove_duplicates(self, lib: library.Library): duplicate_items = self.find_duplicates(lib) log.debug("removing {} old duplicated items", len(duplicate_items)) for item in duplicate_items: - item.remove() - if lib.directory in util.ancestry(item.path): - log.debug("deleting duplicate {.filepath}", item) - util.remove(item.path) - util.prune_dirs( - os.path.dirname(item.path), - lib.directory, - clutter=config["clutter"].as_str_seq(), - ) + _remove_duplicate_item(lib, item) def add(self, lib): with lib.transaction(): diff --git a/beets/test/helper.py b/beets/test/helper.py index b676ac450b..9478d76e96 100644 --- a/beets/test/helper.py +++ b/beets/test/helper.py @@ -669,6 +669,18 @@ def resolve_duplicate(self, task, found_duplicates): elif res == self.Resolution.MERGE: task.should_merge_duplicates = True + def resolve_track_duplicates(self, task, duplicates): + try: + res = self._resolutions.pop(0) + except IndexError: + res = self.default_resolution + + return { + self.Resolution.SKIP: "s", + self.Resolution.KEEPBOTH: "k", + self.Resolution.REMOVE: "r", + }.get(res, "k") + class TerminalImportSessionFixture(TerminalImportSession): def __init__(self, *args, **kwargs): diff --git a/beets/ui/commands/import_/session.py b/beets/ui/commands/import_/session.py index 007715b2f3..79ccbeeb43 100644 --- a/beets/ui/commands/import_/session.py +++ b/beets/ui/commands/import_/session.py @@ -204,6 +204,28 @@ def resolve_duplicate(self, task, found_duplicates): else: assert False + def resolve_track_duplicates(self, task, duplicates) -> str: + """Decide what to do with album tracks already in the library.""" + log.warning("Some tracks are already in the library!") + + if config["import"]["quiet"]: + # In quiet mode, don't prompt -- just skip the duplicate tracks. + log.info("Skipping duplicate tracks.") + return "s" + + existing = [item for matches in duplicates.values() for item in matches] + ui.print_("Old: " + summarize_items(existing, True)) + if config["import"]["duplicate_verbose_prompt"]: + for item in existing: + print(f" {item}") + + ui.print_("New: " + summarize_items(list(duplicates), True)) + if config["import"]["duplicate_verbose_prompt"]: + for item in duplicates: + print(f" {item}") + + return ui.input_options(("Skip dupes", "Keep all", "Remove old")) + def should_resume(self, path): return ui.input_yn( f"Import of the directory:\n{displayable_path(path)}\n" diff --git a/docs/changelog.rst b/docs/changelog.rst index 23812a7b7d..91bb27db4e 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -27,6 +27,13 @@ New features :conf:`plugins.musicbrainz:aliases_as_credits` to make aliases-as-artist-credit optional. - :doc:`plugins/badfiles`: Added settings for auto error and warning actions. +- Add the :ref:`duplicate_track_resolution` import option, which checks each + track of an album import against the library (using the :ref:`duplicate_keys` + ``item`` fields) and resolves matches via the new + :ref:`duplicate_track_action` option (falling back to :ref:`duplicate_action` + when unset). ``skip`` drops already-imported tracks and adds the remaining new + tracks to the existing album, completing a partially-imported album. Disabled + by default. Bug fixes ~~~~~~~~~ diff --git a/docs/reference/config.rst b/docs/reference/config.rst index bc0a0b706a..75ed79f83e 100644 --- a/docs/reference/config.rst +++ b/docs/reference/config.rst @@ -842,6 +842,66 @@ is applied, which would, considering the default, look like this: Default: ``no``. +.. _duplicate_track_resolution: + +duplicate_track_resolution +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When enabled, album imports also check each *individual track* against the +library, using the same fields as :ref:`duplicate_keys` ``item`` (by default +``artist`` and ``title``; set it to e.g. ``mb_trackid`` to match on the +MusicBrainz track ID). Tracks already in the library are resolved according to +:ref:`duplicate_track_action`. + +This complements the album-level duplicate check (which matches whole albums on +:ref:`duplicate_keys` ``album``): it catches the case where some tracks of an +album are already in your library even though the album itself is not. Matching +considers *all* library items, whether they were imported as singletons or as +part of another album. + +.. note:: + + The check runs *before* the autotagger lookup, so it matches on the incoming + files' existing tags rather than the metadata beets would apply. When + importing with autotagging on, match on a stable identifier such as + ``mb_trackid`` (via :ref:`duplicate_keys` ``item``); ``artist`` and + ``title`` may not yet agree with what is in your library. + +Default: ``no``. + +.. _duplicate_track_action: + +duplicate_track_action +~~~~~~~~~~~~~~~~~~~~~~ + +How to resolve individual album tracks that already exist in the library when +:ref:`duplicate_track_resolution` is enabled. The available actions are: + +- ``skip`` drops the duplicate tracks and adds the remaining *new* tracks to the + existing album they belong to, instead of importing them as a separate album. + Use this to complete a partially-imported album. If every track is already + present, the whole album is skipped. (If the matching tracks do not all belong + to a single album, the new tracks are imported as their own album.) +- ``remove`` removes the matching old items from the library before importing. +- ``keep`` (and ``merge``) import the album unchanged. +- ``ask`` prompts you to choose one of the above. + +When left empty, this falls back to :ref:`duplicate_action`. + +A typical configuration for completing partially-imported albums while +autotagging looks like this: + +:: + + import: + duplicate_track_resolution: yes + duplicate_action: ask # whole-album duplicates + duplicate_track_action: skip # per-track duplicates: fold new tracks into the existing album + duplicate_keys: + item: mb_trackid # match on a stable id (recommended when autotagging) + +Default: empty (inherit :ref:`duplicate_action`). + .. _bell: bell diff --git a/test/test_importer.py b/test/test_importer.py index 51f44e4602..be1620c764 100644 --- a/test/test_importer.py +++ b/test/test_importer.py @@ -48,6 +48,7 @@ AutotagStub, BeetsTestCase, ImportHelper, + ImportSessionFixture, PluginMixin, TestHelper, has_program, @@ -1304,6 +1305,216 @@ def add_item_fixture(self, **kwargs): return item +class ImportTrackDuplicateResolutionTest(ImportHelper, BeetsTestCase): + """``import.duplicate_track_resolution``: per-track dedup on album import. + + The imported album has two tracks (``Tag Track 1`` and ``Tag Track 2``); + tests seed the library with items matching one or both of them (as + singletons unless noted otherwise). + """ + + def setUp(self): + super().setUp() + self.prepare_album_for_import(2) + + def add_item_fixture(self, **kwargs): + item = self.add_item_fixtures()[0] + item.update(kwargs) + item.store() + return item + + def add_album_member_fixture(self, **kwargs): + """Seed the library with a track that belongs to an album (i.e. not a + singleton), so its ``album_id`` is set. + """ + item = self.add_item_fixture(**kwargs) + self.lib.add_album([item]) + item.store() + return item + + def _import( + self, action="skip", enabled=True, resolution=None, track_action=None + ): + self.setup_importer( + autotag=False, + duplicate_track_resolution=enabled, + duplicate_action=action, + duplicate_track_action=track_action or "", + ) + if resolution is not None: + self.importer.default_resolution = resolution + self.importer.run() + + def test_skip_singleton_dup_imports_remainder_as_new_album(self): + # The matching track is a singleton, so there is no single album to + # fold into: the duplicate is dropped and the remaining track is + # imported as its own album. + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + self._import(action="skip") + + assert len(self.lib.albums()) == 1 + assert {i.title for i in self.lib.items()} == { + "Tag Track 1", + "Tag Track 2", + } + assert len(self.lib.items()) == 2 + + def test_skip_folds_missing_tracks_into_existing_album(self): + # Import the album fully, then lose one track from the library and + # re-import the same folder. The present track is skipped as a + # duplicate and the missing one is folded back into the *same* album + # (no second album is created). + self._import(action="skip") + album = self.lib.albums().get() + assert {i.title for i in album.items()} == { + "Tag Track 1", + "Tag Track 2", + } + + missing = self.lib.items("title:'Tag Track 2'").get() + missing.remove(delete=True) + assert {i.title for i in self.lib.items()} == {"Tag Track 1"} + + self._import(action="skip") + + assert len(self.lib.albums()) == 1 + album = self.lib.albums().get() + folded = self.lib.items("title:'Tag Track 2'").get() + assert folded.album_id == album.id + assert folded.filepath.exists() + assert {i.title for i in album.items()} == { + "Tag Track 1", + "Tag Track 2", + } + + def test_skip_matches_existing_album_member(self): + # A matching track that already belongs to an album in the library + # (not a singleton) must still be caught, and the remaining new track + # folded into that album. + item = self.add_album_member_fixture( + artist="Tag Artist", title="Tag Track 1" + ) + assert item.album_id is not None + + self._import(action="skip") + + # The duplicate "Tag Track 1" is dropped; "Tag Track 2" is folded into + # the existing album. + assert len(self.lib.albums()) == 1 + album = self.lib.albums().get() + assert {i.title for i in album.items()} == { + "Tag Track 1", + "Tag Track 2", + } + + def test_skip_all_duplicates_skips_album(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + self.add_item_fixture(artist="Tag Artist", title="Tag Track 2") + + self._import(action="skip") + + # Every track is a duplicate, so the whole album is skipped. + assert len(self.lib.albums()) == 0 + assert len(self.lib.items()) == 2 + + def test_remove_replaces_old_item(self): + old = self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + assert old.filepath.exists() + + self._import(action="remove") + + # The old matching item (and its file) is removed; both album tracks + # are imported. + assert not old.filepath.exists() + assert sorted(i.title for i in self.lib.items()) == [ + "Tag Track 1", + "Tag Track 2", + ] + assert len(self.lib.albums()) == 1 + + def test_keep_imports_all(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + self._import(action="keep") + + # Nothing is dropped or removed. + assert len(self.lib.items()) == 3 + assert len(self.lib.albums()) == 1 + + def test_disabled_by_default(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + self._import(action="skip", enabled=False) + + # With the option off, no track-level resolution happens. + assert len(self.lib.items()) == 3 + + def test_ask_skip_drops_duplicate_track(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + # With "ask", the session is prompted; answer SKIP. + self._import( + action="ask", resolution=ImportSessionFixture.Resolution.SKIP + ) + + assert len(self.lib.albums()) == 1 + assert len(self.lib.items()) == 2 + + def test_ask_remove_replaces_old_item(self): + old = self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + # With "ask", the session is prompted; answer REMOVE. + self._import( + action="ask", resolution=ImportSessionFixture.Resolution.REMOVE + ) + + assert not old.filepath.exists() + assert len(self.lib.albums()) == 1 + assert sorted(i.title for i in self.lib.items()) == [ + "Tag Track 1", + "Tag Track 2", + ] + + def test_inherits_duplicate_action_when_unset(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + # No duplicate_track_action: should inherit duplicate_action=skip. + self._import(action="skip", track_action=None) + + assert len(self.lib.albums()) == 1 + assert {i.title for i in self.lib.items()} == { + "Tag Track 1", + "Tag Track 2", + } + + def test_track_action_overrides_duplicate_action(self): + self.add_item_fixture(artist="Tag Artist", title="Tag Track 1") + + # duplicate_action says keep, but the track-specific action skips. + self._import(action="keep", track_action="skip") + + # The duplicate was dropped (skip), not kept, so only two items exist. + assert len(self.lib.items()) == 2 + + def test_ask_skip_folds_into_existing_album(self): + self._import(action="skip") + missing = self.lib.items("title:'Tag Track 2'").get() + missing.remove(delete=True) + + # With "ask", the session is prompted; answer SKIP, which folds the + # remaining track into the existing album. + self._import( + action="ask", resolution=ImportSessionFixture.Resolution.SKIP + ) + + assert len(self.lib.albums()) == 1 + assert {i.title for i in self.lib.albums().get().items()} == { + "Tag Track 1", + "Tag Track 2", + } + + class TagLogTest(unittest.TestCase): def test_tag_log_line(self): sio = StringIO()