Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions beets/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ import:
item: artist title
duplicate_action: ask
duplicate_verbose_prompt: no
duplicate_track_resolution: no
duplicate_track_action: ''
bell: no
set_fields: {}
ignored_alias_types: []
Expand Down
12 changes: 12 additions & 0 deletions beets/importer/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ def choose_match(self, task: ImportTask):
def resolve_duplicate(self, task: ImportTask, found_duplicates):
    """Abstract hook; this base implementation always raises
    ``NotImplementedError``.

    NOTE(review): presumably concrete sessions override this to decide
    what happens to ``task`` given ``found_duplicates`` -- confirm
    against the subclasses.
    """
    raise NotImplementedError

def resolve_track_duplicates(self, task: ImportTask, duplicates) -> str:
    """Choose how to handle album tracks already present in the library.

    This base implementation always raises ``NotImplementedError``.

    The return value is a single-letter action code:

    * ``"s"`` -- skip the duplicate tracks and fold the remaining new
      tracks into the existing album;
    * ``"k"`` -- keep all tracks;
    * ``"r"`` -- remove the old items.
    """
    raise NotImplementedError

def choose_item(self, task: ImportTask):
    """Abstract hook; this base implementation always raises
    ``NotImplementedError``.

    NOTE(review): presumably concrete sessions override this to make
    the choice for a single-item ``task`` -- confirm against the
    subclasses.
    """
    raise NotImplementedError

Expand All @@ -205,6 +213,10 @@ def run(self):
# Split directory tasks into one task for each album.
stages += [stagefuncs.group_albums(self)]

# Optionally drop or replace album tracks that already exist in
# the library before the autotag lookup runs.
stages += [stagefuncs.resolve_track_duplicates(self)]

# These stages either talk to the user to get a decision or,
# in the case of a non-autotagged import, just choose to
# import everything as-is. In *both* cases, these stages
Expand Down
143 changes: 142 additions & 1 deletion beets/importer/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import logging
from typing import TYPE_CHECKING

from beets import config, plugins
from beets import config, dbcore, plugins
from beets.util import MoveOperation, displayable_path, pipeline
from beets.util.color import colorize

from .tasks import (
Action,
Expand Down Expand Up @@ -127,6 +128,99 @@ def group(item):
task = pipeline.multiple(tasks)


@pipeline.mutator_stage
def resolve_track_duplicates(session: ImportSession, task: ImportTask):
    """Handle album tracks that the library already contains.

    The stage is a no-op unless ``import.duplicate_track_resolution`` is
    enabled and the task is a non-skipped album task with items. Each
    incoming item is looked up in the library on the
    ``import.duplicate_keys.item`` fields, and the matches are handled
    according to ``import.duplicate_track_action`` (or
    ``import.duplicate_action`` when the former is unset):

    * ``skip`` -- drop the duplicate tracks from the task and mark the
      remaining tracks to be folded into the existing album the matches
      belong to; when every track is a duplicate the whole album is
      skipped instead;
    * ``remove`` -- record the matching old library items on the task so
      they are removed later;
    * ``keep`` / ``merge`` -- leave the task untouched;
    * ``ask`` -- let the session pick one of the above.

    The stage runs before :func:`lookup_candidates` so that dropped
    tracks take no part in the autotag match. Singleton imports are left
    to the regular duplicate resolution.
    """
    if task.skip or not task.is_album or not task.items:
        return
    if not config["import"]["duplicate_track_resolution"].get(bool):
        return

    match_keys = config["import"]["duplicate_keys"]["item"].as_str_seq()
    if not match_keys:
        return

    # Incoming item -> the library items it duplicates. Items with no
    # value for any of the keys are never looked up.
    found: dict[library.Item, list[library.Item]] = {}
    for incoming in task.items:
        if any(incoming.get(key) for key in match_keys):
            hits = _find_track_duplicates(session.lib, incoming, match_keys)
            if hits:
                found[incoming] = hits
    if not found:
        return

    decision = _track_duplicate_action()
    if decision == "a":
        decision = session.resolve_track_duplicates(task, found)

    if decision == "r":
        for hits in found.values():
            task.duplicate_track_items_to_remove.extend(hits)
        task.duplicate_tracks_resolved = True
        return

    if decision != "s":
        # "k" (keep) and "m" (merge) leave the incoming tracks alone;
        # whole-album duplicates are still handled by the regular
        # resolution stage.
        return

    for incoming in found:
        log.info(
            colorize("text_warning", "Skipping duplicate track: {}"),
            displayable_path(incoming.path),
        )
        task.items.remove(incoming)

    if not task.items:
        # Nothing new is left, so the album as a whole is skipped.
        message = colorize(
            "text_warning",
            "Skipping album, all tracks are duplicates: {}",
        )
        log.info(message, next(iter(found)).album)
        task.set_choice(Action.SKIP)
        return

    # The duplicates are already gone from the task; flag it so the
    # album-level check does not skip the tracks that remain.
    task.duplicate_tracks_resolved = True

    # The remaining tracks can only be folded into an existing album when
    # all matched library items point at exactly one album.
    target_album_ids = set()
    for hits in found.values():
        for old_item in hits:
            if old_item.album_id is not None:
                target_album_ids.add(old_item.album_id)

    if len(target_album_ids) != 1:
        log.warning(
            "cannot fold tracks into a single existing album; "
            "importing them as a new album"
        )
        return
    (task.fold_into_album_id,) = target_album_ids


@pipeline.mutator_stage
def lookup_candidates(session: ImportSession, task: ImportTask):
"""A coroutine for performing the initial MusicBrainz lookup for an
Expand Down Expand Up @@ -283,6 +377,9 @@ def manipulate_files(session: ImportSession, task: ImportTask):
if task.should_remove_duplicates:
task.remove_duplicates(session.lib)

if task.duplicate_track_items_to_remove:
task.remove_duplicate_track_items(session.lib)

if session.config["move"]:
operation = MoveOperation.MOVE
elif session.config["copy"]:
Expand Down Expand Up @@ -333,10 +430,54 @@ def _apply_choice(session: ImportSession, task: ImportTask):
task.set_fields(session.lib)


def _track_duplicate_action() -> str:
    """Look up the one-letter action used for per-track duplicates.

    ``import.duplicate_track_action`` wins when it holds a truthy value;
    otherwise ``import.duplicate_action`` is consulted. The configured
    word is translated to its letter (``skip`` -> ``"s"``, ``keep`` ->
    ``"k"``, ``remove`` -> ``"r"``, ``merge`` -> ``"m"``, ``ask`` ->
    ``"a"``).
    """
    import_cfg = config["import"]
    track_view = import_cfg["duplicate_track_action"]
    # An empty or unset per-track option defers to the general one.
    if track_view.get():
        chosen_view = track_view
    else:
        chosen_view = import_cfg["duplicate_action"]
    return chosen_view.as_choice(
        {"skip": "s", "keep": "k", "remove": "r", "merge": "m", "ask": "a"}
    )


def _find_track_duplicates(
    lib: library.Library, item: library.Item, keys: list[str]
) -> list[library.Item]:
    """Find the library items that agree with `item` on every one of `keys`.

    Items stored at the same path as `item` are left out, so a re-import
    does not report its own files. In contrast to
    :meth:`Item.duplicates_query`, the lookup is not limited to
    singletons: tracks that are part of an album are matched as well.
    """
    conditions = []
    for key in keys:
        conditions.append(
            item.field_query(key, item.get(key), dbcore.MatchQuery)
        )
    candidates = lib.items(dbcore.AndQuery(conditions))
    return [found for found in candidates if found.path != item.path]


def _resolve_duplicates(session: ImportSession, task: ImportTask):
"""Check if a task conflicts with items or albums already imported
and ask the session to resolve this.
"""
if task.duplicate_tracks_resolved:
# Per-track duplicate resolution already pruned (or recorded for
# removal) the tracks of this album that exist in the library; the
# rest are new and should be imported without a whole-album skip.
return

if task.choice_flag in (Action.ASIS, Action.APPLY, Action.RETAG):
found_duplicates = task.find_duplicates(session.lib)
if found_duplicates:
Expand Down
75 changes: 57 additions & 18 deletions beets/importer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@
log = logging.getLogger("beets")


def _remove_duplicate_item(
    lib: library.Library, item: library.Item, with_album: bool = True
):
    """Drop ``item`` from ``lib`` and clean up its file.

    The item is always removed from the library (``with_album`` is passed
    through to ``item.remove``). Its file is only deleted when the path
    lies inside the library directory; in that case the parent
    directories are pruned afterwards, using the configured ``clutter``
    patterns.
    """
    item.remove(with_album=with_album)

    # Files outside the library directory are left on disk.
    if lib.directory not in util.ancestry(item.path):
        return

    log.debug("deleting duplicate {.filepath}", item)
    util.remove(item.path)

    parent_dir = os.path.dirname(item.path)
    clutter_patterns = config["clutter"].as_str_seq()
    util.prune_dirs(parent_dir, lib.directory, clutter=clutter_patterns)


class ImportAbortError(Exception):
"""Raised when the user aborts the tagging operation."""

Expand Down Expand Up @@ -177,6 +194,16 @@ def __init__(
super().__init__(toppath, paths, items)
self.should_remove_duplicates = False
self.should_merge_duplicates = False
# Existing library items to remove because individual tracks of this
# album duplicate them (see ``duplicate_track_resolution``).
self.duplicate_track_items_to_remove: list[library.Item] = []
# Set once per-track duplicate resolution has handled this task, so the
# album-level duplicate check does not then skip the remaining tracks.
self.duplicate_tracks_resolved = False
# Id of an existing album to fold the imported items into (instead of
# creating a new album), set when skipping per-track duplicates leaves
# new tracks belonging to an existing album.
self.fold_into_album_id: int | None = None
self.is_album = True

def set_choice(self, choice: Action | AlbumMatch | TrackMatch):
Expand Down Expand Up @@ -272,15 +299,7 @@ def remove_duplicates(self, lib: library.Library):
artpath = album.artpath

for item in album.items():
item.remove(with_album=False)
if lib.directory in util.ancestry(item.path):
log.debug("deleting duplicate {.filepath}", item)
util.remove(item.path)
util.prune_dirs(
os.path.dirname(item.path),
lib.directory,
clutter=config["clutter"].as_str_seq(),
)
_remove_duplicate_item(lib, item, with_album=False)

album.remove(with_items=False)

Expand All @@ -293,6 +312,17 @@ def remove_duplicates(self, lib: library.Library):
clutter=config["clutter"].as_str_seq(),
)

def remove_duplicate_track_items(self, lib: library.Library):
    """Remove the old library items listed in
    ``duplicate_track_items_to_remove``.

    An item that appears more than once in the list (same ``id``) is
    only removed the first time it is seen.
    """
    handled_ids: set[int] = set()
    for old_item in self.duplicate_track_items_to_remove:
        if old_item.id not in handled_ids:
            handled_ids.add(old_item.id)
            _remove_duplicate_item(lib, old_item)

def set_fields(self, lib: library.Library):
"""Sets the fields given at CLI or configuration to the specified
values, for both the album and all its items.
Expand Down Expand Up @@ -515,6 +545,23 @@ def add(self, lib: library.Library):
self.record_replaced(lib)
self.remove_replaced(lib)

fold_album = (
lib.get_album(self.fold_into_album_id)
if self.fold_into_album_id is not None
else None
)
if fold_album is not None:
# Fold the imported items into an existing album rather than
# creating a new one.
self.album = fold_album
for item in self.imported_items():
item.album_id = self.album.id
if item.id is None:
item.add(lib)
else:
item.store()
return

self.album = lib.add_album(self.imported_items())
if self.choice_flag == Action.APPLY and isinstance(
self.match, AlbumMatch
Expand Down Expand Up @@ -731,15 +778,7 @@ def remove_duplicates(self, lib: library.Library):
duplicate_items = self.find_duplicates(lib)
log.debug("removing {} old duplicated items", len(duplicate_items))
for item in duplicate_items:
item.remove()
if lib.directory in util.ancestry(item.path):
log.debug("deleting duplicate {.filepath}", item)
util.remove(item.path)
util.prune_dirs(
os.path.dirname(item.path),
lib.directory,
clutter=config["clutter"].as_str_seq(),
)
_remove_duplicate_item(lib, item)

def add(self, lib):
with lib.transaction():
Expand Down
12 changes: 12 additions & 0 deletions beets/test/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,18 @@ def resolve_duplicate(self, task, found_duplicates):
elif res == self.Resolution.MERGE:
task.should_merge_duplicates = True

def resolve_track_duplicates(self, task, duplicates):
    """Answer a per-track duplicate prompt from the queued resolutions.

    The next queued resolution is consumed; when the queue is empty
    ``default_resolution`` is used. ``SKIP`` maps to ``"s"``,
    ``KEEPBOTH`` to ``"k"`` and ``REMOVE`` to ``"r"``; any other
    resolution falls back to ``"k"``.
    """
    pending = self._resolutions
    res = pending.pop(0) if pending else self.default_resolution

    kinds = self.Resolution
    if res == kinds.SKIP:
        return "s"
    if res == kinds.KEEPBOTH:
        return "k"
    if res == kinds.REMOVE:
        return "r"
    # Anything else is treated like "keep".
    return "k"


class TerminalImportSessionFixture(TerminalImportSession):
def __init__(self, *args, **kwargs):
Expand Down
22 changes: 22 additions & 0 deletions beets/ui/commands/import_/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,28 @@ def resolve_duplicate(self, task, found_duplicates):
else:
assert False

def resolve_track_duplicates(self, task, duplicates) -> str:
    """Decide what to do with album tracks already in the library.

    ``duplicates`` maps each incoming item to the list of library items
    it duplicates. In quiet mode no prompt is shown and ``"s"`` (skip the
    duplicate tracks) is returned. Otherwise a summary of the old and new
    items is printed (with one line per item when
    ``import.duplicate_verbose_prompt`` is set) and the result of the
    "Skip dupes" / "Keep all" / "Remove old" prompt is returned.

    NOTE(review): the prompt result is presumably the option's initial
    letter (``"s"``, ``"k"`` or ``"r"``) -- confirm against
    ``ui.input_options``.
    """
    log.warning("Some tracks are already in the library!")

    if config["import"]["quiet"]:
        # In quiet mode, don't prompt -- just skip the duplicate tracks.
        log.info("Skipping duplicate tracks.")
        return "s"

    verbose = config["import"]["duplicate_verbose_prompt"]

    # Several incoming tracks can match the same library item; list each
    # old item only once (first occurrence wins) so the "Old" summary is
    # not inflated.
    seen_ids = set()
    existing = []
    for matches in duplicates.values():
        for item in matches:
            if item.id not in seen_ids:
                seen_ids.add(item.id)
                existing.append(item)

    ui.print_("Old: " + summarize_items(existing, True))
    if verbose:
        for item in existing:
            # Same output helper as the summary line, so all prompt text
            # goes through one path.
            ui.print_(f"  {item}")

    ui.print_("New: " + summarize_items(list(duplicates), True))
    if verbose:
        for item in duplicates:
            ui.print_(f"  {item}")

    return ui.input_options(("Skip dupes", "Keep all", "Remove old"))

def should_resume(self, path):
return ui.input_yn(
f"Import of the directory:\n{displayable_path(path)}\n"
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ New features
:conf:`plugins.musicbrainz:aliases_as_credits` to make
aliases-as-artist-credit optional.
- :doc:`plugins/badfiles`: Added settings for auto error and warning actions.
- Add the :ref:`duplicate_track_resolution` import option, which checks each
track of an album import against the library (using the :ref:`duplicate_keys`
``item`` fields) and resolves matches via the new
:ref:`duplicate_track_action` option (falling back to :ref:`duplicate_action`
when unset). ``skip`` drops already-imported tracks and adds the remaining new
tracks to the existing album, completing a partially-imported album. Disabled
by default.

Bug fixes
~~~~~~~~~
Expand Down
Loading
Loading