Importer updates: watch directories, handle metadata updates
This commit is contained in:
parent
2b5a2b39ac
commit
752c993e8e
16 changed files with 1005 additions and 120 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import datetime
|
||||
import hashlib
|
||||
|
||||
from django.core.files.base import ContentFile
|
||||
from django.http import request
|
||||
|
|
@ -458,3 +459,19 @@ def monkey_patch_request_build_absolute_uri():
|
|||
|
||||
request.HttpRequest.scheme = property(scheme)
|
||||
request.HttpRequest.get_host = get_host
|
||||
|
||||
|
||||
def get_file_hash(file, algo=None, chunk_size=None, full_read=False):
    """
    Return a ``"<algo>:<hexdigest>"`` checksum for a binary file object.

    ``algo`` is the name of a ``hashlib`` constructor and ``chunk_size`` a
    number of bytes; when falsy, they default to ``settings.HASHING_ALGORITHM``
    and ``settings.HASHING_CHUNK_SIZE`` respectively.

    The file is rewound to its beginning before reading. With ``full_read``
    the whole content is digested, ``chunk_size`` bytes at a time; otherwise
    only the first ``chunk_size`` bytes are hashed.
    """
    if not algo:
        algo = settings.HASHING_ALGORITHM
    if not chunk_size:
        chunk_size = settings.HASHING_CHUNK_SIZE

    digest = getattr(hashlib, algo)()
    file.seek(0)

    if not full_read:
        # sometimes, it's useful to only hash the beginning of the file, e.g
        # to avoid a lot of I/O when crawling large libraries
        digest.update(file.read(chunk_size))
    else:
        while True:
            block = file.read(chunk_size)
            if block == b"":
                break
            digest.update(block)

    return "{}:{}".format(algo, digest.hexdigest())
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand
|
|||
from django.db import transaction
|
||||
from django.db.models import Q
|
||||
|
||||
from funkwhale_api.common import utils as common_utils
|
||||
from funkwhale_api.music import models, utils
|
||||
|
||||
|
||||
|
|
@ -17,9 +18,9 @@ class Command(BaseCommand):
|
|||
help="Do not execute anything",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mimetypes",
|
||||
"--mimetype",
|
||||
action="store_true",
|
||||
dest="mimetypes",
|
||||
dest="mimetype",
|
||||
default=True,
|
||||
help="Check and fix mimetypes",
|
||||
)
|
||||
|
|
@ -37,16 +38,33 @@ class Command(BaseCommand):
|
|||
default=False,
|
||||
help="Check and fix file size, can be really slow because it needs to access files",
|
||||
)
|
||||
parser.add_argument(
    "--checksum",
    action="store_true",
    dest="checksum",
    default=False,
    # this option drives fix_file_checksum(); the help text used to be a
    # verbatim copy of the --size one
    help="Check and fix file checksum, can be really slow because it needs to access files",
)
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
"-s",
|
||||
dest="batch_size",
|
||||
default=1000,
|
||||
type=int,
|
||||
help="Size of each updated batch",
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
if options["dry_run"]:
|
||||
self.stdout.write("Dry-run on, will not commit anything")
|
||||
if options["mimetypes"]:
|
||||
if options["mimetype"]:
|
||||
self.fix_mimetypes(**options)
|
||||
if options["data"]:
|
||||
self.fix_file_data(**options)
|
||||
if options["size"]:
|
||||
self.fix_file_size(**options)
|
||||
if options["checksum"]:
|
||||
self.fix_file_checksum(**options)
|
||||
|
||||
@transaction.atomic
|
||||
def fix_mimetypes(self, dry_run, **kwargs):
|
||||
|
|
@ -54,11 +72,12 @@ class Command(BaseCommand):
|
|||
matching = models.Upload.objects.filter(
|
||||
Q(source__startswith="file://") | Q(source__startswith="upload://")
|
||||
).exclude(mimetype__startswith="audio/")
|
||||
total = matching.count()
|
||||
self.stdout.write(
|
||||
"[mimetypes] {} entries found with bad or no mimetype".format(
|
||||
matching.count()
|
||||
)
|
||||
"[mimetypes] {} entries found with bad or no mimetype".format(total)
|
||||
)
|
||||
if not total:
|
||||
return
|
||||
for extension, mimetype in utils.EXTENSION_TO_MIMETYPE.items():
|
||||
qs = matching.filter(source__endswith=".{}".format(extension))
|
||||
self.stdout.write(
|
||||
|
|
@ -81,24 +100,36 @@ class Command(BaseCommand):
|
|||
)
|
||||
if dry_run:
|
||||
return
|
||||
for i, upload in enumerate(matching.only("audio_file")):
|
||||
self.stdout.write(
|
||||
"[bitrate/length] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
|
||||
)
|
||||
|
||||
try:
|
||||
audio_file = upload.get_audio_file()
|
||||
if audio_file:
|
||||
chunks = common_utils.chunk_queryset(
|
||||
matching.only("id", "audio_file", "source"), kwargs["batch_size"]
|
||||
)
|
||||
handled = 0
|
||||
for chunk in chunks:
|
||||
updated = []
|
||||
for upload in chunk:
|
||||
handled += 1
|
||||
self.stdout.write(
|
||||
"[bitrate/length] {}/{} fixing file #{}".format(
|
||||
handled, total, upload.pk
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
audio_file = upload.get_audio_file()
|
||||
data = utils.get_audio_file_data(audio_file)
|
||||
upload.bitrate = data["bitrate"]
|
||||
upload.duration = data["length"]
|
||||
upload.save(update_fields=["duration", "bitrate"])
|
||||
except Exception as e:
|
||||
self.stderr.write(
|
||||
"[bitrate/length] error with file #{}: {}".format(
|
||||
upload.pk, str(e)
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.stderr.write("[bitrate/length] no file found")
|
||||
except Exception as e:
|
||||
self.stderr.write(
|
||||
"[bitrate/length] error with file #{}: {}".format(upload.pk, str(e))
|
||||
)
|
||||
updated.append(upload)
|
||||
|
||||
models.Upload.objects.bulk_update(updated, ["bitrate", "duration"])
|
||||
|
||||
def fix_file_size(self, dry_run, **kwargs):
|
||||
self.stdout.write("Fixing missing size...")
|
||||
|
|
@ -107,15 +138,64 @@ class Command(BaseCommand):
|
|||
self.stdout.write("[size] {} entries found with missing values".format(total))
|
||||
if dry_run:
|
||||
return
|
||||
for i, upload in enumerate(matching.only("size")):
|
||||
self.stdout.write(
|
||||
"[size] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
|
||||
)
|
||||
|
||||
try:
|
||||
upload.size = upload.get_file_size()
|
||||
upload.save(update_fields=["size"])
|
||||
except Exception as e:
|
||||
self.stderr.write(
|
||||
"[size] error with file #{}: {}".format(upload.pk, str(e))
|
||||
chunks = common_utils.chunk_queryset(
|
||||
matching.only("id", "audio_file", "source"), kwargs["batch_size"]
|
||||
)
|
||||
handled = 0
|
||||
for chunk in chunks:
|
||||
updated = []
|
||||
for upload in chunk:
|
||||
handled += 1
|
||||
|
||||
self.stdout.write(
|
||||
"[size] {}/{} fixing file #{}".format(handled, total, upload.pk)
|
||||
)
|
||||
|
||||
try:
|
||||
upload.size = upload.get_file_size()
|
||||
except Exception as e:
|
||||
self.stderr.write(
|
||||
"[size] error with file #{}: {}".format(upload.pk, str(e))
|
||||
)
|
||||
else:
|
||||
updated.append(upload)
|
||||
|
||||
models.Upload.objects.bulk_update(updated, ["size"])
|
||||
|
||||
def fix_file_checksum(self, dry_run, **kwargs):
    """
    Compute and store the checksum of uploads that do not have one.

    Only uploads with a ``NULL`` checksum and either an attached audio file
    or a ``file://`` source are considered. Nothing is written when
    ``dry_run`` is true. Uploads are processed in chunks of
    ``kwargs["batch_size"]`` and each chunk is persisted with a single
    ``bulk_update``. A failure on one file is reported on stderr and does not
    stop the run.
    """
    self.stdout.write("Fixing missing checksums...")
    matching = models.Upload.objects.filter(
        Q(checksum=None)
        & (Q(audio_file__isnull=False) | Q(source__startswith="file://"))
    )
    total = matching.count()
    self.stdout.write(
        "[checksum] {} entries found with missing values".format(total)
    )
    if dry_run:
        return

    chunks = common_utils.chunk_queryset(
        matching.only("id", "audio_file", "source"), kwargs["batch_size"]
    )
    handled = 0
    for chunk in chunks:
        updated = []
        for upload in chunk:
            handled += 1
            self.stdout.write(
                "[checksum] {}/{} fixing file #{}".format(handled, total, upload.pk)
            )

            try:
                audio_file = upload.get_audio_file()
                if audio_file is None:
                    # get_audio_file() returns nothing when the upload has
                    # neither an attached file nor a file:// source
                    raise ValueError("no file found")
                # close the handle as soon as it is hashed: this loop can go
                # through a whole library and would otherwise leak one file
                # descriptor per upload
                with audio_file:
                    upload.checksum = common_utils.get_file_hash(audio_file)
            except Exception as e:
                self.stderr.write(
                    "[checksum] error with file #{}: {}".format(upload.pk, str(e))
                )
            else:
                updated.append(upload)

        models.Upload.objects.bulk_update(updated, ["checksum"])
|
||||
|
|
|
|||
|
|
@ -1,17 +1,29 @@
|
|||
import collections
|
||||
import datetime
|
||||
import itertools
|
||||
import os
|
||||
import urllib.parse
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
import watchdog.events
|
||||
import watchdog.observers
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files import File
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
from funkwhale_api.common import utils as common_utils
|
||||
from funkwhale_api.music import models, tasks, utils
|
||||
|
||||
|
||||
def crawl_dir(dir, extensions, recursive=True):
|
||||
def crawl_dir(dir, extensions, recursive=True, ignored=[]):
|
||||
if os.path.isfile(dir):
|
||||
yield dir
|
||||
return
|
||||
|
|
@ -20,9 +32,12 @@ def crawl_dir(dir, extensions, recursive=True):
|
|||
if entry.is_file():
|
||||
for e in extensions:
|
||||
if entry.name.lower().endswith(".{}".format(e.lower())):
|
||||
yield entry.path
|
||||
if entry.path not in ignored:
|
||||
yield entry.path
|
||||
elif recursive and entry.is_dir():
|
||||
yield from crawl_dir(entry, extensions, recursive=recursive)
|
||||
yield from crawl_dir(
|
||||
entry, extensions, recursive=recursive, ignored=ignored
|
||||
)
|
||||
|
||||
|
||||
def batch(iterable, n=1):
|
||||
|
|
@ -116,6 +131,17 @@ class Command(BaseCommand):
|
|||
"of overhead on your server and on servers you are federating with."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--watch",
|
||||
action="store_true",
|
||||
dest="watch",
|
||||
default=False,
|
||||
help=(
|
||||
"Start the command in watch mode. Instead of running a full import, "
|
||||
"and exit, watch the given path and import new files, remove deleted "
|
||||
"files, and update metadata corresponding to updated files."
|
||||
),
|
||||
)
|
||||
parser.add_argument("-e", "--extension", nargs="+")
|
||||
|
||||
parser.add_argument(
|
||||
|
|
@ -128,6 +154,15 @@ class Command(BaseCommand):
|
|||
"This causes some overhead, so it's disabled by default."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
    "--prune",
    action="store_true",
    dest="prune",
    default=False,
    help=(
        "Once the import is completed, prune tracks, albums and artists that aren't linked to any upload."
    ),
)
|
||||
|
||||
parser.add_argument(
|
||||
"--reference",
|
||||
|
|
@ -157,6 +192,8 @@ class Command(BaseCommand):
|
|||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
# handle relative directories
|
||||
options["path"] = [os.path.abspath(path) for path in options["path"]]
|
||||
self.is_confirmed = False
|
||||
try:
|
||||
library = models.Library.objects.select_related("actor__user").get(
|
||||
|
|
@ -182,22 +219,12 @@ class Command(BaseCommand):
|
|||
)
|
||||
if p and not import_path.startswith(p):
|
||||
raise CommandError(
|
||||
"Importing in-place only works if importing"
|
||||
"Importing in-place only works if importing "
|
||||
"from {} (MUSIC_DIRECTORY_PATH), as this directory"
|
||||
"needs to be accessible by the webserver."
|
||||
"Culprit: {}".format(p, import_path)
|
||||
)
|
||||
|
||||
extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
|
||||
crawler = itertools.chain(
|
||||
*[
|
||||
crawl_dir(p, extensions=extensions, recursive=options["recursive"])
|
||||
for p in options["path"]
|
||||
]
|
||||
)
|
||||
errors = []
|
||||
total = 0
|
||||
start_time = time.time()
|
||||
reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())
|
||||
|
||||
import_url = "{}://{}/library/{}/upload?{}"
|
||||
|
|
@ -212,8 +239,62 @@ class Command(BaseCommand):
|
|||
reference, import_url
|
||||
)
|
||||
)
|
||||
extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
|
||||
if options["watch"]:
|
||||
if len(options["path"]) > 1:
|
||||
raise CommandError("Watch only work with a single directory")
|
||||
|
||||
return self.setup_watcher(
|
||||
extensions=extensions,
|
||||
path=options["path"][0],
|
||||
reference=reference,
|
||||
library=library,
|
||||
in_place=options["in_place"],
|
||||
prune=options["prune"],
|
||||
recursive=options["recursive"],
|
||||
replace=options["replace"],
|
||||
dispatch_outbox=options["outbox"],
|
||||
broadcast=options["broadcast"],
|
||||
)
|
||||
|
||||
update = True
|
||||
checked_paths = set()
|
||||
if options["in_place"] and update:
|
||||
self.stdout.write("Checking existing files for updates…")
|
||||
message = (
|
||||
"Are you sure you want to do this?\n\n"
|
||||
"Type 'yes' to continue, or 'no' to skip checking for updates in "
|
||||
"already imported files: "
|
||||
)
|
||||
if options["interactive"] and input("".join(message)) != "yes":
|
||||
pass
|
||||
else:
|
||||
checked_paths = check_updates(
|
||||
stdout=self.stdout,
|
||||
paths=options["path"],
|
||||
extensions=extensions,
|
||||
library=library,
|
||||
batch_size=options["batch_size"],
|
||||
)
|
||||
self.stdout.write("Existing files checked, moving on to next step!")
|
||||
|
||||
crawler = itertools.chain(
|
||||
*[
|
||||
crawl_dir(
|
||||
p,
|
||||
extensions=extensions,
|
||||
recursive=options["recursive"],
|
||||
ignored=checked_paths,
|
||||
)
|
||||
for p in options["path"]
|
||||
]
|
||||
)
|
||||
errors = []
|
||||
total = 0
|
||||
start_time = time.time()
|
||||
batch_start = None
|
||||
batch_duration = None
|
||||
self.stdout.write("Starting import of new files…")
|
||||
for i, entries in enumerate(batch(crawler, options["batch_size"])):
|
||||
total += len(entries)
|
||||
batch_start = time.time()
|
||||
|
|
@ -225,7 +306,7 @@ class Command(BaseCommand):
|
|||
if entries:
|
||||
self.stdout.write(
|
||||
"Handling batch {} ({} items){}".format(
|
||||
i + 1, options["batch_size"], time_stats,
|
||||
i + 1, len(entries), time_stats,
|
||||
)
|
||||
)
|
||||
batch_errors = self.handle_batch(
|
||||
|
|
@ -240,9 +321,9 @@ class Command(BaseCommand):
|
|||
|
||||
batch_duration = time.time() - batch_start
|
||||
|
||||
message = "Successfully imported {} tracks in {}s"
|
||||
message = "Successfully imported {} new tracks in {}s"
|
||||
if options["async_"]:
|
||||
message = "Successfully launched import for {} tracks in {}s"
|
||||
message = "Successfully launched import for {} new tracks in {}s"
|
||||
|
||||
self.stdout.write(
|
||||
message.format(total - len(errors), int(time.time() - start_time))
|
||||
|
|
@ -259,6 +340,12 @@ class Command(BaseCommand):
|
|||
)
|
||||
)
|
||||
|
||||
if options["prune"]:
|
||||
self.stdout.write(
|
||||
"Pruning dangling tracks, albums and artists from library…"
|
||||
)
|
||||
prune()
|
||||
|
||||
def handle_batch(self, library, paths, batch, reference, options):
|
||||
matching = []
|
||||
for m in paths:
|
||||
|
|
@ -362,15 +449,15 @@ class Command(BaseCommand):
|
|||
message.format(batch=batch, path=path, i=i + 1, total=len(paths))
|
||||
)
|
||||
try:
|
||||
self.create_upload(
|
||||
path,
|
||||
reference,
|
||||
library,
|
||||
async_,
|
||||
options["replace"],
|
||||
options["in_place"],
|
||||
options["outbox"],
|
||||
options["broadcast"],
|
||||
create_upload(
|
||||
path=path,
|
||||
reference=reference,
|
||||
library=library,
|
||||
async_=async_,
|
||||
replace=options["replace"],
|
||||
in_place=options["in_place"],
|
||||
dispatch_outbox=options["outbox"],
|
||||
broadcast=options["broadcast"],
|
||||
)
|
||||
except Exception as e:
|
||||
if options["exit_on_failure"]:
|
||||
|
|
@ -382,34 +469,311 @@ class Command(BaseCommand):
|
|||
errors.append((path, "{} {}".format(e.__class__.__name__, e)))
|
||||
return errors
|
||||
|
||||
def create_upload(
|
||||
self,
|
||||
path,
|
||||
reference,
|
||||
library,
|
||||
async_,
|
||||
replace,
|
||||
in_place,
|
||||
dispatch_outbox,
|
||||
broadcast,
|
||||
):
|
||||
import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
|
||||
upload = models.Upload(library=library, import_reference=reference)
|
||||
upload.source = "file://" + path
|
||||
upload.import_metadata = {
|
||||
"funkwhale": {
|
||||
"config": {
|
||||
"replace": replace,
|
||||
"dispatch_outbox": dispatch_outbox,
|
||||
"broadcast": broadcast,
|
||||
}
|
||||
def setup_watcher(self, path, extensions, recursive, **kwargs):
    """
    Watch ``path`` and process filesystem events until interrupted.

    A daemon thread consumes the events that the ``Watcher`` handler pushes
    on a shared queue, while this method blocks in a loop that, every ten
    seconds, prunes the library when ``kwargs["prune"]`` is set and a
    deletion was recorded in ``GLOBAL["need_pruning"]``.

    ``extensions`` is turned into ``*.<ext>`` patterns for the handler and
    ``recursive`` is forwarded to the observer. Remaining ``kwargs`` are
    passed to ``process_load_queue``. Returns when a ``KeyboardInterrupt``
    is received.
    """
    watchdog_queue = queue.Queue()
    # Set up a worker thread to process database load
    worker = threading.Thread(
        target=process_load_queue(self.stdout, **kwargs),
        args=(watchdog_queue,),
        # Thread.setDaemon() is deprecated, the constructor flag is equivalent
        daemon=True,
    )
    worker.start()

    # setup watchdog to monitor directory for trigger files
    patterns = ["*.{}".format(e) for e in extensions]
    event_handler = Watcher(
        stdout=self.stdout, queue=watchdog_queue, patterns=patterns,
    )
    observer = watchdog.observers.Observer()
    observer.schedule(event_handler, path, recursive=recursive)
    observer.start()

    try:
        while True:
            self.stdout.write(
                "Watching for changes at {}…".format(path), ending="\r"
            )
            time.sleep(10)
            if kwargs["prune"] and GLOBAL["need_pruning"]:
                self.stdout.write("Some files were deleted, pruning library…")
                # reset the flag *before* pruning, so a deletion handled by
                # the worker thread while prune() runs is not forgotten
                GLOBAL["need_pruning"] = False
                prune()
    except KeyboardInterrupt:
        self.stdout.write("Exiting…")
    finally:
        # always release the observer thread, whatever ended the loop
        observer.stop()
        observer.join()
|
||||
|
||||
|
||||
GLOBAL = {"need_pruning": False}
|
||||
|
||||
|
||||
def prune():
    """Run the ``prune_library`` management command for artists, albums and tracks, with dry-run disabled."""
    options = {
        "dry_run": False,
        "prune_artists": True,
        "prune_albums": True,
        "prune_tracks": True,
    }
    call_command("prune_library", **options)
|
||||
|
||||
|
||||
def create_upload(
|
||||
path, reference, library, async_, replace, in_place, dispatch_outbox, broadcast,
|
||||
):
|
||||
import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
|
||||
upload = models.Upload(library=library, import_reference=reference)
|
||||
upload.source = "file://" + path
|
||||
upload.import_metadata = {
|
||||
"funkwhale": {
|
||||
"config": {
|
||||
"replace": replace,
|
||||
"dispatch_outbox": dispatch_outbox,
|
||||
"broadcast": broadcast,
|
||||
}
|
||||
}
|
||||
if not in_place:
|
||||
name = os.path.basename(path)
|
||||
with open(path, "rb") as f:
|
||||
upload.audio_file.save(name, File(f), save=False)
|
||||
}
|
||||
if not in_place:
|
||||
name = os.path.basename(path)
|
||||
with open(path, "rb") as f:
|
||||
upload.audio_file.save(name, File(f), save=False)
|
||||
|
||||
upload.save()
|
||||
upload.save()
|
||||
|
||||
import_handler(upload_id=upload.pk)
|
||||
import_handler(upload_id=upload.pk)
|
||||
|
||||
|
||||
def process_load_queue(stdout, **kwargs):
    """
    Build the worker function that consumes watcher events from a queue.

    The returned callable takes the queue as its only argument and loops
    forever: it drains the queue, keeps only the latest event for each path,
    and hands an event to ``handle_event`` (with ``stdout`` and ``kwargs``)
    once it is more than two seconds old.
    """

    def inner(q):
        # we batch events, to avoid calling same methods multiple times if a file is modified
        # a lot in a really short time
        flush_delay = 2
        batched_events = collections.OrderedDict()
        while True:
            while True:
                if not q.empty():
                    event = q.get()
                    batched_events[event["path"]] = event
                else:
                    break
            for path, event in batched_events.copy().items():
                if time.time() - event["time"] <= flush_delay:
                    continue
                now = datetime.datetime.utcnow()
                stdout.write(
                    "{} -- Processing {}:{}...\n".format(
                        now.strftime("%Y/%m/%d %H:%M:%S"), event["type"], event["path"]
                    )
                )
                del batched_events[path]
                try:
                    handle_event(event, stdout=stdout, **kwargs)
                except Exception as e:
                    # this runs in a background thread: an uncaught error
                    # (e.g. a file that vanished before being read) would end
                    # the thread and silently stop all further processing
                    stdout.write(
                        "  Error while processing {}:{}: {} {}\n".format(
                            event["type"], event["path"], e.__class__.__name__, e
                        )
                    )
            time.sleep(1)

    return inner
|
||||
|
||||
|
||||
class Watcher(watchdog.events.PatternMatchingEventHandler):
    """
    Watchdog handler that forwards matching filesystem events to a queue.

    Moved, created, deleted and modified events are all converted into a
    plain dict and put on ``queue``.
    """

    def __init__(self, stdout, queue, patterns):
        self.stdout = stdout
        self.queue = queue
        super().__init__(patterns=patterns)

    def enqueue(self, event):
        """Serialize ``event`` (with the current timestamp) and queue it."""
        payload = {
            "is_directory": event.is_directory,
            "type": event.event_type,
            # "path" and "src_path" both carry the event's source path
            "path": event.src_path,
            "src_path": event.src_path,
            # falls back to None for events without a dest_path attribute
            "dest_path": getattr(event, "dest_path", None),
            "time": time.time(),
        }
        self.queue.put(payload)

    def on_moved(self, event):
        self.enqueue(event)

    def on_created(self, event):
        self.enqueue(event)

    def on_deleted(self, event):
        self.enqueue(event)

    def on_modified(self, event):
        self.enqueue(event)
|
||||
|
||||
|
||||
def handle_event(event, stdout, **kwargs):
    """
    Dispatch a queued event to the handler matching its ``"type"``.

    Supported types are ``modified``, ``created``, ``moved`` and ``deleted``;
    any other value raises ``KeyError``.
    """
    dispatch = dict(
        modified=handle_modified,
        created=handle_created,
        moved=handle_moved,
        deleted=handle_deleted,
    )
    handler = dispatch[event["type"]]
    handler(event=event, stdout=stdout, **kwargs)
|
||||
|
||||
|
||||
def handle_modified(event, stdout, library, in_place, **kwargs):
    """
    React to a file being written in the watched directory.

    The file at ``event["path"]`` is hashed, then:

    - if a finished upload of ``library`` already has this checksum, nothing
      is done;
    - otherwise, when importing in place and a finished in-place upload
      exists for this path, the metadata of its track is refreshed (unless
      the track is attributed to another actor) and the new checksum is
      stored once the update succeeded;
    - otherwise a new upload is created and imported synchronously, using
      the ``reference``, ``replace``, ``dispatch_outbox`` and ``broadcast``
      values found in ``kwargs``.
    """
    existing_candidates = library.uploads.filter(import_status="finished")
    with open(event["path"], "rb") as f:
        checksum = common_utils.get_file_hash(f)

    if existing_candidates.filter(checksum=checksum).first():
        # found an existing file with same checksum, nothing to do
        stdout.write(" File already imported and metadata is up-to-date")
        return

    to_update = None
    if in_place:
        source = "file://{}".format(event["path"])
        to_update = (
            existing_candidates.in_place()
            .filter(source=source)
            .select_related(
                "track__attributed_to", "track__artist", "track__album__artist",
            )
            .first()
        )

    if to_update:
        owner = to_update.track.attributed_to
        if owner and owner != library.actor:
            # the message has no placeholder, so the former
            # .format(to_update.pk) call had no effect and was dropped
            stdout.write(
                " Cannot update track metadata, track belongs to someone else"
            )
            return

        stdout.write(
            " Updating existing file #{} with new metadata…".format(to_update.pk)
        )
        audio_metadata = to_update.get_metadata()
        try:
            tasks.update_track_metadata(audio_metadata, to_update.track)
        except serializers.ValidationError as e:
            stdout.write(" Invalid metadata: {}".format(e))
        else:
            # only remember the new checksum when the metadata was applied
            to_update.checksum = checksum
            to_update.save(update_fields=["checksum"])
        return

    stdout.write(" Launching import for new file")
    create_upload(
        path=event["path"],
        reference=kwargs["reference"],
        library=library,
        async_=False,
        replace=kwargs["replace"],
        in_place=in_place,
        dispatch_outbox=kwargs["dispatch_outbox"],
        broadcast=kwargs["broadcast"],
    )
|
||||
|
||||
|
||||
def handle_created(event, stdout, **kwargs):
    """
    Handle a "created" event exactly like a "modified" one.

    When a file is copied into the watched directory, a created event is
    fired on the initial touch, followed by many modified events while the
    file is being written, so both go through the same code path.
    """
    return handle_modified(event=event, stdout=stdout, **kwargs)
|
||||
|
||||
|
||||
def handle_moved(event, stdout, library, in_place, **kwargs):
    """
    Point an in-place upload to its new location after a file was moved.

    Does nothing unless ``in_place`` is true. The first finished in-place
    upload of ``library`` whose source is ``event["src_path"]`` gets its
    source rewritten to ``event["dest_path"]``.
    """
    if in_place:
        previous_source = "file://{}".format(event["src_path"])
        updated_source = "file://{}".format(event["dest_path"])
        upload = (
            library.uploads.filter(import_status="finished")
            .in_place()
            .filter(source=previous_source)
            .first()
        )
        if upload:
            stdout.write(" Updating path of existing file #{}".format(upload.pk))
            upload.source = updated_source
            upload.save(update_fields=["source"])
|
||||
|
||||
|
||||
def handle_deleted(event, stdout, library, in_place, **kwargs):
    """
    Remove the in-place uploads matching a file deleted from disk.

    Does nothing unless ``in_place`` is true. Finished in-place uploads of
    ``library`` whose source is ``event["path"]`` are deleted, and
    ``GLOBAL["need_pruning"]`` is raised so that the library can be pruned
    afterwards.
    """
    if not in_place:
        return
    source = "file://{}".format(event["path"])
    existing_candidates = (
        library.uploads.filter(import_status="finished")
        .in_place()
        .filter(source=source)
    )
    # exists() only asks the database whether a row matches, instead of
    # counting all of them
    if existing_candidates.exists():
        stdout.write(" Removing file from DB")
        existing_candidates.delete()
        GLOBAL["need_pruning"] = True
|
||||
|
||||
|
||||
def check_updates(stdout, library, extensions, paths, batch_size):
    """
    Re-check the already imported in-place uploads located under ``paths``.

    Candidates are the finished in-place uploads of ``library`` that have a
    checksum, whose source starts with ``file://<path>`` for one of the
    ``paths`` and ends with one of the ``extensions``. They are walked in
    source order, ``batch_size`` at a time, and each one goes through
    ``check_upload``.

    Returns the set of paths (source without its ``file://`` prefix) of all
    the uploads that were checked.
    """
    conditions = [
        Q(source__startswith="file://{}".format(path))
        & Q(source__endswith=".{}".format(ext))
        for path in paths
        for ext in extensions
    ]
    # OR all the (path, extension) conditions together
    query = conditions[0]
    for condition in conditions[1:]:
        query = condition | query

    candidates = (
        library.uploads.in_place()
        .filter(import_status="finished")
        .exclude(checksum=None)
        .select_related("library", "track")
        .filter(query)
    )
    stdout.write("Found {} files to check in database!".format(candidates.count()))

    checked_paths = set()
    batches = batch(candidates.order_by("source").iterator(), batch_size)
    for number, rows in enumerate(batches, start=1):
        stdout.write("Handling batch {} ({} items)".format(number, len(rows)))
        for upload in rows:
            check_upload(stdout, upload)
            checked_paths.add(upload.source.replace("file://", "", 1))

    return checked_paths
|
||||
|
||||
|
||||
def check_upload(stdout, upload):
    """
    Synchronize one imported upload with the state of its file on disk.

    - When the file cannot be found, the upload is deleted and the result of
      ``upload.delete()`` is returned.
    - When the file checksum differs from the stored one, the track metadata
      is updated from the file, unless the track is not attributed to the
      actor owning the upload's library. The new checksum is saved only if
      the metadata update succeeded.
    - When the checksum is unchanged, nothing happens.
    """
    try:
        audio_file = upload.get_audio_file()
    except FileNotFoundError:
        stdout.write(
            " Removing file #{} missing from disk at {}".format(
                upload.pk, upload.source
            )
        )
        return upload.delete()

    try:
        checksum = common_utils.get_file_hash(audio_file)
    finally:
        # this function is called for every upload of a library: release the
        # handle right away instead of leaking one descriptor per file
        if audio_file is not None:
            audio_file.close()

    if upload.checksum == checksum:
        return

    stdout.write(
        " File #{} at {} was modified, updating metadata…".format(
            upload.pk, upload.source
        )
    )
    if upload.library.actor_id != upload.track.attributed_to_id:
        # the message has no placeholder, so the former .format(upload.pk)
        # call had no effect and was dropped
        stdout.write(" Cannot update track metadata, track belongs to someone else")
        return

    track = models.Track.objects.select_related("artist", "album__artist").get(
        pk=upload.track_id
    )
    try:
        tasks.update_track_metadata(upload.get_metadata(), track)
    except serializers.ValidationError as e:
        stdout.write(" Invalid metadata: {}".format(e))
        return

    upload.checksum = checksum
    upload.save(update_fields=["checksum"])
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
# Generated by Django 3.0.4 on 2020-05-05 08:10
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the indexed, nullable ``Upload.checksum`` column and refresh the ``UploadVersion.mimetype`` choices."""

    dependencies = [
        ("music", "0051_auto_20200319_1249"),
    ]

    operations = [
        migrations.AddField(
            model_name="upload",
            name="checksum",
            field=models.CharField(
                blank=True, db_index=True, max_length=100, null=True
            ),
        ),
        migrations.AlterField(
            model_name="uploadversion",
            name="mimetype",
            field=models.CharField(
                choices=[
                    ("audio/mp3", "mp3"),
                    ("audio/mpeg3", "mp3"),
                    ("audio/x-mp3", "mp3"),
                    ("audio/mpeg", "mp3"),
                    ("video/ogg", "ogg"),
                    ("audio/ogg", "ogg"),
                    ("audio/opus", "opus"),
                    ("audio/x-m4a", "aac"),
                    ("audio/x-m4a", "m4a"),
                    ("audio/x-flac", "flac"),
                    ("audio/flac", "flac"),
                ],
                max_length=50,
            ),
        ),
    ]
|
||||
|
|
@ -655,6 +655,14 @@ class Track(APIModelMixin):
|
|||
|
||||
|
||||
class UploadQuerySet(common_models.NullsLastQuerySet):
|
||||
def in_place(self, include=True):
    """
    Filter on uploads imported in place.

    An upload matches when its source starts with ``file://`` and its
    ``audio_file`` is empty or null. With a falsy ``include``, the condition
    is negated and only the other uploads are returned.
    """
    without_audio_file = models.Q(audio_file="") | models.Q(audio_file=None)
    condition = models.Q(source__startswith="file://") & without_audio_file
    return self.filter(condition if include else ~condition)
|
||||
|
||||
def playable_by(self, actor, include=True):
|
||||
libraries = Library.objects.viewable_by(actor)
|
||||
|
||||
|
|
@ -754,6 +762,9 @@ class Upload(models.Model):
|
|||
)
|
||||
downloads_count = models.PositiveIntegerField(default=0)
|
||||
|
||||
# stores checksums such as `sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
|
||||
checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
|
||||
|
||||
objects = UploadQuerySet.as_manager()
|
||||
|
||||
@property
|
||||
|
|
@ -833,7 +844,7 @@ class Upload(models.Model):
|
|||
def get_audio_file(self):
|
||||
if self.audio_file:
|
||||
return self.audio_file.open()
|
||||
if self.source.startswith("file://"):
|
||||
if self.source and self.source.startswith("file://"):
|
||||
return open(self.source.replace("file://", "", 1), "rb")
|
||||
|
||||
def get_audio_data(self):
|
||||
|
|
@ -866,6 +877,15 @@ class Upload(models.Model):
|
|||
self.mimetype = mimetypes.guess_type(self.source)[0]
|
||||
if not self.size and self.audio_file:
|
||||
self.size = self.audio_file.size
|
||||
if not self.checksum:
|
||||
try:
|
||||
audio_file = self.get_audio_file()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
if audio_file:
|
||||
self.checksum = common_utils.get_file_hash(audio_file)
|
||||
|
||||
if not self.pk and not self.fid and self.library.actor.get_user():
|
||||
self.fid = self.get_federation_id()
|
||||
return super().save(**kwargs)
|
||||
|
|
|
|||
|
|
@ -851,3 +851,71 @@ def update_library_entity(obj, data):
|
|||
obj.save(update_fields=list(data.keys()))
|
||||
|
||||
return obj
|
||||
|
||||
|
||||
# Fields that update_track_metadata() is allowed to refresh, grouped by the
# entity they belong to. Each field maps to an options dict:
# - an empty dict means the new value is read from the payload under the
#   field's own name;
# - "getter" is a callable receiving (payload, field name) and returning the
#   new value.
UPDATE_CONFIG = {
    "track": {
        "position": {},
        "title": {},
        "mbid": {},
        "disc_number": {},
        "copyright": {},
        "license": {
            # the license is matched from both the license and copyright values
            "getter": lambda data, field: licenses.match(
                data.get("license"), data.get("copyright")
            )
        },
    },
    "album": {"title": {}, "mbid": {}, "release_date": {}},
    "artist": {"name": {}, "mbid": {}},
    "album_artist": {"name": {}, "mbid": {}},
}
|
||||
|
||||
|
||||
@transaction.atomic
def update_track_metadata(audio_metadata, track):
    """
    Update a track and its related entities from freshly read file metadata.

    This supports the case where an imported file is re-tagged by an outside
    tool (e.g beets). ``audio_metadata`` is validated with
    ``TrackMetadataSerializer`` (a validation error is raised when invalid),
    then the track, its album, its artist and its album artist are compared
    field by field against the validated data, following ``UPDATE_CONFIG``.
    Only the fields whose value changed are saved. If the metadata carries
    cover data and the track has an album, the cover is attached to it.

    Everything runs in a single database transaction.
    """
    serializer = metadata.TrackMetadataSerializer(data=audio_metadata)
    serializer.is_valid(raise_exception=True)
    new_data = serializer.validated_data

    # (UPDATE_CONFIG key, object to update, getter of the matching payload)
    to_update = [
        ("track", track, lambda data: data),
        ("album", track.album, lambda data: data["album"]),
        ("artist", track.artist, lambda data: data["artists"][0]),
        (
            "album_artist",
            track.album.artist if track.album else None,
            lambda data: data["album"]["artists"][0],
        ),
    ]
    for entity, obj, data_getter in to_update:
        if not obj:
            continue
        try:
            obj_data = data_getter(new_data)
        except (IndexError, KeyError):
            # the metadata has nothing for this entity: an empty list of
            # artists, or no "album"/"artists" entry at all (the cover
            # handling below already allows for a missing "album")
            continue

        obj_updated_fields = []
        for field, config in UPDATE_CONFIG[entity].items():
            getter = config.get("getter")
            try:
                if getter:
                    new_value = getter(obj_data, field)
                else:
                    new_value = obj_data[config.get("field", field)]
            except KeyError:
                # value absent from the metadata, leave the field untouched
                continue
            if new_value == getattr(obj, field):
                continue
            obj_updated_fields.append(field)
            setattr(obj, field, new_value)

        if obj_updated_fields:
            obj.save(update_fields=obj_updated_fields)

    if track.album and "album" in new_data and new_data["album"].get("cover_data"):
        common_utils.attach_file(
            track.album, "attachment_cover", new_data["album"].get("cover_data")
        )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue