Denormalized audio permission logic in a separate table to enhance performance

This commit is contained in:
Eliot Berriot 2019-10-19 18:10:42 +02:00
commit 4e7f1e63d2
No known key found for this signature in database
GPG key ID: 6B501DFD73514E14
17 changed files with 648 additions and 21 deletions

View file

@ -0,0 +1,59 @@
from argparse import RawTextHelpFormatter
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from django.db.models import Q
from funkwhale_api.music.models import TrackActor, Library
from funkwhale_api.federation.models import Actor
class Command(BaseCommand):
help = """
Rebuild audio permission table. You shouldn't have to do this by hand, but if you face
any weird things (tracks still shown when they shouldn't, or tracks not shown when they should),
this may help.
"""
def create_parser(self, *args, **kwargs):
parser = super().create_parser(*args, **kwargs)
parser.formatter_class = RawTextHelpFormatter
return parser
def add_arguments(self, parser):
parser.add_argument(
"username",
nargs='*',
help="Rebuild only for given users",
)
@transaction.atomic
def handle(self, *args, **options):
actor_ids = []
if options['username']:
actors = Actor.objects.all().local(True)
actor_ids = list(actors.filter(preferred_username__in=options['username']).values_list('id', flat=True))
if len(actor_ids) < len(options['username']):
raise CommandError('Invalid username')
print('Emptying permission table for specified users…')
qs = TrackActor.objects.all().filter(Q(actor__pk__in=actor_ids) | Q(actor=None))
qs._raw_delete(qs.db)
else:
print('Emptying permission table…')
qs = TrackActor.objects.all()
qs._raw_delete(qs.db)
libraries = Library.objects.all()
objs = []
total_libraries = len(libraries)
for i, library in enumerate(libraries):
print('[{}/{}] Populating permission table for library {}'.format(i + 1, total_libraries, library.pk))
objs += TrackActor.get_objs(
library=library,
actor_ids=actor_ids,
upload_and_track_ids=[],
)
print('Commiting changes…')
TrackActor.objects.bulk_create(objs, batch_size=5000, ignore_conflicts=True)

View file

@ -0,0 +1,32 @@
# Generated by Django 2.2.5 on 2019-10-21 13:18
from django.conf import settings
import django.contrib.postgres.fields.jsonb
import django.core.serializers.json
from django.db import migrations, models
import django.db.models.deletion
import funkwhale_api.music.models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('music', '0039_auto_20190423_0820'),
]
operations = [
migrations.CreateModel(
name='TrackActor',
fields=[
('id', models.BigAutoField(primary_key=True, serialize=False)),
('internal', models.BooleanField(db_index=True, default=False)),
('track', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='music.Track')),
('upload', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='music.Upload')),
('actor', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='federation.Actor')),
],
options={
'unique_together': {('track', 'actor', 'internal', 'upload')},
},
),
]

View file

@ -0,0 +1,47 @@
# Generated by Django 2.2.5 on 2019-10-21 17:05
import django.contrib.postgres.fields.jsonb
import django.core.serializers.json
from django.db import migrations, models
import django.db.models.deletion
import funkwhale_api.music.models
class Migration(migrations.Migration):
dependencies = [
('music', '0040_auto_20191021_1318'),
]
operations = [
migrations.AlterField(
model_name='album',
name='release_date',
field=models.DateField(blank=True, db_index=True, null=True),
),
migrations.AlterField(
model_name='upload',
name='from_activity',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='federation.Activity'),
),
migrations.AlterField(
model_name='upload',
name='import_details',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='upload',
name='import_metadata',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='upload',
name='metadata',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='uploadversion',
name='mimetype',
field=models.CharField(choices=[('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/mpeg', 'mp3'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
),
]

View file

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
This migration is disabled until settings.MUSIC_USE_DENORMALIZATION is set to default=True
"""
from __future__ import unicode_literals
from django.db import migrations
from funkwhale_api.music.utils import guess_mimetype
def denormalize(apps, schema_editor):
Upload = apps.get_model("music", "Upload")
if not Upload.objects.count():
print('Skipping…')
from funkwhale_api.music.models import TrackActor, Library
libraries = Library.objects.all()
objs = []
total_libraries = len(libraries)
for i, library in enumerate(libraries):
print('[{}/{}] Populating permission table for library {}'.format(i+1, total_libraries, library.pk))
objs += TrackActor.get_objs(
library=library,
actor_ids=[],
upload_and_track_ids=[],
)
print('Commiting changes…')
TrackActor.objects.bulk_create(objs, batch_size=5000, ignore_conflicts=True)
def rewind(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [("music", "0041_auto_20191021_1705")]
operations = [migrations.RunPython(denormalize, rewind)]

View file

@ -14,7 +14,7 @@ from django.contrib.postgres.fields import JSONField
from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models, transaction
from django.db.models.signals import post_save
from django.db.models.signals import post_save, pre_save
from django.dispatch import receiver
from django.urls import reverse
from django.utils import timezone
@ -185,8 +185,8 @@ class ArtistQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
tracks = Track.objects.playable_by(actor, include)
matches = self.filter(tracks__in=tracks).values_list("pk")
tracks = Track.objects.playable_by(actor)
matches = self.filter(pk__in=tracks.values("artist_id")).values_list("pk")
if include:
return self.filter(pk__in=matches)
else:
@ -269,8 +269,8 @@ class AlbumQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
tracks = Track.objects.playable_by(actor, include)
matches = self.filter(tracks__in=tracks).values_list("pk")
tracks = Track.objects.playable_by(actor)
matches = self.filter(pk__in=tracks.values("album_id")).values_list("pk")
if include:
return self.filter(pk__in=matches)
else:
@ -418,6 +418,7 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.select_related().select_related("album__artist", "artist")
def annotate_playable_by_actor(self, actor):
files = (
Upload.objects.playable_by(actor)
.filter(track=models.OuterRef("id"))
@ -428,6 +429,15 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
if settings.MUSIC_USE_DENORMALIZATION:
if actor is not None:
query = models.Q(actor=None) | models.Q(actor=actor)
else:
query = models.Q(actor=None, internal=False)
if not include:
query = ~query
return self.filter(pk__in=TrackActor.objects.filter(query).values("track"))
files = Upload.objects.playable_by(actor, include)
matches = self.filter(uploads__in=files).values_list("pk")
if include:
@ -1147,11 +1157,134 @@ class LibraryScan(models.Model):
modification_date = models.DateTimeField(null=True, blank=True)
class TrackActor(models.Model):
"""
Denormalization table to store all playable tracks for a given user
Empty user means the track is public or internal (cf internal flag too)
"""
id = models.BigAutoField(primary_key=True)
actor = models.ForeignKey(
"federation.Actor",
on_delete=models.CASCADE,
related_name="track_actor_items",
blank=True,
null=True,
)
track = models.ForeignKey(
Track, on_delete=models.CASCADE, related_name="track_actor_items"
)
upload = models.ForeignKey(
Upload, on_delete=models.CASCADE, related_name="track_actor_items"
)
internal = models.BooleanField(default=False, db_index=True)
class Meta:
unique_together = ("track", "actor", "internal", "upload")
@classmethod
def get_objs(cls, library, actor_ids, upload_and_track_ids):
upload_and_track_ids = upload_and_track_ids or library.uploads.filter(
import_status="finished", track__isnull=False
).values_list("id", "track")
objs = []
if library.privacy_level == "me":
follow_queryset = library.received_follows.filter(approved=True).exclude(
actor__user__isnull=True
)
if actor_ids:
follow_queryset = follow_queryset.filter(actor__pk__in=actor_ids)
final_actor_ids = list(follow_queryset.values_list("actor", flat=True))
owner = library.actor if library.actor.is_local else None
if owner and (not actor_ids or owner in final_actor_ids):
final_actor_ids.append(owner.pk)
for actor_id in final_actor_ids:
for upload_id, track_id in upload_and_track_ids:
objs.append(
cls(actor_id=actor_id, track_id=track_id, upload_id=upload_id)
)
elif library.privacy_level == "instance":
for upload_id, track_id in upload_and_track_ids:
objs.append(
cls(
actor_id=None,
track_id=track_id,
upload_id=upload_id,
internal=True,
)
)
elif library.privacy_level == "everyone":
for upload_id, track_id in upload_and_track_ids:
objs.append(cls(actor_id=None, track_id=track_id, upload_id=upload_id))
return objs
@classmethod
def create_entries(
cls, library, delete_existing=True, actor_ids=None, upload_and_track_ids=None
):
if not settings.MUSIC_USE_DENORMALIZATION:
# skip
return
if delete_existing:
to_delete = cls.objects.filter(upload__library=library)
if actor_ids:
to_delete = to_delete.filter(actor__pk__in=actor_ids)
# we don't use .delete() here because we don't want signals to fire
to_delete._raw_delete(to_delete.db)
objs = cls.get_objs(
library, actor_ids=actor_ids, upload_and_track_ids=upload_and_track_ids
)
return cls.objects.bulk_create(objs, ignore_conflicts=True, batch_size=5000)
@receiver(post_save, sender=ImportJob)
def update_batch_status(sender, instance, **kwargs):
instance.batch.update_status()
@receiver(post_save, sender=Upload)
def update_denormalization_track_actor(sender, instance, created, **kwargs):
if (
created
and settings.MUSIC_USE_DENORMALIZATION
and instance.track_id
and instance.import_status == "finished"
):
TrackActor.create_entries(
instance.library,
delete_existing=False,
upload_and_track_ids=[(instance.pk, instance.track_id)],
)
@receiver(pre_save, sender=Library)
def set_privacy_level_updated(sender, instance, update_fields, **kwargs):
if not instance.pk:
return
if update_fields is not None and "privacy_level" not in update_fields:
return
db_value = instance.__class__.objects.filter(pk=instance.pk).values_list(
"privacy_level", flat=True
)[0]
if db_value != instance.privacy_level:
# Needed to update denormalized permissions
setattr(instance, "_privacy_level_updated", True)
@receiver(post_save, sender=Library)
def update_denormalization_track_user_library_privacy_level(
sender, instance, created, **kwargs
):
if created:
return
updated = getattr(instance, "_privacy_level_updated", False)
if updated:
TrackActor.create_entries(instance)
@receiver(post_save, sender=ImportBatch)
def update_request_status(sender, instance, created, **kwargs):
update_fields = kwargs.get("update_fields", []) or []

View file

@ -170,7 +170,7 @@ def fail_import(upload, error_code, detail=None, **fields):
),
"upload",
)
def process_upload(upload):
def process_upload(upload, update_denormalization=True):
import_metadata = upload.import_metadata or {}
old_status = upload.import_status
audio_file = upload.get_audio_file()
@ -249,6 +249,13 @@ def process_upload(upload):
]
)
if update_denormalization:
models.TrackActor.create_entries(
library=upload.library,
upload_and_track_ids=[(upload.pk, upload.track_id)],
delete_existing=False,
)
# update album cover, if needed
if not track.album.cover:
update_album_cover(