Update invidious instance

Closes #8
This commit is contained in:
Czar 2023-12-14 21:53:51 +01:00
commit 9801202e39

736
main.py
View file

@ -1,368 +1,368 @@
import requests import requests
import bs4 import bs4
import yt_dlp as youtube_dl import yt_dlp as youtube_dl
import random import random
import configparser import configparser
import re import re
import io import io
import os import os
import asyncio import asyncio
from collections import defaultdict from collections import defaultdict
from PythonSed import Sed from PythonSed import Sed
from slixmpp import ClientXMPP from slixmpp import ClientXMPP
from urllib.parse import urlparse, parse_qs, urlunparse from urllib.parse import urlparse, parse_qs, urlunparse
from pantomime import normalize_mimetype from pantomime import normalize_mimetype
import cgi import cgi
# Splits a sed expression on "/" or "#" delimiters that are not preceded by a
# backslash, so escaped delimiters stay inside their field.
sed_parse = re.compile(r"(?<!\\)[/#]")
# Matches messages shaped like a sed substitution: s/old/new/ or s#old#new#.
sed_cmd = re.compile(r"^s[/#].*[/#].*[/#]")
# Parser backend handed to BeautifulSoup.
parser = "html.parser"
# The two literals must be joined inside parentheses; as two separate
# statements the second one is a no-op and the User-Agent is cut short.
user_agent = (
    "Mozilla/5.0 (X11; Linux x86_64; rv:10.0)"
    " Gecko/20100101 Firefox/10.0"
)
accept_lang = "en-US"
data_limit = 100000000  # 100MB
# Headers sent with every outgoing HTTP request.
headers = {
    "user-agent": user_agent,
    "Accept-Language": accept_lang,
    "Cache-Control": "no-cache",
}
# Hosts whose video id is read from the "v" query parameter.
youtube_links = [
    "www.youtube.com",
    "m.youtube.com",
]
# Short-link host whose video id is read from the URL path.
youtube_link = "youtu.be"
# Shared yt-dlp handle used for title lookups.
ydl = youtube_dl.YoutubeDL()
# Pool from which one Invidious host is picked at random per link.
invidious_instances = [
    "invidious.kalli.st",
]
# Hosts that are never fetched.
block_list = (
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
)
# A message word containing one of these is treated as a URL.
req_list = (
    "http://",
    "https://",
)
# Content types that get a title preview instead of a file embed.
html_files = (
    "text/html",
    "application/xhtml+xml",
)
class Lifo(list):
    """Bounded list that keeps the newest entry at index 0."""

    def __init__(self, size):
        """Create an empty list that holds at most ``size`` items."""
        super().__init__()
        self.size = size

    def add(self, item):
        """Put ``item`` at the front and drop the last entry on overflow."""
        self[:0] = [item]
        if len(self) <= self.size:
            return
        # Over capacity: the entry at the tail is the oldest one.
        del self[-1]
def get_youtube_title(url):
    """Return the title yt-dlp reports for ``url``, or None on any failure.

    Failures (extraction errors, a missing "title" key) are printed and
    reported to the caller as None rather than raised.
    """
    try:
        title = ydl.extract_info(url, download=False)["title"]
    except Exception as err:
        print(err)
        return None
    return title
def get_invidious_link(yurl, instances=None):
    """Build an Invidious watch link for the video named by ``yurl``.

    Args:
        yurl: A link whose last path segment is the video id, e.g.
            ``https://youtu.be/<id>``. Leading, doubled and trailing
            slashes in the path are tolerated.
        instances: Optional sequence of Invidious host names to pick from.
            Defaults to the module-level ``invidious_instances``.

    Returns:
        ``https://<instance>/watch?v=<id>`` for a randomly chosen instance.
    """
    pool = invidious_instances if instances is None else instances
    # Take the id from the parsed path so a trailing "/" does not produce an
    # empty id and the host name is never mistaken for one.
    video = urlparse(yurl).path.strip("/").rsplit("/", 1)[-1]
    instance = random.choice(pool)
    return f"https://{instance}/watch?v={video}"
def get_yurl(path):
    """Return the ``https://youtu.be/`` short link for a video id or path.

    ``path`` may be a bare video id or a URL path that already starts with
    "/"; leading slashes are dropped so the result never contains "//"
    after the host.
    """
    return f"https://youtu.be/{path.lstrip('/')}"
class AngelBot(ClientXMPP):
    """XMPP bot that previews links, re-uploads files and applies sed edits.

    For every incoming chat or groupchat message the bot:

    * looks for URLs and answers with a page title, an Invidious link for
      YouTube videos, or an uploaded copy of a non-HTML resource;
    * treats ``s/old/new/``-style messages as a sed substitution applied to
      the most recent matching message of the same conversation.
    """

    # Per-conversation state keyed by the bare JID of the peer or room.
    # Shared at class level, so every instance sees the same history.
    messages = defaultdict(
        lambda: {
            "messages": Lifo(100),
            "links": Lifo(10),
            "previews": Lifo(10),
        }
    )

    def get_urls(self, msg):
        """Return the whitespace-separated words of the body that hold a URL."""
        words = msg["body"].strip().split()
        return [w for w in words if any(r in w for r in req_list)]

    def send_youtube_info(self, uri, sender, mtype):
        """Send the title and an Invidious link for a YouTube URL.

        Nothing is sent when no video id can be found, when the title lookup
        fails, or when the same title was previewed recently for ``sender``.
        """
        if uri.netloc == youtube_link:
            # Short links carry the id in the path.
            yurl = get_yurl(uri.path)
        else:
            # Regular links carry the id in the "v" query parameter.
            video_ids = parse_qs(uri.query).get("v")
            if not video_ids:
                return
            yurl = get_yurl(video_ids[0])
        invidious = get_invidious_link(yurl)
        output = get_youtube_title(invidious)
        if not output:
            return
        previews = self.messages[sender]["previews"]
        if output in previews:
            return
        previews.add(output)
        self.send_message(mto=sender, mbody=f"*{output}*", mtype=mtype)
        self.send_message(mto=sender, mbody=invidious, mtype=mtype)

    @staticmethod
    def _is_blocked_host(uri):
        """Tell whether the host of a parsed URL must not be fetched.

        Uses ``uri.hostname`` so user-info, port, letter case and IPv6
        brackets cannot be used to slip past ``block_list``. Literal
        loopback and unspecified IP addresses are blocked as well.
        """
        import ipaddress

        host = (uri.hostname or "").rstrip(".")
        if host in block_list:
            return True
        try:
            address = ipaddress.ip_address(host)
        except ValueError:
            # Not an IP literal; only the name-based block list applies.
            return False
        return address.is_loopback or address.is_unspecified

    async def parse_uri(self, uri, sender, mtype):
        """Route a parsed URL to the YouTube or generic link handler."""
        if uri.netloc in (youtube_links + [youtube_link]):
            self.send_youtube_info(uri, sender, mtype)
        elif self._is_blocked_host(uri):
            return
        else:
            await self.process_link(uri, sender, mtype)

    async def process_link(self, uri, sender, mtype):
        """Fetch a URL and send a title preview or an uploaded copy of it.

        HTML responses produce a title preview; every other content type is
        downloaded (up to ``data_limit`` bytes) and re-uploaded.
        """
        url = urlunparse(uri)
        # NOTE(review): requests is synchronous, so the event loop is blocked
        # while this request and the body download are in progress.
        # The context manager closes the streamed response on every exit path.
        with requests.get(url, stream=True, headers=headers, timeout=6) as r:
            if not r.ok:
                return
            ftype = normalize_mimetype(r.headers.get("content-type"))
            if not ftype:
                return
            if ftype in html_files:
                self._preview_html(r, sender, mtype)
            else:
                await self._embed_download(r, uri, url, sender, mtype, ftype)

    def _preview_html(self, r, sender, mtype):
        """Read an HTML response up to ``</head>`` and send its title."""
        closing_tag = b"</head>"
        raw = bytearray()
        for chunk in r.iter_content(chunk_size=1024, decode_unicode=False):
            raw += chunk
            # Scan only the new bytes plus enough overlap to catch a closing
            # tag that straddles two chunks.
            window = raw[-(len(chunk) + len(closing_tag) - 1):]
            if len(raw) > data_limit or closing_tag in window.lower():
                break
        # Decode once so multibyte characters split across chunk boundaries
        # are not lost.
        data = raw.decode("utf-8", errors="ignore")
        soup = bs4.BeautifulSoup(data, parser)
        title = soup.find("title")
        if not title:
            return
        output = title.text.strip()
        if not output:
            return
        # Multi-line titles are sent without the emphasis markers.
        output = f"*{output}*" if ("\n" not in output) else output
        previews = self.messages[sender]["previews"]
        if output in previews:
            return
        previews.add(output)
        if r.history:
            # The request was redirected; show where it ended up.
            self.send_message(mto=sender, mbody=r.url, mtype=mtype)
        self.send_message(mto=sender, mbody=output, mtype=mtype)

    async def _embed_download(self, r, uri, url, sender, mtype, ftype):
        """Download a non-HTML response and hand it to ``embed_file``.

        Gives up silently once ``data_limit`` bytes have been received; any
        error is printed rather than raised.
        """
        try:
            length = 0
            outfile = io.BytesIO()
            for chunk in r.iter_content(chunk_size=512, decode_unicode=False):
                # Count what was actually received; chunks may be shorter
                # than the requested chunk size.
                length += len(chunk)
                if length >= data_limit:
                    return
                outfile.write(chunk)
            content_disposition = r.headers.get("content-disposition")
            filename = None
            if content_disposition:
                _, params = cgi.parse_header(content_disposition)
                filename = params.get("filename")
            else:
                filename = os.path.basename(uri.path)
            ext = os.path.splitext(filename)[1] if filename else ".txt"
            fname = filename if filename else f"file{ext}"
            await self.embed_file(url, sender, mtype, ftype, fname, outfile)
        except Exception as e:
            print(e)

    async def embed_file(self, url, sender, mtype, ftype, fname, outfile):
        """Upload ``outfile`` via XEP-0363 and send the resulting URL."""
        furl = await self.plugin["xep_0363"].upload_file(
            fname, content_type=ftype, input_file=outfile
        )
        message = self.make_message(sender)
        message["body"] = furl
        message["type"] = mtype
        # The same URL goes into the out-of-band element of the message.
        message["oob"]["url"] = furl
        message.send()

    async def parse_urls(self, msg, urls, sender, mtype):
        """Handle each new URL of a message, unless it is marked NSFW/NSFL."""
        body = msg["body"].lower()
        if "nsfw" in body or "nsfl" in body:
            return
        seen = self.messages[sender]["links"]
        for u in urls:
            if u in seen:
                continue
            seen.add(u)
            try:
                await self.parse_uri(urlparse(u), sender, mtype)
            except Exception as e:
                # One bad link must not stop the remaining links of the
                # same message from being handled.
                print(e)

    def sed_command(self, msg, sender, mtype):
        """Record a message, or apply it as a sed substitution to history.

        A body that does not look like ``s/old/new/`` is stored in the
        conversation history. Otherwise the expression is applied to the most
        recent stored message containing the search text; the result is
        stored and sent back. Errors are printed, never raised.
        """
        try:
            text = msg["body"]
            history = self.messages[sender]["messages"]
            if not sed_cmd.match(text):
                history.add(text)
                return
            sed_args = sed_parse.split(text)
            if len(sed_args) < 4:
                return
            # NOTE(review): the whole message body is loaded as a sed script.
            # If PythonSed honours file-writing commands or flags, this lets
            # any chat participant touch the filesystem - confirm and restrict.
            sed = Sed()
            sed.load_string(text)
            for message in history:
                if sed_args[1] not in message:
                    continue
                res = "\n".join(sed.apply(io.StringIO(message), None))
                history.add(res)
                return self.send_message(
                    mto=sender,
                    mbody=res,
                    mtype=mtype,
                )
        except Exception as e:
            print(e)

    def __init__(self, jid, password, nick="angel", autojoin=None):
        """Set up plugins and event handlers.

        Args:
            jid: Account JID used to log in.
            password: Account password.
            nick: Nickname used when joining rooms.
            autojoin: Iterable of room JIDs to join once the session starts.
        """
        ClientXMPP.__init__(self, jid, password)
        self.jid = jid
        self.nick = nick
        self.autojoin = autojoin or []
        for plugin in (
            "xep_0030",
            "xep_0060",
            "xep_0054",
            "xep_0045",
            "xep_0066",
            "xep_0084",
            "xep_0153",
            "xep_0363",
        ):
            self.register_plugin(plugin)
        self.add_event_handler("session_start", self.session_start)
        self.add_event_handler("message", self.message)
        self.add_event_handler("groupchat_message", self.muc_message)
        # Reconnect whenever the connection drops.
        self.add_event_handler("disconnected", lambda _: self.connect())

    async def session_start(self, event):
        """Announce presence, publish profile data and join the rooms."""
        self.send_presence()
        await self.get_roster()
        await self.update_info()
        for channel in self.autojoin:
            try:
                self.plugin["xep_0045"].join_muc(channel, self.nick)
            except Exception as e:
                print(e)

    async def update_info(self):
        """Publish the avatar (``angel.png``) and the vCard of the bot.

        All publications are started together and awaited; a failing
        publication is printed and does not affect the others.
        """
        with open("angel.png", "rb") as avatar_file:
            avatar = avatar_file.read()
        avatar_type = "image/png"
        info = {
            "id": self.plugin["xep_0084"].generate_id(avatar),
            "type": avatar_type,
            "bytes": len(avatar),
        }
        vcard = self.plugin["xep_0054"].make_vcard()
        vcard["URL"] = "https://wiki.kalli.st/Angel"
        vcard["DESC"] = "Angel is a bot that can do link previews and embeds."
        vcard["NICKNAME"] = "Angel"
        vcard["FN"] = "Angel"
        # Await the requests so failures are reported here instead of being
        # left on futures nobody looks at.
        results = await asyncio.gather(
            self.plugin["xep_0084"].publish_avatar(avatar),
            self.plugin["xep_0153"].set_avatar(
                avatar=avatar,
                mtype=avatar_type,
            ),
            self.plugin["xep_0084"].publish_avatar_metadata([info]),
            self.plugin["xep_0054"].publish_vcard(vcard),
            return_exceptions=True,
        )
        for result in results:
            if isinstance(result, Exception):
                print(result)

    async def _handle_incoming(self, msg):
        """Run link previews and sed handling for one accepted message."""
        # Message corrections (XEP-0308) are ignored entirely.
        if "urn:xmpp:message-correct:0" in str(msg):
            return
        mtype = msg["type"]
        sender = msg["from"].bare
        try:
            # Messages that already carry an out-of-band URL are not previewed.
            if not msg["oob"]["url"]:
                if urls := self.get_urls(msg):
                    await self.parse_urls(msg, urls, sender, mtype)
        except Exception as e:
            print(e)
        self.sed_command(msg, sender, mtype)

    async def message(self, msg):
        """Process a direct ("chat" or "normal") message."""
        if msg["type"] in ("chat", "normal"):
            await self._handle_incoming(msg)

    async def muc_message(self, msg):
        """Process a groupchat message, skipping the bot's own messages."""
        if msg["type"] in ("groupchat", "normal"):
            if msg["mucnick"] == self.nick:
                return
            await self._handle_incoming(msg)
if __name__ == "__main__":
    # Account settings are read from the [angel] section of config.ini.
    config = configparser.ConfigParser()
    config.read("config.ini")
    settings = config["angel"]
    jid = settings["jid"]
    password = settings["password"]
    # Whitespace-separated list of rooms to join after login.
    autojoin = settings["autojoin"].split()
    nick = settings["nick"]
    bot = AngelBot(jid, password, nick=nick, autojoin=autojoin)
    bot.connect()
    bot.process(forever=True)