gardening
This commit is contained in:
parent
acaad7b5a1
commit
cecf31720c
2 changed files with 291 additions and 235 deletions
351
angel.py
351
angel.py
|
|
@ -1,37 +1,7 @@
|
||||||
import requests
|
|
||||||
import bs4
|
|
||||||
import yt_dlp as youtube_dl
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
import os
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from slixmpp import ClientXMPP
|
from slixmpp import ClientXMPP
|
||||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
from slixmpp.stanza import Message
|
||||||
from pantomime import normalize_mimetype
|
|
||||||
import cgi
|
|
||||||
import ipaddress
|
|
||||||
import io
|
|
||||||
|
|
||||||
parser = "html.parser"
|
|
||||||
user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:10.0)"
|
|
||||||
" Gecko/20100101 Firefox/10.0"
|
|
||||||
accept_lang = "en-US"
|
|
||||||
data_limit = 100000000 # 100MB
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
"user-agent": user_agent,
|
|
||||||
"Accept-Language": accept_lang,
|
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
}
|
|
||||||
|
|
||||||
youtube_link = "youtu.be"
|
|
||||||
|
|
||||||
ydl = youtube_dl.YoutubeDL()
|
|
||||||
|
|
||||||
req_list = ("http://", "https://")
|
|
||||||
|
|
||||||
html_files = ("text/html", "application/xhtml+xml")
|
|
||||||
|
|
||||||
class Lifo(list):
|
class Lifo(list):
|
||||||
"""Limited size LIFO array to store messages and urls."""
|
"""Limited size LIFO array to store messages and urls."""
|
||||||
|
|
@ -47,30 +17,24 @@ class Lifo(list):
|
||||||
if len(self) > self.size:
|
if len(self) > self.size:
|
||||||
self.pop()
|
self.pop()
|
||||||
|
|
||||||
|
def create_messages_dict():
    """Build the per-sender history store.

    Returns a ``defaultdict`` that lazily creates, for every new key, a dict
    holding three bounded ``Lifo`` buffers: ``"messages"`` (100 entries),
    ``"links"`` (10 entries) and ``"previews"`` (10 entries).
    """

    def _fresh_history():
        # A new set of buffers per key so that senders never share state.
        return {
            "messages": Lifo(100),
            "links": Lifo(10),
            "previews": Lifo(10),
        }

    return defaultdict(_fresh_history)
|
||||||
|
|
||||||
def get_youtube_title(url):
|
|
||||||
"""Get the title of a youtube video."""
|
|
||||||
try:
|
|
||||||
info = ydl.extract_info(url, download=False)
|
|
||||||
return info["title"]
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_yurl(path):
|
|
||||||
"""Get a youtube link from a path."""
|
|
||||||
yurl = f"https://youtu.be/{path}"
|
|
||||||
return yurl
|
|
||||||
|
|
||||||
# decorator to define a regex command
|
|
||||||
class RegexCmd:
|
class RegexCmd:
|
||||||
"""Regex command decorator."""
|
"""Regex command decorator."""
|
||||||
|
|
||||||
def __init__(self, bot, pattern):
|
def __init__(self, bot, pattern, block=False, matcher=None):
|
||||||
"""Initialize the decorator."""
|
"""Initialize the decorator."""
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
self.bot = bot
|
self.bot = bot
|
||||||
|
self.block = block
|
||||||
|
self.matcher = matcher
|
||||||
|
|
||||||
def __call__(self, func):
|
def __call__(self, func):
|
||||||
"""Call the decorator."""
|
"""Call the decorator."""
|
||||||
|
|
@ -81,126 +45,66 @@ class RegexCmd:
|
||||||
class AngelBot(ClientXMPP):
|
class AngelBot(ClientXMPP):
|
||||||
"""AngelBot class."""
|
"""AngelBot class."""
|
||||||
|
|
||||||
messages = defaultdict(
|
def __init__(self, jid, password, nick="angel", autojoin=None,
|
||||||
lambda: {
|
youtube_links=None,
|
||||||
"messages": Lifo(100),
|
invidious_instances=None):
|
||||||
"links": Lifo(10),
|
"""Initialize the bot."""
|
||||||
"previews": Lifo(10),
|
super().__init__(jid, password)
|
||||||
}
|
self.jid = jid
|
||||||
|
self.nick = nick
|
||||||
|
self.autojoin = autojoin or []
|
||||||
|
self.invidious_instances = invidious_instances or []
|
||||||
|
self.youtube_links = youtube_links or []
|
||||||
|
self.messages = create_messages_dict()
|
||||||
|
self.register_plugins()
|
||||||
|
self.add_handlers()
|
||||||
|
|
||||||
|
def reply(self, msg, body):
    """Answer *msg* with *body*, recording *msg* in the history first.

    The incoming message is stored through ``save_message_history`` and the
    answer is then sent through ``raw_reply``.
    """
    self.save_message_history(msg)
    self.raw_reply(msg, body)
|
||||||
|
|
||||||
|
def raw_reply(self, msg, body):
    """Send *body* back to the origin of *msg* without touching the history.

    The answer goes to the bare JID of ``msg["from"]`` and reuses the
    message type of *msg*.
    """
    destination = msg["from"].bare
    kind = msg["type"]
    self.send_message(mto=destination, mbody=body, mtype=kind)
|
||||||
|
|
||||||
|
def save_message_history(self, msg):
    """Append the body of *msg* to the message history of its sender.

    The history is keyed by the bare JID of ``msg["from"]``.
    """
    history = self.messages[msg["from"].bare]["messages"]
    history.add(msg["body"])
|
||||||
|
|
||||||
|
def get_message_history(self, msg):
    """Return the stored message history for the sender of *msg*.

    The lookup key is the bare JID of ``msg["from"]``.
    """
    return self.messages[msg["from"].bare]["messages"]
|
||||||
|
|
||||||
|
def save_link_history(self, msg, url):
    """Append *url* to the link history of the sender of *msg*.

    The history is keyed by the bare JID of ``msg["from"]``.
    """
    links = self.messages[msg["from"].bare]["links"]
    links.add(url)
|
||||||
|
|
||||||
|
def get_link_history(self, msg):
    """Return the stored link history for the sender of *msg*.

    The lookup key is the bare JID of ``msg["from"]``.
    """
    return self.messages[msg["from"].bare]["links"]
|
||||||
|
|
||||||
|
def save_preview_history(self, msg, preview):
    """Append *preview* to the preview history of the sender of *msg*.

    The history is keyed by the bare JID of ``msg["from"]``.
    """
    previews = self.messages[msg["from"].bare]["previews"]
    previews.add(preview)
|
||||||
|
|
||||||
|
def get_preview_history(self, msg):
    """Return the stored preview history for the sender of *msg*.

    The lookup key is the bare JID of ``msg["from"]``.
    """
    return self.messages[msg["from"].bare]["previews"]
|
||||||
|
|
||||||
regex_cmds = []
|
regex_cmds = []
|
||||||
|
|
||||||
def get_urls(self, msg):
|
async def embed_file(self, sender, mtype, ftype, fname, outfile):
|
||||||
"""Get urls from a message."""
|
|
||||||
str_list = msg["body"].strip().split()
|
|
||||||
urls = [u for u in str_list if any(r in u for r in req_list)]
|
|
||||||
return urls
|
|
||||||
|
|
||||||
def get_invidious_link(self, yurl):
|
|
||||||
"""Get an invidious link from a youtube link."""
|
|
||||||
video = yurl.split("/")[-1]
|
|
||||||
instance = random.choice(self.invidious_instances)
|
|
||||||
return f"https://{instance}/watch?v={video}"
|
|
||||||
|
|
||||||
|
|
||||||
def send_youtube_info(self, uri, sender, mtype):
|
|
||||||
"""Send youtube info to the sender."""
|
|
||||||
yurl = None
|
|
||||||
if uri.netloc == youtube_link:
|
|
||||||
yurl = get_yurl(uri.path)
|
|
||||||
elif "v" in (query := parse_qs(uri.query)):
|
|
||||||
if v := query["v"]:
|
|
||||||
yurl = get_yurl(v[0])
|
|
||||||
else:
|
|
||||||
return
|
|
||||||
|
|
||||||
invidious = self.get_invidious_link(yurl)
|
|
||||||
|
|
||||||
if output := get_youtube_title(invidious):
|
|
||||||
if output in self.messages[sender]["previews"]:
|
|
||||||
return
|
|
||||||
self.messages[sender]["previews"].add(output)
|
|
||||||
|
|
||||||
self.send_message(mto=sender, mbody=f"*{output}*", mtype=mtype)
|
|
||||||
self.send_message(mto=sender, mbody=invidious, mtype=mtype)
|
|
||||||
|
|
||||||
async def parse_uri(self, uri, sender, mtype):
|
|
||||||
"""Parse a uri and send the result to the sender."""
|
|
||||||
netloc = uri.netloc
|
|
||||||
if self.invidious_instances and netloc in (self.youtube_links + [youtube_link]):
|
|
||||||
self.send_youtube_info(uri, sender, mtype)
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
if ipaddress.ip_address(netloc.split(":")[0]).is_private:
|
|
||||||
return
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
await self.process_link(uri, sender, mtype)
|
|
||||||
|
|
||||||
async def process_link(self, uri, sender, mtype):
|
|
||||||
"""Process a link and send the result to the sender."""
|
|
||||||
url = urlunparse(uri)
|
|
||||||
r = requests.get(url, stream=True, headers=headers, timeout=6)
|
|
||||||
if not r.ok:
|
|
||||||
return
|
|
||||||
|
|
||||||
ftype = normalize_mimetype(r.headers.get("content-type"))
|
|
||||||
|
|
||||||
if not ftype:
|
|
||||||
return
|
|
||||||
|
|
||||||
if ftype in html_files:
|
|
||||||
data = ""
|
|
||||||
for i in r.iter_content(chunk_size=1024, decode_unicode=False):
|
|
||||||
data += i.decode("utf-8", errors="ignore")
|
|
||||||
if len(data) > data_limit or "</head>" in data.lower():
|
|
||||||
break
|
|
||||||
soup = bs4.BeautifulSoup(data, parser)
|
|
||||||
if title := soup.find("title"):
|
|
||||||
output = title.text.strip()
|
|
||||||
if output:
|
|
||||||
output = f"*{output}*" if ("\n" not in output) else output
|
|
||||||
if output in self.messages[sender]["previews"]:
|
|
||||||
return
|
|
||||||
|
|
||||||
self.messages[sender]["previews"].add(output)
|
|
||||||
if r.history:
|
|
||||||
self.send_message(mto=sender, mbody=r.url, mtype=mtype)
|
|
||||||
self.send_message(mto=sender, mbody=output, mtype=mtype)
|
|
||||||
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
lenght = 0
|
|
||||||
outfile = io.BytesIO()
|
|
||||||
for chunk in r.iter_content(
|
|
||||||
chunk_size=512,
|
|
||||||
decode_unicode=False,
|
|
||||||
):
|
|
||||||
lenght += 512
|
|
||||||
if lenght >= data_limit:
|
|
||||||
return
|
|
||||||
outfile.write(chunk)
|
|
||||||
|
|
||||||
content_disposition = r.headers.get("content-disposition")
|
|
||||||
filename = None
|
|
||||||
if content_disposition:
|
|
||||||
_, params = cgi.parse_header(content_disposition)
|
|
||||||
filename = params.get("filename")
|
|
||||||
if params.get("filename*"):
|
|
||||||
filename = params.get("filename*")
|
|
||||||
filename = filename.split("''")[-1]
|
|
||||||
else:
|
|
||||||
filename = os.path.basename(uri.path)
|
|
||||||
|
|
||||||
ext = os.path.splitext(filename)[1] if filename else ".txt"
|
|
||||||
fname = filename if filename else f"file{ext}"
|
|
||||||
await self.embed_file(url, sender, mtype, ftype, fname, outfile)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
async def embed_file(self, url, sender, mtype, ftype, fname, outfile):
|
|
||||||
"""Embed a file and send the result to the sender."""
|
"""Embed a file and send the result to the sender."""
|
||||||
furl = await self.plugin["xep_0363"].upload_file(
|
furl = await self.plugin["xep_0363"].upload_file(
|
||||||
fname, content_type=ftype, input_file=outfile
|
fname, content_type=ftype, input_file=outfile
|
||||||
|
|
@ -212,32 +116,7 @@ class AngelBot(ClientXMPP):
|
||||||
message["oob"]["url"] = furl
|
message["oob"]["url"] = furl
|
||||||
message.send()
|
message.send()
|
||||||
|
|
||||||
async def parse_urls(self, msg, urls, sender, mtype):
|
def register_plugins(self):
|
||||||
"""Parse urls and send the result to the sender."""
|
|
||||||
body = msg["body"].lower()
|
|
||||||
if "nsfw" in body: return
|
|
||||||
if "nsfl" in body: return
|
|
||||||
for u in urls:
|
|
||||||
if u in self.messages[sender]["links"]:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
self.messages[sender]["links"].add(u)
|
|
||||||
|
|
||||||
uri = urlparse(u)
|
|
||||||
await self.parse_uri(uri, sender, mtype)
|
|
||||||
|
|
||||||
def __init__(self, jid, password, nick="angel", autojoin=None,
|
|
||||||
youtube_links=None,
|
|
||||||
invidious_instances=None):
|
|
||||||
"""Initialize the bot."""
|
|
||||||
ClientXMPP.__init__(self, jid, password)
|
|
||||||
self.jid = jid
|
|
||||||
self.nick = nick
|
|
||||||
self.autojoin = autojoin or []
|
|
||||||
self.invidious_instances = invidious_instances or []
|
|
||||||
self.youtube_links = youtube_links or []
|
|
||||||
|
|
||||||
|
|
||||||
self.register_plugin("xep_0030")
|
self.register_plugin("xep_0030")
|
||||||
self.register_plugin("xep_0060")
|
self.register_plugin("xep_0060")
|
||||||
self.register_plugin("xep_0054")
|
self.register_plugin("xep_0054")
|
||||||
|
|
@ -247,6 +126,7 @@ class AngelBot(ClientXMPP):
|
||||||
self.register_plugin("xep_0153")
|
self.register_plugin("xep_0153")
|
||||||
self.register_plugin("xep_0363")
|
self.register_plugin("xep_0363")
|
||||||
|
|
||||||
|
def add_handlers(self):
|
||||||
self.add_event_handler("session_start", self.session_start)
|
self.add_event_handler("session_start", self.session_start)
|
||||||
self.add_event_handler("message", self.message)
|
self.add_event_handler("message", self.message)
|
||||||
self.add_event_handler("groupchat_message", self.muc_message)
|
self.add_event_handler("groupchat_message", self.muc_message)
|
||||||
|
|
@ -310,14 +190,6 @@ class AngelBot(ClientXMPP):
|
||||||
mtype = msg["type"]
|
mtype = msg["type"]
|
||||||
sender = msg["from"].bare
|
sender = msg["from"].bare
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
if not msg["oob"]["url"]:
|
|
||||||
if urls := self.get_urls(msg):
|
|
||||||
await self.parse_urls(msg, urls, sender, mtype)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
self.process_commands(msg, sender, mtype)
|
self.process_commands(msg, sender, mtype)
|
||||||
|
|
||||||
async def muc_message(self, msg):
|
async def muc_message(self, msg):
|
||||||
|
|
@ -334,14 +206,6 @@ class AngelBot(ClientXMPP):
|
||||||
mtype = msg["type"]
|
mtype = msg["type"]
|
||||||
sender = msg["from"].bare
|
sender = msg["from"].bare
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
if not msg["oob"]["url"]:
|
|
||||||
if urls := self.get_urls(msg):
|
|
||||||
await self.parse_urls(msg, urls, sender, mtype)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
self.process_commands(msg, sender, mtype)
|
self.process_commands(msg, sender, mtype)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -349,5 +213,78 @@ class AngelBot(ClientXMPP):
|
||||||
"""Process commands."""
|
"""Process commands."""
|
||||||
for cmd in self.regex_cmds:
|
for cmd in self.regex_cmds:
|
||||||
if cmd.pattern.match(msg["body"]):
|
if cmd.pattern.match(msg["body"]):
|
||||||
return cmd.func(self, msg, sender, mtype)
|
ctx = CommandContext(self, msg)
|
||||||
|
if cmd.matcher and not cmd.matcher(ctx):
|
||||||
|
continue
|
||||||
|
cmd.func(ctx)
|
||||||
|
if(cmd.block):
|
||||||
|
return
|
||||||
self.messages[sender]["messages"].add(msg["body"])
|
self.messages[sender]["messages"].add(msg["body"])
|
||||||
|
|
||||||
|
class CommandContext:
    """Per-message facade handed to command callbacks.

    Bundles the bot and the message being handled so a callback can reply
    and read or update the sender's histories through a single object.
    """

    # Annotations are quoted so they are never evaluated at definition time.
    def __init__(self, bot: "AngelBot", msg: "Message"):
        """Store the bot and the message this context wraps."""
        self.bot = bot
        self.msg = msg

    def reply(self, body):
        """Reply with *body* via ``bot.reply`` and return its result."""
        return self.bot.reply(self.msg, body)

    @property
    def sender(self):
        """Bare JID the message came from."""
        return self.msg["from"].bare

    @property
    def mtype(self):
        """Type of the message (``msg["type"]``)."""
        return self.msg["type"]

    @property
    def body(self):
        """Text body of the message."""
        return self.msg["body"]

    # Deliberately NOT a property: a property getter cannot receive ``body``,
    # so ``ctx.raw_reply(text)`` raised TypeError while it was decorated.
    def raw_reply(self, body):
        """Reply with *body* via ``bot.raw_reply`` and return its result."""
        return self.bot.raw_reply(self.msg, body)

    @property
    def message_history(self):
        """Message history of the sender, as returned by the bot."""
        return self.bot.get_message_history(self.msg)

    @property
    def link_history(self):
        """Link history of the sender, as returned by the bot."""
        return self.bot.get_link_history(self.msg)

    @property
    def preview_history(self):
        """Preview history of the sender, as returned by the bot."""
        return self.bot.get_preview_history(self.msg)

    def save_link_history(self, url):
        """Record *url* in the sender's link history."""
        self.bot.save_link_history(self.msg, url)

    def save_message_history(self):
        """Record the wrapped message in the sender's message history."""
        self.bot.save_message_history(self.msg)

    def save_preview_history(self, preview):
        """Record *preview* in the sender's preview history."""
        self.bot.save_preview_history(self.msg, preview)

    @property
    def is_oob(self):
        """True when the message carries a non-empty ``oob`` URL."""
        return bool(self.msg["oob"]["url"])

    def embed_file(self, ftype, fname, outfile):
        """Schedule the bot's ``embed_file`` coroutine for this sender.

        The coroutine is not awaited here; the future created by
        ``asyncio.gather`` is returned so a caller may await it or inspect
        its outcome.
        NOTE(review): presumably always called while the bot's event loop is
        running - confirm against the callers.
        """
        return asyncio.gather(
            self.bot.embed_file(self.sender, self.mtype, ftype, fname, outfile)
        )
|
||||||
|
|
|
||||||
177
main.py
177
main.py
|
|
@ -1,8 +1,15 @@
|
||||||
from angel import AngelBot, RegexCmd
|
from angel import AngelBot, RegexCmd, CommandContext
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from PythonSed import Sed
|
from PythonSed import Sed
|
||||||
import re
|
import re
|
||||||
import io
|
import io
|
||||||
|
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||||
|
from pantomime import normalize_mimetype
|
||||||
|
import cgi
|
||||||
|
import ipaddress
|
||||||
|
import bs4
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
|
||||||
sed_parse = re.compile("(?<!\\\\)[/#]")
|
sed_parse = re.compile("(?<!\\\\)[/#]")
|
||||||
sed_cmd = re.compile("^s[/#].*[/#].*[/#]")
|
sed_cmd = re.compile("^s[/#].*[/#].*[/#]")
|
||||||
|
|
@ -23,46 +30,158 @@ bot = AngelBot(jid, password, nick=nick, autojoin=autojoin,
|
||||||
youtube_links=youtube_links,
|
youtube_links=youtube_links,
|
||||||
invidious_instances=invidious_instances)
|
invidious_instances=invidious_instances)
|
||||||
|
|
||||||
@RegexCmd(bot, sed_cmd)
|
|
||||||
def sed_command(bot, msg, sender, mtype):
|
def default_matcher(ctx: CommandContext) -> bool:
    """Decide whether a message is eligible for link previews.

    Out-of-band messages are rejected, and so is any message whose body
    contains ``nsfw`` or ``nsfl`` (case-insensitively).
    """
    if ctx.is_oob:
        return False
    lowered = ctx.body.lower()
    return not any(marker in lowered for marker in ("nsfw", "nsfl"))
|
||||||
|
|
||||||
|
@RegexCmd(bot, sed_cmd, block=True)
|
||||||
|
def sed_command(ctx: CommandContext):
|
||||||
"""Process sed command."""
|
"""Process sed command."""
|
||||||
try:
|
try:
|
||||||
text = msg["body"]
|
text = ctx.body
|
||||||
if not sed_cmd.match(text):
|
|
||||||
bot.messages[sender]["messages"].add(text)
|
|
||||||
return
|
|
||||||
sed_args = sed_parse.split(text)
|
sed_args = sed_parse.split(text)
|
||||||
|
|
||||||
if len(sed_args) < 4:
|
|
||||||
return
|
|
||||||
|
|
||||||
sed = Sed()
|
sed = Sed()
|
||||||
sed.load_string(text)
|
sed.load_string(text)
|
||||||
|
pattern = re.compile(sed_args[1])
|
||||||
for message in bot.messages[sender]["messages"]:
|
for history_message in ctx.message_history:
|
||||||
if not re.search(sed_args[1], message):
|
if not pattern.search(history_message):
|
||||||
continue
|
continue
|
||||||
msg = io.StringIO(message)
|
msg = io.StringIO(history_message)
|
||||||
res = "\n".join(sed.apply(msg, None))
|
response = "\n".join(sed.apply(msg, None))
|
||||||
bot.messages[sender]["messages"].add(res)
|
return ctx.reply(response)
|
||||||
return bot.send_message(
|
|
||||||
mto=sender,
|
|
||||||
mbody=res,
|
|
||||||
mtype=mtype,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
# ping command
|
|
||||||
@RegexCmd(bot, re.compile(r"^ping$"))
|
@RegexCmd(bot, re.compile(r"^ping$"))
|
||||||
def ping_command(bot, msg, sender, mtype):
|
def ping_command(ctx: CommandContext):
|
||||||
"""Process ping command."""
|
"""Process ping command."""
|
||||||
bot.send_message(
|
ctx.reply("pong")
|
||||||
mto=sender,
|
|
||||||
mbody="pong",
|
|
||||||
mtype=mtype,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# The command dispatcher tests patterns with ``pattern.match`` (anchored at
# the start of the body), so the pattern must consume any leading text in
# order to fire for a URL that appears anywhere in the message.
@RegexCmd(bot, re.compile(r".*?https?://", re.DOTALL), matcher=default_matcher)
def url_command(ctx: CommandContext):
    """Preview every URL found in the message body.

    Does nothing when ``get_urls`` finds no URL-like token.
    """
    urls = get_urls(ctx.body)
    if not urls:
        return
    parse_urls(ctx, urls)
|
||||||
|
|
||||||
|
|
||||||
|
# URL parsing
|
||||||
|
|
||||||
|
# Substrings a whitespace-separated token must contain to be treated as a URL.
req_list = ("http://", "https://")

# MIME types previewed by page title; anything else is re-uploaded as a file.
html_files = ("text/html", "application/xhtml+xml")

# Parser name handed to BeautifulSoup.
parser = "html.parser"
# Parenthesised so both halves form ONE string; as two separate statements
# the second literal was silently discarded and the header was truncated.
user_agent = (
    "Mozilla/5.0 (X11; Linux x86_64; rv:10.0)"
    " Gecko/20100101 Firefox/10.0"
)
accept_lang = "en-US"
data_limit = 100000000  # 100MB

# Request headers sent with every preview fetch.
headers = {
    "user-agent": user_agent,
    "Accept-Language": accept_lang,
    "Cache-Control": "no-cache",
}
|
||||||
|
|
||||||
|
def get_urls(body):
    """Collect the URL-like tokens of a message body.

    The body is split on whitespace; a token is kept, in order, when it
    contains any of the prefixes listed in ``req_list``.
    """
    found = []
    for token in body.strip().split():
        for scheme in req_list:
            if scheme in token:
                found.append(token)
                break
    return found
|
||||||
|
|
||||||
|
def is_private(uri):
    """Tell whether the host of a parsed URL is a private IP address literal.

    Args:
        uri: Result of ``urllib.parse.urlparse``.

    Returns:
        True when the host is an IPv4/IPv6 literal that ``ipaddress`` reports
        as private; False for public addresses, for host names (which are
        not resolved here) and for URLs without a host.
    """
    # ``hostname`` strips userinfo, the port and IPv6 brackets; splitting the
    # raw netloc on ":" mis-read "[::1]:80" and "user@10.0.0.1" as host names.
    host = uri.hostname
    if not host:
        return False
    try:
        return ipaddress.ip_address(host).is_private
    except ValueError:
        # Not an IP literal (an ordinary host name).
        return False
|
||||||
|
|
||||||
|
def preview_page(ctx: CommandContext, r):
    """Reply with the ``<title>`` of an HTML response.

    The body of *r* is streamed only until ``</head>`` has been seen or more
    than ``data_limit`` characters were read. The stripped title text is
    wrapped in ``*`` when it is a single line. A title already present in
    the sender's preview history is not sent again. When the request was
    redirected, the final URL is sent before the title.

    Args:
        ctx: Command context used for the preview history and the replies.
        r: Streamed response providing ``iter_content``, ``history`` and
            ``url``.
    """
    import codecs  # function-scope import: only this function needs it

    end_of_head = "</head>"
    # An incremental decoder buffers a multibyte character that is split
    # across two chunks; decoding each chunk on its own dropped it.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="ignore")
    pieces = []
    total = 0
    carry = ""  # tail of the previous text, so a split "</head>" is still found
    for chunk in r.iter_content(chunk_size=1024, decode_unicode=False):
        text = decoder.decode(chunk)
        pieces.append(text)
        total += len(text)
        # Only the new text plus a short tail is searched, instead of
        # lower-casing the whole accumulated document for every chunk.
        window = carry + text
        if total > data_limit or end_of_head in window.lower():
            break
        carry = window[-(len(end_of_head) - 1):]
    data = "".join(pieces)

    soup = bs4.BeautifulSoup(data, parser)
    title = soup.find("title")
    if not title:
        return
    output = title.text.strip()
    if not output:
        return

    # Multi-line titles are sent as-is; single-line ones are emphasised.
    output = f"*{output}*" if ("\n" not in output) else output
    if output in ctx.preview_history:
        return

    ctx.save_preview_history(output)

    if r.history:
        # The request was redirected: show where the link really leads.
        ctx.raw_reply(r.url)

    ctx.reply(output)
|
||||||
|
|
||||||
|
def preview_file(ctx: CommandContext, uri, ftype, r):
    """Download a non-HTML response and hand it to ``ctx.embed_file``.

    The body of *r* is buffered in memory; the download is abandoned, and
    nothing is sent, once ``data_limit`` bytes have been received. The file
    name comes from the ``Content-Disposition`` header when that header is
    present (``filename*`` wins over ``filename``), otherwise from the last
    path segment of *uri*; ``file.txt`` is used when no name can be found.

    Any exception is printed and swallowed, so a failed preview never
    propagates to the caller.

    Args:
        ctx: Command context used to embed the file.
        uri: Parsed URL the response was fetched from.
        ftype: MIME type passed on to ``ctx.embed_file``.
        r: Streamed response providing ``iter_content`` and ``headers``.
    """
    from urllib.parse import unquote  # function-scope: not imported at file level

    try:
        received = 0
        outfile = io.BytesIO()
        for chunk in r.iter_content(
            chunk_size=512,
            decode_unicode=False,
        ):
            # Count what actually arrived; chunks may be shorter than 512.
            received += len(chunk)
            if received >= data_limit:
                return
            outfile.write(chunk)

        content_disposition = r.headers.get("content-disposition")
        filename = None
        if content_disposition:
            # NOTE(review): ``cgi`` is deprecated (PEP 594); it is kept here
            # because the file already imports it.
            _, params = cgi.parse_header(content_disposition)
            filename = params.get("filename")
            extended = params.get("filename*")
            if extended:
                # RFC 5987 form: charset'language'percent-encoded-value.
                parts = extended.split("'", 2)
                if len(parts) == 3:
                    charset, _language, encoded = parts
                    try:
                        filename = unquote(
                            encoded,
                            encoding=charset or "utf-8",
                            errors="replace",
                        )
                    except LookupError:
                        # Unknown charset label: fall back to UTF-8.
                        filename = unquote(encoded)
                else:
                    filename = extended
            if filename:
                # The header is remote-controlled: keep the final component
                # only, so a decoded name cannot carry directory parts.
                filename = os.path.basename(filename)
        else:
            filename = os.path.basename(uri.path)

        fname = filename if filename else "file.txt"
        ctx.embed_file(ftype, fname, outfile)
    except Exception as e:
        print(e)
|
||||||
|
|
||||||
|
def process_link(ctx: CommandContext, uri):
    """Fetch a link and preview it as a page title or as an embedded file.

    Responses whose status is not OK, or whose content type normalises to
    an empty value, are ignored. HTML types listed in ``html_files`` go to
    ``preview_page``; everything else goes to ``preview_file``.

    Network errors raised by ``requests`` are printed and swallowed so that
    one unreachable link does not abort handling of the message.

    Args:
        ctx: Command context the preview is sent through.
        uri: Parsed URL (as produced by ``urlparse``).
    """
    url = urlunparse(uri)
    try:
        # The response is streamed and the previewers may stop reading
        # early, so it is closed explicitly by the ``with`` block.
        with requests.get(url, stream=True, headers=headers, timeout=6) as r:
            if not r.ok:
                return

            ftype = normalize_mimetype(r.headers.get("content-type"))
            if not ftype:
                return

            if ftype in html_files:
                preview_page(ctx, r)
            else:
                preview_file(ctx, uri, ftype, r)
    except requests.RequestException as e:
        print(e)
|
||||||
|
|
||||||
|
def parse_urls(ctx: CommandContext, urls):
    """Preview each URL that the sender has not posted before.

    A URL already in the sender's link history is skipped. A new one is
    recorded first, then previewed through ``process_link`` unless
    ``is_private`` flags its host.
    """
    for link in urls:
        if link not in ctx.link_history:
            ctx.save_link_history(link)
            parsed = urlparse(link)
            if not is_private(parsed):
                process_link(ctx, parsed)
|
||||||
|
|
||||||
bot.connect()
|
bot.connect()
|
||||||
bot.process(forever=True)
|
bot.process(forever=True)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue