2025-04-09 21:31:37 -03:00
|
|
|
from angel import AngelBot, RegexCmd, CommandContext
|
2025-04-07 15:57:52 -03:00
|
|
|
from configparser import ConfigParser
|
|
|
|
|
from PythonSed import Sed
|
2024-02-16 01:56:08 +01:00
|
|
|
import re
|
|
|
|
|
import io
|
2025-06-01 00:44:06 -03:00
|
|
|
from urllib.parse import urlparse, urlunparse
|
2025-04-09 21:31:37 -03:00
|
|
|
from pantomime import normalize_mimetype
|
|
|
|
|
import cgi
|
|
|
|
|
import ipaddress
|
|
|
|
|
import bs4
|
|
|
|
|
import requests
|
|
|
|
|
import os
|
2025-06-01 00:44:06 -03:00
|
|
|
import gusmobile
|
|
|
|
|
import yt_dlp as youtube_dl
|
|
|
|
|
import random
|
2024-02-16 01:56:08 +01:00
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
# Patterns used to recognise and dissect incoming messages.
# sed_parse splits on a '/' or '#' delimiter that is not preceded by a
# backslash; sed_cmd recognises an "s/.../.../"-shaped message; url_cmd
# spots gemini/http(s) scheme prefixes anywhere in a message.
sed_parse = re.compile('(?<!\\\\)[/#]')
sed_cmd = re.compile('^s[/#].*[/#].*[/#]')
url_cmd = re.compile(r'gemini://|https?://')

# All settings live in the [angel] section of config.ini.
config = ConfigParser()
config.read('config.ini')
_angel = config['angel']

# Mandatory options: a missing key raises KeyError at start-up.
jid = _angel['jid']
password = _angel['password']
nick = _angel['nick']

# Optional options: whitespace-separated lists, empty when not configured.
autojoin, youtube_links, invidious_instances = (
    _angel.get(option, '').split()
    for option in ('autojoin', 'youtube_links', 'invidious_instances')
)

bot = AngelBot(jid, password, nick=nick, autojoin=autojoin)
|
2024-02-16 01:56:08 +01:00
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
|
|
|
|
|
def default_matcher(ctx: CommandContext) -> bool:
    """Decide whether a message is eligible for URL previews.

    Returns False when ``ctx.is_oob`` is set or when the body mentions
    'nsfw' or 'nsfl' (case-insensitively); True otherwise.
    """
    if ctx.is_oob:
        return False
    lowered = ctx.body.lower()
    return not any(tag in lowered for tag in ('nsfw', 'nsfl'))
|
|
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
|
|
|
|
|
@RegexCmd(bot, sed_cmd, block=True)
def sed_command(ctx: CommandContext):
    """Apply a sed substitution to the first history message it matches.

    The message body is loaded as a sed script. The text between the first
    two unescaped delimiters is compiled as a Python regex and used to pick
    the first entry of ``ctx.message_history`` it matches; the sed output
    for that entry is sent as the reply. Any error is printed, not raised.
    """
    try:
        script = ctx.body
        sed = Sed()
        sed.load_string(script)
        # Field 0 is the leading "s"; field 1 is the search expression.
        needle = re.compile(sed_parse.split(script)[1])
        target = next(filter(needle.search, ctx.message_history), None)
        if target is not None:
            rewritten = sed.apply(io.StringIO(target), None)
            return ctx.reply('\n'.join(rewritten))
    except Exception as e:
        print(e)
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
@RegexCmd(bot, re.compile(r'^ping$'))
def ping_command(ctx: CommandContext):
    """Answer a message that is exactly 'ping' with 'pong'."""
    ctx.reply('pong')
|
2024-02-16 01:56:08 +01:00
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
@RegexCmd(bot, url_cmd, matcher=default_matcher)
def url_command(ctx: CommandContext):
    """Collect the http(s) and gemini links in a message and preview them."""
    found = [*get_urls(ctx.body), *get_gemini_urls(ctx.body)]
    if found:
        parse_urls(ctx, found)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# URL parsing
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
# Scheme prefixes used to pick link-like tokens out of a message.
req_list = ('http://', 'https://')
gemini_links = ('gemini://',)

# MIME types handled as pages (title extraction) rather than as files.
html_files = ('text/html', 'application/xhtml+xml', 'text/xml')

# BeautifulSoup parser names for HTML and XML documents.
html_parser = 'html.parser'
xml_parser = 'xml'

# The two literals are parenthesised so they concatenate into ONE string.
# Written as two bare statements, the second literal is a no-op expression
# and the User-Agent is sent without its "Gecko/... Firefox/..." part.
user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
    ' Gecko/20100101 Firefox/10.0'
)
accept_lang = 'en-US'
data_limit = 100000000  # 100MB

# Headers sent with every outgoing HTTP request.
headers = {
    'user-agent': user_agent,
    'Accept-Language': accept_lang,
    'Cache-Control': 'no-cache',
}
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
def get_urls(body):
    """Return the whitespace-separated tokens of *body* containing an http(s) prefix."""

    def mentions_http(token):
        return any(prefix in token for prefix in req_list)

    return list(filter(mentions_http, body.split()))
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
def get_gemini_urls(body) -> list[str]:
    """Return the whitespace-separated tokens of *body* containing a gemini prefix."""

    def mentions_gemini(token):
        return any(prefix in token for prefix in gemini_links)

    return list(filter(mentions_gemini, body.split()))
|
|
|
|
|
|
|
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
def is_private(uri):
    """Check if a uri points at a private IP address.

    *uri* is a parsed URL (as returned by ``urllib.parse.urlparse``).
    Returns True only when its host is an IP literal that ``ipaddress``
    classifies as private; host names and empty hosts return False.
    """
    # ``hostname`` strips userinfo, the port and the brackets around IPv6
    # literals. Splitting ``netloc`` on ':' mangles "[fd00::1]:80" and
    # "user@10.0.0.1", which let such addresses slip through.
    host = uri.hostname
    if not host:
        return False
    try:
        return ipaddress.ip_address(host).is_private
    except ValueError:
        # Not an IP literal (e.g. a DNS name): not treated as private here.
        return False
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
def preview_page(ctx: CommandContext, r, ftype):
    """Reply with the <title> of an HTML/XML response.

    *r* is a streamed response. Its body is read in 1 KiB chunks until
    ``</head>`` has been seen or more than ``data_limit`` characters have
    been decoded. *ftype* is the normalized MIME type and selects the XML
    parser for 'text/xml' and the HTML parser otherwise.

    Nothing is sent when there is no non-empty title or when the formatted
    title is already in ``ctx.preview_history``. If the request was
    redirected, the final URL is sent before the title.
    """
    import codecs  # local import: only this function needs it

    # An incremental decoder keeps the trailing bytes of a multi-byte
    # character until the next chunk arrives. Decoding each chunk on its own
    # with errors='ignore' drops characters split across chunk boundaries.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='ignore')
    end_of_head = '</head>'
    keep = len(end_of_head) - 1

    pieces = []
    decoded_chars = 0
    carry = ''  # tail of the previous text, so a marker split in two is found
    for chunk in r.iter_content(chunk_size=1024, decode_unicode=False):
        text = decoder.decode(chunk)
        pieces.append(text)
        decoded_chars += len(text)
        window = carry + text
        # Only the new text plus a short tail is scanned, instead of
        # lower-casing the whole buffer again for every chunk.
        if decoded_chars > data_limit or end_of_head in window.lower():
            break
        carry = window[-keep:]
    data = ''.join(pieces)

    parser = xml_parser if ftype == 'text/xml' else html_parser
    soup = bs4.BeautifulSoup(data, parser)

    title = soup.find('title')
    if title is None:
        return
    output = title.text.strip()
    if not output:
        return

    # Single-line titles are wrapped in asterisks; multi-line ones go as is.
    if '\n' not in output:
        output = f'*{output}*'
    if output in ctx.preview_history:
        return
    ctx.save_preview_history(output)

    # After a redirect, show where the link actually ended up.
    if r.history and r.url:
        ctx.raw_reply(r.url)

    ctx.reply(output)
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
def preview_file(ctx: CommandContext, uri, ftype, r):
    """Download a non-HTML response and hand it to ``ctx.embed_file``.

    *r* is a streamed response, *uri* the parsed URL it came from and
    *ftype* its normalized MIME type. The body is buffered in memory; the
    download is abandoned, and nothing is sent, once ``data_limit`` bytes
    have been received.

    The file name comes from the Content-Disposition header (``filename*``
    preferred over ``filename``), then from the last path segment of the
    URL, then falls back to 'file.txt'. Any error is printed, not raised.
    """
    from urllib.parse import unquote  # local import: only used here

    try:
        received = 0
        outfile = io.BytesIO()
        for chunk in r.iter_content(chunk_size=512, decode_unicode=False):
            # Count the bytes actually delivered; chunks may be shorter
            # than the requested chunk size.
            received += len(chunk)
            if received >= data_limit:
                return
            outfile.write(chunk)

        filename = None
        content_disposition = r.headers.get('content-disposition')
        if content_disposition:
            # NOTE(review): the ``cgi`` module was removed in Python 3.13;
            # the file-level import needs replacing before upgrading.
            _, params = cgi.parse_header(content_disposition)
            extended = params.get('filename*')
            if extended:
                # RFC 6266 form: charset'language'percent-encoded-name.
                filename = unquote(extended.split("''")[-1])
            else:
                filename = params.get('filename')
        if not filename:
            # No usable header value: use the URL path's last segment.
            filename = os.path.basename(uri.path)

        ctx.embed_file(ftype, filename or 'file.txt', outfile)
    except Exception as e:
        print(e)
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
def process_http_url(ctx: CommandContext, uri):
    """Process a link and send the result to the sender.

    Fetches *uri* (a parsed http/https URL) as a streamed request. Page
    MIME types listed in ``html_files`` go to ``preview_page``; everything
    else goes to ``preview_file``. Unsuccessful responses and responses
    without a usable content type are ignored. Network errors are printed,
    like the other handlers in this file, so one bad link does not stop the
    remaining links from being processed.
    """
    url = urlunparse(uri)
    try:
        # With stream=True the connection is held until the body is fully
        # read or the response is closed; the ``with`` block releases it on
        # every path, including early returns and partial reads.
        with requests.get(
            url, stream=True, headers=headers, timeout=6
        ) as r:
            if not r.ok:
                return

            ftype = normalize_mimetype(r.headers.get('content-type'))
            if not ftype:
                return

            if ftype in html_files:
                preview_page(ctx, r, ftype)
            else:
                preview_file(ctx, uri, ftype, r)
    except requests.RequestException as e:
        print(e)
|
|
|
|
|
|
2025-06-01 00:44:06 -03:00
|
|
|
|
|
|
|
|
def process_gemini_url(ctx: CommandContext, uri):
    """Reply with the first line of a Gemini page as its title.

    Nothing is sent when the fetch yields no response, when the status is
    not '20', or when the first line of the stripped content is empty.
    Leading and trailing '#' characters are removed from the title.
    """
    response = gusmobile.fetch(urlunparse(uri))
    if not response or response.status != '20':
        return

    first_line = response.content.strip().split('\n', 1)[0]
    heading = first_line.strip()
    if not heading:
        return

    label = heading.strip("#").strip()
    ctx.reply(f'*{label}*')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_youtube_url(ctx: CommandContext, uri):
    """Send the title of a YouTube video, preceded by an Invidious link.

    Metadata is looked up without downloading the video. When any Invidious
    instances are configured, one is picked at random and its watch URL is
    sent first. Any error during lookup or reply is printed, not raised.
    """
    video_url = urlunparse(uri)

    with youtube_dl.YoutubeDL() as downloader:
        try:
            metadata = downloader.extract_info(video_url, download=False)
            caption = metadata.get('title', 'No title')
            if invidious_instances:
                mirror = random.choice(invidious_instances)
                ctx.raw_reply(f'{mirror}/watch?v={metadata["id"]}')
            ctx.reply(f'*{caption}*')
        except Exception as e:
            print(e)
|
|
|
|
|
|
|
|
|
|
|
2025-04-09 21:31:37 -03:00
|
|
|
def parse_urls(ctx: CommandContext, urls):
    """Dispatch each new link to the handler matching its scheme.

    Links already in ``ctx.link_history`` are skipped; every other link is
    recorded before anything else happens. Links whose host is a private IP
    address, or whose scheme is not gemini/http/https, are ignored. An
    http(s) link containing one of ``youtube_links`` goes to the YouTube
    handler instead of the generic HTTP one.
    """
    for link in urls:
        if link in ctx.link_history:
            continue
        ctx.save_link_history(link)

        uri = urlparse(link)
        if is_private(uri):
            continue

        scheme = uri.scheme
        if scheme == 'gemini':
            handler = process_gemini_url
        elif scheme not in ('http', 'https'):
            continue
        elif any(marker in link for marker in youtube_links):
            handler = process_youtube_url
        else:
            handler = process_http_url
        handler(ctx, uri)
|
|
|
|
|
|
2024-02-16 01:56:08 +01:00
|
|
|
|
2025-04-07 15:57:52 -03:00
|
|
|
# Connect the bot built from config.ini; what connect() does is defined by
# AngelBot, which is not visible in this file.
bot.connect()
|
|
|
|
|
# Hand control to the bot's processing loop. forever=True presumably keeps
# it running until the process is stopped — TODO confirm against AngelBot.
bot.process(forever=True)
|