previewbot/urlpreview.py
2025-05-15 13:51:54 +02:00

39 lines
1.1 KiB
Python

import asyncio
import codecs
import html.parser

import aiohttp
class TitleExtractor(html.parser.HTMLParser):
    """Incremental HTML parser that records the text of ``<title>`` elements.

    Feed markup with :meth:`feed` (in as many pieces as needed) and read
    ``latest_title`` afterwards. ``latest_title`` is ``None`` until a
    complete ``<title>...</title>`` pair has been seen, and holds the text
    of the most recently closed one after that.

    Only text that appears between an opening and a closing ``title`` tag
    is collected, and all pieces are concatenated, so a title that reaches
    the parser in several ``handle_data`` calls (for example because the
    input was fed in chunks) is returned whole.
    """

    def __init__(self):
        super().__init__()
        # Most recent text node seen anywhere in the document; kept as a
        # public attribute for callers that already read it.
        self.current_data = None
        # Text of the most recently completed <title> element, or None.
        self.latest_title = None
        # True while the parser is between <title> and </title>.
        self._in_title = False
        # Text fragments collected for the <title> currently being parsed.
        self._title_parts = []

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``<title>`` tag opens."""
        if tag == "title":
            self._in_title = True
            self._title_parts = []

    def handle_data(self, data):
        """Remember the latest text node and collect it if inside a title."""
        self.current_data = data
        if self._in_title:
            self._title_parts.append(data)

    def handle_endtag(self, tag):
        """Publish the collected text when a ``</title>`` tag closes."""
        # A stray </title> with no matching start tag is ignored so that
        # unrelated text is never reported as the title.
        if tag == "title" and self._in_title:
            self.latest_title = "".join(self._title_parts)
            self._in_title = False
async def get_preview(url):
    """Fetch *url* and return the text of its HTML ``<title>``.

    The response body is streamed in 4 KiB chunks and parsing stops as
    soon as a non-empty title has been found, so the rest of the page is
    not downloaded.

    Args:
        url: Address of the page to preview.

    Returns:
        The title text, or ``None`` when the response is not served as
        ``text/html`` (including when it has no ``Content-Type`` header)
        or contains no non-empty ``<title>``.

    Raises:
        Whatever ``aiohttp`` raises for connection or protocol failures;
        these are not caught here.
    """
    request_headers = {"user-agent": "Mozilla/5.0 XmppPreviewer/1.0"}
    title = None
    # Both context managers guarantee the connection and the session are
    # released even when the request or the parsing raises.
    async with aiohttp.ClientSession(headers=request_headers) as session:
        async with session.get(url) as resp:
            print(url, resp.status)
            # The header may be missing entirely; treat that as "not HTML".
            content_type = resp.headers.get("content-type", "")
            if content_type.lower().startswith("text/html"):
                parser = TitleExtractor()
                # An incremental decoder keeps a multi-byte sequence that is
                # split across two chunks intact. The body is assumed to be
                # UTF-8; undecodable bytes are replaced instead of raising.
                decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
                while chunk := await resp.content.read(4096):
                    parser.feed(decoder.decode(chunk))
                    if parser.latest_title:
                        title = parser.latest_title
                        break
    return title
if __name__ == "__main__":
    # Manual smoke test: running this module as a script fetches one sample
    # URL and prints whatever title get_preview() extracts from it.
    async def main():
        """Look up the sample URL's title and print it."""
        sample_title = await get_preview("https://youtu.be/YUMaoIt1rDU")
        print(sample_title)

    asyncio.run(main())