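YouTube Shorts workaround: rewrite "youtube.com/shorts/" URLs to "youtube.com/watch?v=" and strip the URL fragment before fetching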

This commit is contained in:
Gabriel Huber 2025-05-15 14:19:01 +02:00
parent c7d6ef4414
commit e689fa9653

View file

@ -1,6 +1,7 @@
import aiohttp import aiohttp
import html.parser import html.parser
import asyncio import asyncio
import urllib.parse
class TitleExtractor(html.parser.HTMLParser): class TitleExtractor(html.parser.HTMLParser):
def __init__(self): def __init__(self):
@ -16,11 +17,14 @@ class TitleExtractor(html.parser.HTMLParser):
self.latest_title = self.current_data self.latest_title = self.current_data
async def get_preview(url): async def get_preview(url):
url = url.replace("youtube.com/shorts/", "youtube.com/watch?v=")
url_parsed = urllib.parse.urlparse(url)
url_no_fragment = url_parsed._replace(fragment="").geturl()
session = aiohttp.ClientSession() session = aiohttp.ClientSession()
session.headers["user-agent"] = "Mozilla/5.0 XmppPreviewer/1.0" session.headers["user-agent"] = "Mozilla/5.0 XmppPreviewer/1.0"
resp = await session.get(url) resp = await session.get(url_no_fragment)
title = None title = None
print(url, resp.status) print(url_no_fragment, resp.status)
if resp.headers["content-type"].startswith("text/html"): if resp.headers["content-type"].startswith("text/html"):
parser = TitleExtractor() parser = TitleExtractor()
while chunk := await resp.content.read(4096): while chunk := await resp.content.read(4096):
@ -34,6 +38,6 @@ async def get_preview(url):
if __name__ == "__main__": if __name__ == "__main__":
async def main(): async def main():
print(await get_preview("https://youtu.be/YUMaoIt1rDU")) print(await get_preview("https://youtu.be/WNFahAioGP8"))
asyncio.run(main()) asyncio.run(main())