import re
import aiohttp
import asyncio
import logging
# Verbose root-logger setup: DEBUG level so per-URL and per-chunk progress
# messages below are visible on the console.
logging.basicConfig(level=logging.DEBUG)
def parse_title(html):
    """Extract the page title from *html*, minus its trailing "- <site>" part.

    The last "-"-separated segment (e.g. " - YouTube") is dropped.
    A title containing no "-" is returned as-is (the original code
    emptied it to "" via an unconditional pop()).

    Args:
        html: raw HTML text of the page.

    Returns:
        The cleaned title string, or None when no <title> tag is found.
    """
    match = re.search(r"<title>(.+)</title>", html)
    if not match:
        return None
    title = match.group(1)
    # rpartition splits off only the final "-" segment in one pass.
    head, sep, _tail = title.rpartition("-")
    return (head if sep else title).strip()
def remove_duplicities():
    # TODO(review): unimplemented stub — the name suggests it should drop
    # duplicate URLs from the playlist, but no callers or semantics are
    # visible here; confirm intent before implementing.
    pass
def load_playlist(filename):
    """Read an M3U playlist file and return its URL lines.

    Lines beginning with "#" (M3U headers/directives such as #EXTM3U and
    #EXTINF) are skipped; every other line is whitespace-stripped and kept.

    Args:
        filename: path to the .m3u file.

    Returns:
        list[str]: the stripped non-directive lines, in file order.
    """
    # Iterate the file lazily instead of materializing readlines().
    with open(filename, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if not line.startswith("#")]
def to_chunks(urls, in_bulk=10):
    """Yield successive slices of *urls* with at most *in_bulk* items each.

    Args:
        urls: sequence to split (must support len() and slicing).
        in_bulk: maximum chunk size; defaults to 10, the original
            hard-coded batch size, so existing callers are unchanged.

    Yields:
        Consecutive slices of *urls*; the final slice may be shorter.
    """
    for start in range(0, len(urls), in_bulk):
        yield urls[start:start + in_bulk]
async def fetch(chunks):
    """Scrape the <title> of every URL in *chunks* and print the results.

    Args:
        chunks: iterable of URL batches (as produced by to_chunks()). URLs
            within one batch are fetched concurrently; a 0.5 s pause
            separates batches to avoid hammering the server.

    Side effects:
        Performs HTTP GETs via aiohttp and prints the list of titles
        (entries are None for non-200 responses).
    """

    async def fetch_one(session, url):
        # Return the parsed title, or None for non-200 responses.
        async with session.get(url) as response:
            if response.status != 200:
                # Was a silent implicit None; make the fallthrough explicit
                # and log it so failed URLs are visible.
                logging.warning(f"URL {url} returned HTTP {response.status}.")
                return None
            title = parse_title(await response.text())
            logging.debug(f"URL {url} scraped.")
            return title

    async def fetch_all(session, chunks):
        titles = []
        for i, ch in enumerate(chunks):
            logging.debug(f"{'-' * 10} CHUNK {i} {'-' * 10}")
            # Fetch every URL of the chunk concurrently.
            titles.extend(await asyncio.gather(*[fetch_one(session, url) for url in ch]))
            await asyncio.sleep(0.5)  # courtesy delay between batches
        return titles

    async with aiohttp.ClientSession() as session:
        titles = await fetch_all(session, chunks)
    print(titles)
# Entry-point guard: importing this module must not trigger scraping.
if __name__ == "__main__":
    urls = load_playlist("../youtube_streams.m3u")
    asyncio.run(fetch(to_chunks(urls)))