import re
import aiohttp
import asyncio
import logging
# Verbose root-logger setup: DEBUG level so per-URL and per-chunk progress
# messages below are visible on the console.
logging.basicConfig(level=logging.DEBUG)
def parse_title(html):
    """Extract the page title from *html*, minus its trailing "- <site>" part.

    The last "-"-separated segment (e.g. " - YouTube") is dropped.
    A title containing no "-" is returned as-is (the original code
    emptied it to "" via an unconditional pop()).

    Args:
        html: raw HTML text of the page.

    Returns:
        The cleaned title string, or None when no <title> tag is found.
    """
    match = re.search(r"<title>(.+)</title>", html)
    if not match:
        return None
    title = match.group(1)
    # rpartition splits off only the final "-" segment in one pass.
    head, sep, _tail = title.rpartition("-")
    return (head if sep else title).strip()
def remove_duplicities():
    # TODO(review): unimplemented stub — the name suggests it should drop
    # duplicate URLs from the playlist, but no callers or semantics are
    # visible here; confirm intent before implementing.
    pass
def load_playlist(filename):
    """Read an M3U playlist file and return its URL lines.

    Lines beginning with "#" (M3U headers/directives such as #EXTM3U and
    #EXTINF) are skipped; every other line is whitespace-stripped and kept.

    Args:
        filename: path to the .m3u file.

    Returns:
        list[str]: the stripped non-directive lines, in file order.
    """
    # Iterate the file lazily instead of materializing readlines().
    with open(filename, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if not line.startswith("#")]
def to_chunks(urls, in_bulk=10):
    """Yield successive slices of *urls* with at most *in_bulk* items each.

    Args:
        urls: sequence to split (must support len() and slicing).
        in_bulk: maximum chunk size; defaults to 10, the original
            hard-coded batch size, so existing callers are unchanged.

    Yields:
        Consecutive slices of *urls*; the final slice may be shorter.
    """
    for start in range(0, len(urls), in_bulk):
        yield urls[start:start + in_bulk]
async def fetch(chunks):
    """Scrape the <title> of every URL in *chunks* and print the results.

    Args:
        chunks: iterable of URL batches (as produced by to_chunks()). URLs
            within one batch are fetched concurrently; a 0.5 s pause
            separates batches to avoid hammering the server.

    Side effects:
        Performs HTTP GETs via aiohttp and prints the list of titles
        (entries are None for non-200 responses).
    """

    async def fetch_one(session, url):
        # Return the parsed title, or None for non-200 responses.
        async with session.get(url) as response:
            if response.status != 200:
                # Was a silent implicit None; make the fallthrough explicit
                # and log it so failed URLs are visible.
                logging.warning(f"URL {url} returned HTTP {response.status}.")
                return None
            title = parse_title(await response.text())
            logging.debug(f"URL {url} scraped.")
            return title

    async def fetch_all(session, chunks):
        titles = []
        for i, ch in enumerate(chunks):
            logging.debug(f"{'-' * 10} CHUNK {i} {'-' * 10}")
            # Fetch every URL of the chunk concurrently.
            titles.extend(await asyncio.gather(*[fetch_one(session, url) for url in ch]))
            await asyncio.sleep(0.5)  # courtesy delay between batches
        return titles

    async with aiohttp.ClientSession() as session:
        titles = await fetch_all(session, chunks)
    print(titles)
# Entry-point guard: importing this module must not trigger scraping.
if __name__ == "__main__":
    urls = load_playlist("../youtube_streams.m3u")
    asyncio.run(fetch(to_chunks(urls)))