# fedifeeds/main.py

import feedparser
import json
from markdownify import markdownify
import re
import sys
import time


def process_html(html: str) -> str:
    """Convert an HTML fragment to tidied Markdown text.

    The HTML is passed through ``markdownify`` and the result is then
    post-processed, in this order:

    1. ``* `` list markers at the start of a line (after optional leading
       whitespace) are rewritten to ``- ``.
    2. A backslash followed by a non-word character is reduced to that
       character alone.
    3. Runs of three or more newlines are collapsed to exactly two.
    4. Leading and trailing whitespace is stripped.

    Args:
        html: The HTML markup to convert.

    Returns:
        The cleaned-up Markdown string.
    """
    markdown = markdownify(html)
    # ``flags`` is passed by keyword: positional ``count``/``flags`` arguments
    # to ``re.sub`` are easy to mix up and are deprecated in recent Pythons.
    markdown = re.sub(r"^(\s*)\* ", r"\1- ", markdown, flags=re.MULTILINE)
    markdown = re.sub(r"\\(\W)", r"\1", markdown)
    markdown = re.sub(r"\n{3,}", "\n\n", markdown)
    return markdown.strip()


def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into consecutive notes of at most *limit* characters.

    While more than *limit* characters remain, the next *limit* characters
    are examined and the note is cut at the last newline in that window,
    or, failing that, at the last space. The separator itself is dropped.
    If the window contains neither, the note is cut hard at *limit*
    characters so the loop always makes progress. Whatever remains at the
    end becomes the final note.

    Each note is stripped of surrounding whitespace, and notes that are
    empty after stripping are omitted.

    Args:
        text: The text to split.
        limit: Maximum number of characters per note; must be positive.

    Returns:
        The notes in their original order. Empty if *text* is empty or
        consists only of whitespace.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be positive")

    notes = []
    prev = 0

    # Only split while the remainder does not fit in a single note; text
    # that fits exactly in ``limit`` is kept whole.
    while len(text) - prev > limit:
        window = text[prev:prev + limit]

        cut = window.rfind("\n")
        if cut == -1:
            cut = window.rfind(" ")

        if cut == -1:
            # No natural break point: take the whole window and continue
            # right after it without discarding any character.
            chunk = window
            prev += limit
        else:
            chunk = window[:cut]
            prev += cut + 1  # skip the separator itself

        chunk = chunk.strip()
        if chunk:
            notes.append(chunk)

    # The remainder (at most ``limit`` characters) is the last note.
    tail = text[prev:].strip()
    if tail:
        notes.append(tail)

    return notes


if __name__ == "__main__":
    # Usage: <script> <account> <config>
    # <config> is a JSON file holding an object that maps each feed URL to
    # the timestamp of the newest entry already handled (or null when the
    # feed has never been processed). It is rewritten at the end of the run.
    if len(sys.argv) < 3:
        print("Usage:", sys.argv[0], "<account> <config>")
        sys.exit(1)

    account = sys.argv[1]  # read from the command line; not used further here
    config_path = sys.argv[2]

    with open(config_path, "r", encoding="utf-8") as config_file:
        config = json.load(config_file)

    # Only values of existing keys are reassigned below, so iterating the
    # dict while updating it is safe.
    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        if config[url] is None:
            config[url] = 0.0

        # A feed with no entries has nothing to emit; without this guard
        # rss.entries[-1] below would raise IndexError.
        if not rss.entries:
            continue

        # Oldest first, so notes are emitted in chronological order and the
        # last element is the newest entry.
        rss.entries.sort(key=lambda entry: entry.published_parsed)

        # NOTE(review): time.mktime interprets the struct_time as local time;
        # feedparser documents *_parsed values as UTC. The same conversion is
        # used for stored and fresh values, so comparisons stay consistent,
        # but confirm whether calendar.timegm is what is actually wanted.
        latest = time.mktime(rss.entries[-1].published_parsed)
        if latest <= config[url]:
            continue

        for entry in rss.entries:
            # Compare the entry's own timestamp (not the feed-level one) so
            # that only entries newer than the stored mark are emitted.
            if time.mktime(entry.published_parsed) <= config[url]:
                continue

            body = process_html(entry.content[0].value)
            text = f"[{entry.title}]({entry.link})\n{body}\n\n{entry.link}"
            for note in split_notes(text, 500):
                print(note)
            print()

        config[url] = latest

    print("Saving config", config)
    with open(config_path, "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)