"""Print new RSS feed entries as size-limited Markdown notes.

Usage: script <account> <config_path>

The config file is a JSON object mapping each feed URL to the timestamp
(seconds, as produced by ``time.mktime``) of the newest entry already
processed, or ``null`` if the feed has never been processed. After a run
the file is rewritten with the updated timestamps.
"""

import json
import re
import sys
import time

import feedparser
from markdownify import markdownify

# Maximum number of characters in a single printed note.
NOTE_LIMIT = 500


def process_html(html: str) -> str:
    """Convert *html* to Markdown and normalise the result.

    The Markdown produced by ``markdownify`` is post-processed in order:
    ``* `` bullets at the start of a line become ``- `` bullets, a
    backslash in front of any non-word character is removed, and runs of
    three or more newlines collapse to a single blank line. The result is
    stripped of leading and trailing whitespace.
    """
    text = markdownify(html)
    text = re.sub(r"^(\s*)\* ", r"\1- ", text, flags=re.MULTILINE)
    text = re.sub(r"\\(\W)", r"\1", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into stripped chunks of at most *limit* characters.

    Each chunk is cut at the last newline inside the current window, or
    failing that at the last space. When the window offers no usable break
    point the text is cut hard at *limit* characters so that progress is
    always made. Chunks that are empty after stripping are dropped, and
    the remainder after the last cut is returned as the final chunk.

    Raises:
        ValueError: if *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be positive")

    notes = []
    start = 0
    while len(text) - start > limit:
        window = text[start:start + limit]
        cut = window.rfind("\n")
        if cut <= 0:
            cut = window.rfind(" ")
        if cut <= 0:
            # No newline or space to break on (or only at index 0, which
            # would yield an empty chunk): cut hard at the window end.
            cut = limit
            step = limit
        else:
            # Skip the separator character itself.
            step = cut + 1
        note = window[:cut].strip()
        if note:
            notes.append(note)
        start += step

    # Whatever is left fits in one note; it must not be dropped.
    tail = text[start:].strip()
    if tail:
        notes.append(tail)
    return notes


def _timestamp(entry) -> float:
    """Return the entry's publication time as seconds.

    NOTE(review): ``time.mktime`` interprets the struct_time as local
    time. Kept as-is because the values stored in the config file were
    produced the same way; confirm before switching conversions.
    """
    return time.mktime(entry.published_parsed)


def main() -> None:
    """Process every feed in the config file and save the new timestamps."""
    if len(sys.argv) < 3:
        print("Usage:", sys.argv[0], "<account> <config_path>")
        sys.exit(1)

    # sys.argv[1] is the account argument; nothing in this script reads it.
    config_path = sys.argv[2]

    with open(config_path, "r", encoding="utf-8") as config_file:
        config = json.load(config_file)

    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        # Entries without a parsed publication date cannot be ordered or
        # compared against the stored timestamp, so they are skipped.
        entries = sorted(
            (
                entry for entry in rss.entries
                if getattr(entry, "published_parsed", None) is not None
            ),
            key=lambda entry: entry.published_parsed,
        )

        if config[url] is None:
            config[url] = 0.0
        last_seen = config[url]

        if not entries:
            continue
        newest = _timestamp(entries[-1])
        if newest <= last_seen:
            continue

        for entry in entries:
            # Compare the entry's own date, not the feed-level date.
            if _timestamp(entry) <= last_seen:
                continue
            notes = split_notes(
                "[" + entry.title + "](" + entry.link + ")\n"
                + process_html(entry.content[0].value)
                + "\n\n" + entry.link,
                NOTE_LIMIT,
            )
            for note in notes:
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open(config_path, "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)


if __name__ == "__main__":
    main()