fedifeeds/main.py
2024-11-25 00:07:33 +10:00

68 lines
1.8 KiB
Python

import feedparser
import json
from markdownify import markdownify
import re
import sys
import time
def process_html(html: str) -> str:
    """Convert an HTML fragment into tidied Markdown text.

    The HTML is passed through ``markdownify`` and the result is then
    post-processed in this order:

    1. ``* `` list bullets at the start of a line become ``- ``.
    2. A backslash directly in front of a non-word character is removed
       (presumably undoing escapes added by ``markdownify``).
    3. Runs of three or more newlines collapse to exactly two.
    4. Leading and trailing whitespace is stripped.
    """
    markdown = markdownify(html)
    # count=0 means "replace every occurrence"; MULTILINE lets ^ match at
    # the start of each line rather than only at the start of the text.
    dashed = re.sub(r"^(\s*)\* ", r"\1- ", markdown, count=0, flags=re.MULTILINE)
    unescaped = re.sub(r"\\(\W)", r"\1", dashed)
    collapsed = re.sub(r"\n{3,}", "\n\n", unescaped)
    return collapsed.strip()
def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into consecutive pieces of at most *limit* characters.

    Each piece is cut at the last newline inside the current window of
    *limit* characters, or at the last space when the window holds no
    newline. When the window contains neither, it is cut hard at *limit*
    characters so the loop always makes progress. Pieces are stripped of
    surrounding whitespace and empty pieces are dropped. The remainder
    that fits within *limit* is returned as the final piece.

    Args:
        text: The text to split.
        limit: Maximum length of each returned piece; must be positive.

    Returns:
        The pieces in their original order. Empty when *text* is empty
        or whitespace only.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be positive")

    notes = []
    start = 0
    # Only split while the remainder does not fit into a single note.
    while len(text) - start > limit:
        window = text[start:start + limit]
        cut = window.rfind("\n")
        if cut == -1:
            cut = window.rfind(" ")
        if cut == -1:
            # No break point at all: take the whole window instead of
            # looping forever on a zero-length advance.
            piece, step = window, limit
        else:
            # Cut before the separator and skip over it.
            piece, step = window[:cut], cut + 1
        piece = piece.strip()
        if piece:
            notes.append(piece)
        start += step

    # Whatever is left fits within the limit; it must not be dropped.
    tail = text[start:].strip()
    if tail:
        notes.append(tail)
    return notes
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Usage: <script> <account> <config>
    #
    # The config file is a JSON object mapping feed URLs to the timestamp
    # (as produced by time.mktime) of the newest entry already handled, or
    # null for a feed that has never been processed. For every feed, the
    # entries newer than that timestamp are converted to notes and printed,
    # then the config file is rewritten with the updated timestamps.
    if len(sys.argv) < 3:
        print("Usage:", sys.argv[0], "<account> <config>")
        sys.exit(1)

    # The account argument is read but not used by the code below.
    account = sys.argv[1]
    config_path = sys.argv[2]

    with open(config_path, "r") as config_file:
        config = json.load(config_file)

    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        # A null timestamp means the feed has never been processed.
        if config[url] is None:
            config[url] = 0.0
        last_seen = config[url]

        # Nothing to do (and no "newest entry" to index) for an empty feed.
        if not rss.entries:
            continue

        # Oldest first, so notes are emitted in publication order and the
        # last element is the newest entry.
        rss.entries.sort(key=lambda entry: entry.published_parsed)

        # NOTE(review): time.mktime interprets the struct_time as local
        # time; the stored and compared values both use it, so it is kept
        # for compatibility with existing config files - confirm whether
        # the feed timestamps are actually UTC.
        newest = time.mktime(rss.entries[-1].published_parsed)
        if newest <= last_seen:
            continue

        for entry in rss.entries:
            # Compare this entry's own timestamp (not the feed's) so that
            # entries handled on a previous run are skipped.
            if time.mktime(entry.published_parsed) <= last_seen:
                continue
            body = process_html(entry.content[0].value)
            notes = split_notes(
                "[" + entry.title + "](" + entry.link + ")\n" + body + "\n\n" + entry.link,
                500,
            )
            for note in notes:
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open(config_path, "w") as config_file:
        json.dump(config, config_file, indent=4)