61 lines
1.6 KiB
Python
61 lines
1.6 KiB
Python
|
import feedparser
|
||
|
import json
|
||
|
from markdownify import markdownify
|
||
|
import re
|
||
|
import time
|
||
|
|
||
|
|
||
|
def process_html(html: str) -> str:
    """Convert an HTML fragment to tidied-up Markdown text.

    The HTML is converted with ``markdownify`` and the result is then
    normalised in four steps:

    1. ``* `` list bullets at the start of a line (after any indentation)
       are rewritten as ``- ``, keeping the indentation.
    2. A backslash placed in front of a non-word character is removed,
       leaving just that character.
    3. Runs of three or more newlines are collapsed to exactly two.
    4. Leading and trailing whitespace is stripped.

    Args:
        html: The HTML source to convert.

    Returns:
        The cleaned Markdown string.
    """
    markdown = markdownify(html)

    # ``flags`` is passed by keyword: supplying ``count``/``flags``
    # positionally to ``re.sub`` is deprecated since Python 3.13.
    markdown = re.sub(r"^(\s*)\* ", r"\1- ", markdown, flags=re.MULTILINE)
    markdown = re.sub(r"\\(\W)", r"\1", markdown)
    markdown = re.sub(r"\n{3,}", "\n\n", markdown)

    return markdown.strip()
|
||
|
|
||
|
|
||
|
def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into consecutive notes of at most *limit* characters.

    While more than *limit* characters remain, the next *limit*-character
    window is cut at its last newline or, if it contains no newline, at its
    last space. The separator itself is discarded. A window containing
    neither is cut hard at *limit* characters so the loop always advances.
    Whatever remains at the end (at most *limit* characters) becomes the
    final note.

    Every note is stripped of surrounding whitespace, and notes that are
    empty after stripping are omitted.

    Args:
        text: The text to split.
        limit: Maximum length of a single note; must be positive.

    Returns:
        The notes in order of appearance. Empty if *text* is empty or
        consists only of whitespace.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")

    notes = []
    prev = 0

    while len(text) - prev > limit:
        sub = text[prev:prev + limit]

        # Prefer breaking on a line boundary, then on a word boundary.
        cut = sub.rfind("\n")
        if cut == -1:
            cut = sub.rfind(" ")

        if cut == -1:
            # No separator in the window: hard split, otherwise ``prev``
            # would never advance and the loop would spin forever.
            chunk = sub
            prev += limit
        else:
            chunk = sub[:cut]
            prev += cut + 1  # skip the separator character itself

        chunk = chunk.strip()
        if chunk:
            notes.append(chunk)

    # The remainder fits in a single note; without this the tail of every
    # text (and any text shorter than ``limit``) would be silently dropped.
    tail = text[prev:].strip()
    if tail:
        notes.append(tail)

    return notes
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # config.json is loaded as a mapping of feed URL -> timestamp of the
    # newest entry already handled (null when the feed has never been run).
    with open("./config.json", "r") as config_file:
        config = json.load(config_file)

    note_limit = 500  # maximum length of one printed note

    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        if config[url] is None:
            config[url] = 0.0
        last_seen = config[url]

        # A feed that returned no entries has nothing to print, and indexing
        # entries[-1] below would raise IndexError.
        if not rss.entries:
            continue

        # Oldest first, so notes are printed in chronological order and the
        # last element is the newest entry.
        rss.entries.sort(key=lambda entry: entry.published_parsed)

        # NOTE(review): time.mktime interprets the struct_time as local time,
        # while feedparser documents *_parsed values as UTC. Kept as-is so
        # the values stay comparable with timestamps already stored in
        # config.json; confirm before switching to calendar.timegm.
        newest = time.mktime(rss.entries[-1].published_parsed)
        if newest <= last_seen:
            continue

        for entry in rss.entries:
            # Compare the entry's own timestamp, not the feed-level one, so
            # entries handled in a previous run are skipped individually.
            if time.mktime(entry.published_parsed) <= last_seen:
                continue

            header = "[" + entry.title + "](" + entry.link + ")\n"
            body = process_html(entry.content[0].value)
            notes = split_notes(header + body + "\n\n" + entry.link, note_limit)

            for note in notes:
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open("./config.json", "w") as config_file:
        json.dump(config, config_file, indent=4)
|