"""Print new RSS/Atom feed entries as Markdown notes of bounded length.

This is ``main.py`` as introduced by commit 5c65cab6 ("Add RSS reading
functionality").  ``./config.json`` maps each feed URL to the timestamp of
the newest entry that has already been processed (``null`` for a feed that
has never been read); the file is rewritten at the end of every run.

Third-party requirements: ``feedparser`` and ``markdownify``.
"""

import json
import re
import time

# Location of the JSON state file (feed URL -> last processed timestamp).
CONFIG_PATH = "./config.json"

# Maximum length, in characters, of one printed note.
NOTE_LIMIT = 500

# A "* " list marker at the start of a line, keeping its indentation.
_BULLET_RE = re.compile(r"^(\s*)\* ", re.MULTILINE)
# A backslash escaping a single non-word character.
_ESCAPE_RE = re.compile(r"\\(\W)")
# Three or more consecutive newlines.
_BLANK_RUN_RE = re.compile(r"\n{3,}")


def process_html(html: str) -> str:
    """Convert an HTML fragment to tidied Markdown.

    After conversion with ``markdownify`` the text is post-processed, in
    this order:

    1. ``* `` list markers at the start of a line become ``- ``;
    2. a backslash in front of a non-word character is removed;
    3. runs of three or more newlines collapse to exactly two;
    4. leading and trailing whitespace is stripped.

    Args:
        html: The HTML source to convert.

    Returns:
        The cleaned-up Markdown text.
    """
    # Imported lazily so the pure-text helpers in this module stay
    # importable when the third-party converter is not installed.
    from markdownify import markdownify

    markdown = markdownify(html)
    markdown = _BULLET_RE.sub(r"\1- ", markdown)
    markdown = _ESCAPE_RE.sub(r"\1", markdown)
    markdown = _BLANK_RUN_RE.sub("\n\n", markdown)
    return markdown.strip()


def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into chunks of at most *limit* characters.

    Each chunk is cut at the last newline inside the current window, or
    failing that at the last space; the separator itself is dropped.  When
    a window contains no usable separator the text is cut hard at *limit*
    characters so that progress is always made.  Chunks are stripped of
    surrounding whitespace and empty chunks are omitted.

    Args:
        text: The text to split.
        limit: Maximum chunk length in characters; must be at least 1.

    Returns:
        The chunks in order, including the final remainder.  Text that
        already fits within *limit* yields a single chunk; empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If *limit* is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")

    notes = []
    start = 0
    while len(text) - start > limit:
        window = text[start:start + limit]

        cut = window.rfind("\n")
        if cut <= 0:
            cut = window.rfind(" ")

        if cut <= 0:
            # No separator (or only one at index 0, which would produce an
            # empty chunk): take the whole window and consume nothing extra.
            cut = limit
            step = limit
        else:
            # Skip over the separator that was used as the cut point.
            step = cut + 1

        chunk = window[:cut].strip()
        if chunk:
            notes.append(chunk)
        start += step

    # Whatever is left fits in one note.
    remainder = text[start:].strip()
    if remainder:
        notes.append(remainder)

    return notes


def _entry_html(entry) -> str:
    """Return the HTML body of a feed entry, or ``""`` if it has none.

    Uses the first ``content`` item when present and falls back to the
    entry's ``summary`` otherwise.
    """
    if entry.get("content"):
        return entry.content[0].value
    return entry.get("summary", "")


def main() -> None:
    """Print the unseen entries of every configured feed and save state.

    For each URL in the config file, entries published after the stored
    timestamp are printed oldest first, each one split into notes of at
    most ``NOTE_LIMIT`` characters and followed by a blank line.  The
    stored timestamp is then advanced to that of the newest entry.
    """
    # Imported lazily for the same reason as in ``process_html``.
    import feedparser

    with open(CONFIG_PATH, "r", encoding="utf-8") as config_file:
        config = json.load(config_file)

    # Only values of existing keys are reassigned below, so iterating the
    # dict while updating it is safe.
    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        if config[url] is None:
            config[url] = 0.0
        last_seen = config[url]

        # Entries without a parsed publication date can be neither ordered
        # nor compared with the stored timestamp, so they are left out.
        dated = sorted(
            (item for item in rss.entries if item.get("published_parsed")),
            key=lambda item: item.published_parsed,
        )
        if not dated:
            continue

        # NOTE(review): time.mktime() reads the struct_time as local time.
        # It is kept so that values already stored in config.json remain
        # comparable; confirm before switching to calendar.timegm().
        newest = time.mktime(dated[-1].published_parsed)
        if newest <= last_seen:
            continue

        for entry in dated:
            # Compare this entry's own date, not the feed-level one.
            if time.mktime(entry.published_parsed) <= last_seen:
                continue

            body = process_html(_entry_html(entry))
            text = f"[{entry.title}]({entry.link})\n{body}\n\n{entry.link}"
            for note in split_notes(text, NOTE_LIMIT):
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open(CONFIG_PATH, "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)


if __name__ == "__main__":
    main()