Add RSS reading functionality

This commit is contained in:
pancakes 2024-10-15 18:56:14 +10:00
commit 5c65cab619
No known key found for this signature in database
GPG key ID: ED53D426432B861B
10 changed files with 118 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.json

3
.idea/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

4
.idea/encodings.xml Normal file
View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
</project>

View file

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

10
.idea/mastofeeds.iml Normal file
View file

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

4
.idea/misc.xml Normal file
View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (mastofeeds)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/mastofeeds.iml" filepath="$PROJECT_DIR$/.idea/mastofeeds.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

60
main.py Normal file
View file

@ -0,0 +1,60 @@
import feedparser
import json
from markdownify import markdownify
import re
import time
def process_html(html: str) -> str:
    """Convert an HTML fragment into tidied Markdown text.

    The HTML is run through ``markdownify`` and the result is cleaned up in
    three regex passes, applied in this order:

    1. ``* `` bullets at the start of a line become ``- `` bullets; any
       leading whitespace on the line is preserved.
    2. A backslash in front of a non-word character is dropped, leaving
       only the character itself.
    3. Runs of three or more newlines are collapsed to exactly two.

    Args:
        html: The HTML markup to convert.

    Returns:
        The cleaned Markdown with leading/trailing whitespace stripped.
    """
    markdown = markdownify(html)
    # `flags` is passed by keyword: handing count/flags to re.sub
    # positionally is deprecated since Python 3.13.
    markdown = re.sub(r"^(\s*)\* ", r"\1- ", markdown, flags=re.MULTILINE)
    markdown = re.sub(r"\\(\W)", r"\1", markdown)
    return re.sub(r"\n{3,}", "\n\n", markdown).strip()
def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into chunks of at most *limit* characters.

    Each chunk is cut at the last newline inside the current window; if the
    window holds no usable newline, the last space is used instead. When
    the window contains no usable whitespace at all, the chunk is cut hard
    at *limit* characters so that progress is always made. The whitespace
    character used as a split point is dropped, every chunk is stripped,
    and empty chunks are omitted.

    Args:
        text: The text to split.
        limit: Maximum length of a single chunk; must be positive.

    Returns:
        The chunks in their original order. Text that already fits within
        *limit* yields a single chunk; empty or whitespace-only text yields
        an empty list.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be positive")

    notes = []
    prev = 0
    # Only split while what is left does not fit into a single chunk.
    while len(text) - prev > limit:
        window = text[prev:prev + limit]
        cut = window.rfind("\n")
        if cut <= 0:
            cut = window.rfind(" ")
        if cut <= 0:
            # No usable whitespace in this window: cut hard at the limit
            # instead of stalling on the same position forever.
            chunk = window
            prev += limit
        else:
            chunk = window[:cut]
            prev += cut + 1  # skip the whitespace used as the split point
        chunk = chunk.strip()
        if chunk:
            notes.append(chunk)

    # Whatever is left fits in one chunk and must not be dropped.
    tail = text[prev:].strip()
    if tail:
        notes.append(tail)
    return notes
if __name__ == "__main__":
    # config.json maps each feed URL to the timestamp of the newest entry
    # that has already been processed (or null for a feed never seen before).
    with open("./config.json", "r", encoding="utf-8") as config_file:
        config = json.load(config_file)

    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        if config[url] is None:
            config[url] = 0.0
        last_seen = config[url]

        if not rss.entries:
            # Nothing was parsed for this feed; indexing entries[-1] below
            # would raise IndexError.
            continue

        # Oldest first, so entries are emitted chronologically and the
        # newest one ends up last.
        rss.entries.sort(key=lambda entry: entry.published_parsed)

        # NOTE(review): feedparser documents published_parsed as UTC while
        # time.mktime interprets its argument as local time. Kept as-is so
        # the timestamps already stored in config.json stay comparable —
        # confirm before switching to calendar.timegm.
        newest = time.mktime(rss.entries[-1].published_parsed)
        if newest <= last_seen:
            continue

        for entry in rss.entries:
            # Filter on the entry's own publication time (not the feed's),
            # so only entries newer than the stored timestamp are emitted.
            if time.mktime(entry.published_parsed) <= last_seen:
                continue
            body = process_html(entry.content[0].value)
            text = "[" + entry.title + "](" + entry.link + ")\n" + body + "\n\n" + entry.link
            for note in split_notes(text, 500):
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open("./config.json", "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)

16
requirements.txt Normal file
View file

@ -0,0 +1,16 @@
beautifulsoup4==4.12.3
blurhash==1.1.4
certifi==2024.8.30
charset-normalizer==3.4.0
decorator==5.1.1
feedparser==6.0.11
idna==3.10
markdownify==0.13.1
Mastodon.py==1.8.1
python-dateutil==2.9.0.post0
python-magic==0.4.27
requests==2.32.3
sgmllib3k==1.0.0
six==1.16.0
soupsieve==2.6
urllib3==2.2.3