Add RSS reading functionality
This commit is contained in:
commit
5c65cab619
10 changed files with 118 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
*.json
|
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
4
.idea/encodings.xml
Normal file
4
.idea/encodings.xml
Normal file
|
@ -0,0 +1,4 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
|
||||
</project>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
10
.idea/mastofeeds.iml
Normal file
10
.idea/mastofeeds.iml
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
|
@ -0,0 +1,4 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (mastofeeds)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/mastofeeds.iml" filepath="$PROJECT_DIR$/.idea/mastofeeds.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
60
main.py
Normal file
60
main.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
import feedparser
|
||||
import json
|
||||
from markdownify import markdownify
|
||||
import re
|
||||
import time
|
||||
|
||||
|
||||
def process_html(html: str) -> str:
    """Convert an HTML fragment to tidied-up Markdown text.

    The HTML is run through ``markdownify`` and the result is then
    post-processed with three regular-expression passes before surrounding
    whitespace is stripped.

    Args:
        html: The HTML markup to convert.

    Returns:
        The converted text with ``-`` list bullets, backslash escapes in
        front of non-word characters removed, and no run of more than two
        consecutive newlines.
    """
    text = markdownify(html)
    # Use "-" instead of "*" as the bullet marker, keeping the indentation.
    # ``flags`` is passed by keyword: positional count/flags arguments to
    # re.sub are deprecated as of Python 3.13.
    text = re.sub(r"^(\s*)\* ", r"\1- ", text, flags=re.MULTILINE)
    # Drop the backslash that precedes any non-word character.
    text = re.sub(r"\\(\W)", r"\1", text)
    # Collapse three or more newlines into a single blank line.
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
||||
|
||||
|
||||
def split_notes(text: str, limit: int) -> list[str]:
    """Split *text* into consecutive notes of at most *limit* characters.

    Each note is cut at the last newline inside the current window of
    ``limit`` characters, or failing that at the last space. If the window
    contains no usable break point, the text is cut hard at ``limit``
    characters so that progress is always made. Notes are stripped of
    surrounding whitespace and empty notes are dropped.

    Args:
        text: The text to split.
        limit: Maximum number of characters per note; must be positive.

    Returns:
        The notes in order, including the final remainder. Text that already
        fits within *limit* yields a single note; blank text yields ``[]``.

    Raises:
        ValueError: If *limit* is not positive.
    """
    if limit <= 0:
        raise ValueError("limit must be positive")

    notes: list[str] = []
    prev = 0

    while len(text) - prev > limit:
        window = text[prev:prev + limit]

        # Prefer breaking on a line boundary, then on a word boundary. A
        # break at index 0 would yield an empty note, so it is not usable.
        cut = window.rfind("\n")
        if cut <= 0:
            cut = window.rfind(" ")

        if cut <= 0:
            # No break point in the window: cut hard so the loop advances.
            chunk = window
            prev += limit
        else:
            chunk = window[:cut]
            prev += cut + 1  # skip the separator itself

        chunk = chunk.strip()
        if chunk:
            notes.append(chunk)

    # Whatever is left fits within the limit and forms the last note.
    tail = text[prev:].strip()
    if tail:
        notes.append(tail)

    return notes
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # config.json is read as a mapping from feed URL to a timestamp (or
    # null); the timestamp of the newest entry seen is written back below.
    with open("./config.json", "r", encoding="utf-8") as config_file:
        config = json.load(config_file)

    for url in config:
        print("Updating", url)
        rss = feedparser.parse(url)

        # Nothing to do for an empty feed; indexing entries[-1] below would
        # otherwise raise IndexError.
        if not rss.entries:
            continue

        rss.entries.sort(key=lambda entry: entry.published_parsed)

        if config[url] is None:
            config[url] = 0.0

        # NOTE(review): time.mktime interprets the struct_time as local
        # time; kept as-is so already-saved timestamps stay comparable.
        newest = time.mktime(rss.entries[-1].published_parsed)
        if newest <= config[url]:
            continue

        for entry in rss.entries:
            # Compare each entry's own publication time against the stored
            # timestamp so that already-seen entries are skipped.
            if time.mktime(entry.published_parsed) <= config[url]:
                continue

            notes = split_notes("[" + entry.title + "](" + entry.link + ")\n" + process_html(
                entry.content[0].value) + "\n\n" + entry.link, 500)
            for note in notes:
                print(note)
            print()

        config[url] = newest

    print("Saving config", config)
    with open("./config.json", "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)
|
16
requirements.txt
Normal file
16
requirements.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
beautifulsoup4==4.12.3
|
||||
blurhash==1.1.4
|
||||
certifi==2024.8.30
|
||||
charset-normalizer==3.4.0
|
||||
decorator==5.1.1
|
||||
feedparser==6.0.11
|
||||
idna==3.10
|
||||
markdownify==0.13.1
|
||||
Mastodon.py==1.8.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-magic==0.4.27
|
||||
requests==2.32.3
|
||||
sgmllib3k==1.0.0
|
||||
six==1.16.0
|
||||
soupsieve==2.6
|
||||
urllib3==2.2.3
|
Loading…
Reference in a new issue