# luciaa.at/cgbe.py
#
# 394 lines
# 16 KiB
# Python

import re
import jinja2
import json
import pathlib
from datetime import datetime
import mistune
import argparse
# Globals
# Built-in default configuration. apply_config() overlays the values found in
# configs/cgbe.json on top of these defaults, and writes these defaults to
# that file when it does not exist yet.
global_config = {
    # Input, template and output locations.
    "paths": {
        # Folder scanned for <name>.md / <name>.json entry pairs.
        "entries_folder": "entry_data/",
        # Folder the Jinja FileSystemLoader reads templates from.
        "templates_folder": "templates/",
        # Template file names.
        "templates": {
            "blog_entry": "blog_entry.html",
            "overview": "overview.html",
            "tag_overview": "tag_overview.html"
        },
        # Prefix used for generated entry pages and tag pages.
        "generated_folder": "generated/",
        "generated": {
            # Output path of the overview page; also handed to the entry and
            # tag templates as `overview_backlink`.
            "overview": "overview.html"
        }
    },
    # Settings read by format_datetime().
    "date_time": {
        # The Unix timestamp is returned only when both of these are set.
        "use_unix_time": False,
        "exclusively_use_unix_time": False,
        # One of "YMD", "DMY" or "MDY" (compared case-insensitively).
        "date_ordering": "YMD",
        # Either one string or a list of separator strings. The key name
        # (including its spelling) is part of the config file format.
        "date_seperator": "-",
        # Flags about the time-of-day part of a formatted date.
        "show_time": False,
        "time_format": "24h",
        "show_seconds": False,
        # When set, appended to the formatted date after a space.
        "displayed_timezone": None
    },
    # Fallback values for entry metadata.
    "defaults": {
        "author": None,
        # Consulted by date_time_handling() when an entry date cannot be
        # parsed: "now", "0" / "unix_0", or None for "leave the date empty".
        "date_time": None
    }
}
# Module-wide switch for additional diagnostic output.
verbose = False
# Extra values passed to every rendered template as `opt`.
opt = dict()
class Entry:
    """Container for one blog entry.

    Holds the metadata of an entry (link target, date, author, tags, content
    warnings), its heading and its rendered HTML body. Every field is
    optional and defaults to ``None``.
    """

    def __init__(self, href=None, date=None, time=None, author=None, tags=None, content_warnings=None, heading=None,
                 html=None):
        # Link / scheduling information.
        self.href, self.date, self.time = href, date, time
        # Descriptive metadata.
        self.author, self.tags, self.content_warnings = author, tags, content_warnings
        # Content.
        self.heading, self.html = heading, html

    def __str__(self):
        """Return a multi-line, human-readable dump of the entry."""
        summary_line = (
            f"Hyperlink: {self.href}, Date: {self.date}, Author: {self.author}, Tags: {self.tags}, "
            f"Content Warnings: {self.content_warnings}."
        )
        remaining_lines = (f"Heading: {self.heading}", "Unformatted text:", f"{self.html}")
        return "\n".join((summary_line, *remaining_lines))
# Folder the Jinja templates are loaded from. This is evaluated when the
# module is loaded, i.e. before apply_config() runs, so it always uses the
# built-in default of global_config["paths"]["templates_folder"].
template_path = pathlib.Path(global_config["paths"]["templates_folder"])
# Shared Jinja environment used by render_template().
# NOTE(review): autoescape is disabled, so template variables are inserted
# verbatim - presumably intentional because entry bodies are already HTML;
# confirm that no untrusted metadata reaches the templates.
jenv = jinja2.Environment(
    loader=jinja2.FileSystemLoader(template_path),
    autoescape=False, trim_blocks=True, lstrip_blocks=True,
    keep_trailing_newline=False)
def render_template(template_name, output_path, **kwargs):
    """Render a Jinja template and write the result to a file.

    Args:
        template_name: Name of the template, looked up through ``jenv``.
        output_path: File the rendered text is written to. Missing parent
            directories are created.
        **kwargs: Variables made available to the template.
    """
    template = jenv.get_template(template_name)
    # Render before touching the output file so a template error does not
    # leave a truncated file behind.
    rendered = template.render(**kwargs)
    target = pathlib.Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 matches how the entry files are read; the platform
    # default encoding could otherwise mangle non-ASCII text.
    with open(target, "w", encoding="utf-8") as out_file:
        out_file.write(rendered)
def apply_config():
    """Overlay configs/cgbe.json onto ``global_config``.

    Each top-level section of the file (e.g. "paths") is merged key by key
    into the section of the same name in ``global_config``. The merge is one
    level deep: a nested value such as ``paths.templates`` replaces the
    default value as a whole.

    If the file does not exist, the current ``global_config`` is written to
    that location instead (creating the ``configs`` folder when needed) and
    nothing is changed in memory.

    Raises:
        json.JSONDecodeError: If the config file is not valid JSON.
    """
    config_path = pathlib.Path("configs/cgbe.json")
    try:
        with open(config_path, "r", encoding="utf-8") as raw_json:
            config_data = json.load(raw_json)
    except FileNotFoundError:
        print("ERROR: Config file doesn't exist in expected location.")
        print("Writing new config file.")
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as write_file:
            write_file.write(json.dumps(global_config, indent=4))
        print("SUCCESS: config file written")
        return

    for section, values in config_data.items():
        if isinstance(values, dict) and isinstance(global_config.get(section), dict):
            global_config[section].update(values)
        else:
            # Unknown section or non-mapping value: store it as given rather
            # than failing with KeyError / AttributeError.
            global_config[section] = values
    if verbose:
        print("The following configuration was found and has been applied:")
        print(global_config)
def date_time_handling(str_date_time):
    """Parse an ISO 8601 string into a ``datetime``, with a configured fallback.

    When ``str_date_time`` cannot be parsed, an error is printed and the
    fallback from ``global_config["defaults"]["date_time"]`` is used:

    * ``"now"`` - the current local time
    * ``"0"`` or ``"unix_0"`` - the Unix epoch
    * empty / ``None`` - no date at all

    Any other fallback value is reported as invalid and treated as "no date".

    Args:
        str_date_time: Date(-time) string in ISO 8601 format.

    Returns:
        The parsed or fallback ``datetime``, or ``None`` when no date could
        be determined.
    """
    try:
        return datetime.fromisoformat(str_date_time)
    except ValueError:
        print(
            f"ERROR: Provided datetime string invalid. Expected ISO8601 formatted datetime. Received string: "
            f"{str_date_time}")
    except TypeError:
        print(f"ERROR: Provided datetime isn't a string. Received type: {type(str_date_time)}")
    if verbose:
        print("Defaulting to provided default time")

    default = global_config["defaults"]["date_time"]
    if not default:
        if verbose:
            print("No default time was set. Date will be left empty")
        return None

    # str() keeps a non-string default (e.g. a JSON number) from raising
    # AttributeError on .lower().
    keyword = str(default).lower()
    if keyword == "now":
        date_time = datetime.now()
    elif keyword in ("0", "unix_0"):
        date_time = datetime.fromtimestamp(0)
    else:
        print(f"Invalid value {str(default)} was provided.")
        return None
    if verbose:
        print(f"Default time was set to {keyword}. New ISO Datetime {date_time}")
    return date_time
def _date_separators(configured):
    """Return exactly three separator strings for the three date components.

    ``configured`` is either a single separator (placed between the
    components, nothing after the last one) or a list of separators, one per
    component. Missing or empty values become ``""``; list elements beyond
    the third are ignored.
    """
    if isinstance(configured, (list, tuple)):
        raw = list(configured)[:3]
    elif configured:
        raw = [configured, configured]
    else:
        raw = []
    raw += [None] * (3 - len(raw))
    return [str(separator or "") for separator in raw]


def format_datetime(date_time_to_format):
    """Format a ``datetime`` according to ``global_config["date_time"]``.

    * If both ``use_unix_time`` and ``exclusively_use_unix_time`` are set,
      the integer Unix timestamp is returned as a string.
    * Otherwise the date is written in the order given by ``date_ordering``
      ("YMD", "DMY" or "MDY", case-insensitive) using ``date_seperator``.
      An unknown or non-string ordering is reported and YMD is used.
    * If ``show_time`` is set, ``HH:MM`` (or ``HH:MM:SS`` with
      ``show_seconds``) is appended after a space.
    * If ``displayed_timezone`` is set, it is appended after a space.

    Args:
        date_time_to_format: The ``datetime`` to format, or ``None``.

    Returns:
        The formatted string; ``""`` when ``date_time_to_format`` is ``None``.
    """
    if date_time_to_format is None:
        return ""
    settings = global_config["date_time"]

    if settings["use_unix_time"] and settings["exclusively_use_unix_time"]:
        # int() drops the fractional seconds.
        return str(int(date_time_to_format.timestamp()))

    separators = _date_separators(settings["date_seperator"])
    components = {
        "y": f"{date_time_to_format.year:04d}",
        "m": f"{date_time_to_format.month:02d}",
        "d": f"{date_time_to_format.day:02d}",
    }

    ordering = settings["date_ordering"]
    order_key = ordering.lower() if isinstance(ordering, str) else None
    if order_key not in ("ymd", "dmy", "mdy"):
        if isinstance(ordering, str):
            print(
                f"ERROR: Date format string of either \"YMD\", \"DMY\" or \"MDY\" was expected. Received "
                f"{ordering}")
        else:
            print(
                f"ERROR: Date format wasn't provided as str. Received type: "
                f"{type(ordering)}")
        print("Falling back to YMD formating.")
        order_key = "ymd"

    formatted_date_time = "".join(
        components[part] + separator for part, separator in zip(order_key, separators)
    )

    if settings["show_time"]:
        time_str = f"{date_time_to_format.hour:02d}:{date_time_to_format.minute:02d}"
        if settings["show_seconds"]:
            time_str += f":{date_time_to_format.second:02d}"
        # TODO: Add check if time was also provided (Regex maybe?)
        formatted_date_time = f"{formatted_date_time} {time_str}"

    if settings["displayed_timezone"]:
        formatted_date_time = f"{formatted_date_time} {settings['displayed_timezone']}"
    return formatted_date_time
def format_text(text_to_format):
    """Convert Markdown text to HTML via ``mistune.html`` and return it."""
    # TODO: implement this with more options. For now, it's unused, instead will be processed in collect_entry_data.
    rendered_html = mistune.html(text_to_format)
    return rendered_html
def collect_all_blog_combinations():
    """Find the entries that have both a Markdown and a JSON file.

    Looks at the ``*.md`` and ``*.json`` files directly inside the configured
    entries folder and compares their names without extension. Files lacking
    their counterpart are reported with a NOTICE.

    Returns:
        Set of file stems for which both files exist.
    """
    entries_dir = pathlib.Path(global_config["paths"]["entries_folder"])
    markdown_stems = {path.stem for path in entries_dir.glob("*.md")}
    metadata_stems = {path.stem for path in entries_dir.glob("*.json")}

    orphan_markdown = markdown_stems.difference(metadata_stems)
    if orphan_markdown:
        print("NOTICE: For the following .md files there's .json missing:", orphan_markdown)

    orphan_metadata = metadata_stems.difference(markdown_stems)
    if orphan_metadata:
        print("NOTICE: For the following .json files there's .md missing:", orphan_metadata)

    # Everything that is not an orphan exists in both flavours.
    unpaired = orphan_markdown.union(orphan_metadata)
    file_pairs = markdown_stems.union(metadata_stems).difference(unpaired)
    if verbose:
        print(f"The following file combinations were found and will be used for generation:{file_pairs}")
    return file_pairs
def collect_entry_data(pair_name):
    """Load one blog entry from its ``.json`` / ``.md`` file pair.

    The JSON file supplies the metadata. ``href`` is required; ``date``,
    ``author``, ``tags`` and ``content_warnings`` are optional. A missing or
    empty author falls back to ``global_config["defaults"]["author"]``.

    If the first line of the Markdown file is a level-one heading
    (``# Title``), it becomes the entry heading and is removed from the text
    before the text is converted to HTML.

    Args:
        pair_name: File name without extension, shared by both files.

    Returns:
        The populated ``Entry``. Its ``date`` is the formatted date string,
        or ``None`` when no usable date is available.

    Raises:
        KeyError: If the metadata has no ``href``.
    """
    entries_folder = pathlib.Path(global_config["paths"]["entries_folder"])

    # JSON config file meta data loading and formating
    with open(entries_folder / f"{pair_name}.json", "r", encoding="utf-8") as raw_json:
        metadata = json.load(raw_json)

    formated_datetime = None
    if metadata.get("date"):
        given_datetime = date_time_handling(metadata["date"])
        # date_time_handling yields None when neither the given date nor the
        # configured default is usable; there is nothing to format then.
        if given_datetime is not None:
            formated_datetime = format_datetime(given_datetime)

    entry_data = Entry(
        href=metadata["href"],
        date=formated_datetime,
        author=metadata.get("author") or global_config["defaults"].get("author"),
        tags=metadata.get("tags"),
        content_warnings=metadata.get("content_warnings"),
    )

    # Read the Markdown source once; the context manager closes the file.
    with open(entries_folder / f"{pair_name}.md", "r", encoding="utf-8") as raw_text:
        text_with_heading = raw_text.read()

    # Extract the main heading for use as the blog title in generated
    # overviews. re.match anchors at the start and "." never crosses a line
    # break, so only the first line can match.
    if heading_match := re.match(r" {,3}# +(.+)", text_with_heading):
        entry_data.heading = heading_match.group(1)
        entry_data.html = mistune.html(text_with_heading[heading_match.end():])
    else:
        entry_data.html = mistune.html(text_with_heading)
    return entry_data
def collect_tags(metadata):
    """Group blog entries by tag.

    Args:
        metadata: Iterable of entries; each needs a ``tags`` attribute holding
            an iterable of tags or ``None``.

    Returns:
        Dict mapping each tag to the list of entries carrying it, ordered by
        descending number of entries. Tags with equally many entries keep
        the order in which they were first seen.
    """
    found_tag_occurences = {}
    for data in metadata:
        # Entries without tags (tags is None or empty) contribute nothing.
        for tag in data.tags or ():
            found_tag_occurences.setdefault(tag, []).append(data)
    return dict(sorted(found_tag_occurences.items(), key=lambda item: len(item[1]), reverse=True))
def generate_blog_overview(overview_data, tag_data=None):
    """Render the overview page listing the blog entries.

    Args:
        overview_data: Entries, passed to the template as ``blog_data``.
        tag_data: Mapping of tag to entries, passed to the template as
            ``tag_occurences``. Optional; an empty mapping is used when it
            is omitted.
    """
    # NOTE(review): the output path is used as configured and is not prefixed
    # with paths.generated_folder, unlike the entry and tag pages - confirm
    # that this is intended.
    render_template(
        global_config["paths"]["templates"]["overview"],
        global_config["paths"]["generated"]["overview"],
        blog_data=overview_data,
        tag_occurences={} if tag_data is None else tag_data,
        opt=opt,
    )
def generate_tag_overviews(tag_data):
    """Render one overview page per tag into ``<generated_folder>tags/``.

    Args:
        tag_data: Mapping of tag to the entries carrying it. Nothing is
            generated when it is empty or ``None``.
    """
    if not tag_data:
        return
    paths = global_config["paths"]
    # Use the configured template name, like the overview does; fall back to
    # the stock name if the configuration lacks the key.
    template_name = paths["templates"].get("tag_overview", "tag_overview.html")
    tags_folder = f"{paths['generated_folder']}tags/"
    pathlib.Path(tags_folder).mkdir(parents=True, exist_ok=True)
    for tag, occurences in tag_data.items():
        render_template(
            template_name,
            f"{tags_folder}{tag}.html",
            tag=tag,
            occurences=occurences,
            overview_backlink=paths["generated"]["overview"],
            opt=opt,
        )
def generate_blog_entries(blog_entry_data):
    """Render every blog entry to ``<generated_folder><entry.href>``.

    Args:
        blog_entry_data: Iterable of entries; each is passed to the template
            as ``entry``.
    """
    paths = global_config["paths"]
    # Use the configured template name, like the overview does; fall back to
    # the stock name if the configuration lacks the key.
    template_name = paths["templates"].get("blog_entry", "blog_entry.html")
    if paths["generated_folder"]:
        pathlib.Path(paths["generated_folder"]).mkdir(parents=True, exist_ok=True)
    for entry in blog_entry_data:
        render_template(
            template_name,
            f"{paths['generated_folder']}{entry.href}",
            entry=entry,
            overview_backlink=paths["generated"]["overview"],
            opt=opt,
        )
if __name__ == "__main__":
    # Script entry point: print the banner, handle the informational flags,
    # then generate the overview, the tag pages and the entry pages.

    # --- Version information -------------------------------------------
    # The version and its date are taken from fixed word positions of
    # version_history.md. Problems are only reported later, once it is known
    # whether verbose output was requested.
    version = ""
    version_date = ""
    version_notice = None
    try:
        with open("version_history.md", encoding="utf-8") as version_history:
            split_version_history = version_history.read().split()
        version = split_version_history[4]
        version_date = split_version_history[5][1:-1]
    except FileNotFoundError:
        version_notice = ("NOTICE: version_history.md was not found. Perhaps it has been removed or renamed. CGBE will be "
                          "unable to display version information")
    except IndexError:
        version_notice = ("NOTICE: version_history.md does not have the expected layout. CGBE will be unable to "
                          "display complete version information")

    # License information and argument parsing
    print(f"""CatGirlBlogEngine (CGBE) {version} - {version_date}
Copyright (C) 2025 Lucia Zehentner
This program comes with ABSOLUTELY NO WARRANTY;
for details provide argument "-w".
This is free software, and you are welcome to redistribute it under certain
conditions; provide argument "-r" for details.
The full license can be displayed by providing the "-l" argument.
For contact data provide argument "-c".
""")
    parser = argparse.ArgumentParser()
    # Adding optional argument
    parser.add_argument("-w", "--warranty", help="Display warranty information", action='store_true')
    parser.add_argument("-r", "--redistribution", help="Display conditions of redistribution",
                        action='store_true')
    parser.add_argument("-l", "--license", help="Display full license", action='store_true')
    parser.add_argument("-c", "--contact", help="Display contact information", action='store_true')
    parser.add_argument("-v", "--verbose", help="Meow a lot about literally everything!",
                        action='store_true')
    args = parser.parse_args()

    # Assigning here (module level) updates the global flag that the helper
    # functions consult.
    verbose = args.verbose

    # --- Informational flags -------------------------------------------
    license_text = None
    try:
        with open("LICENSE", "r", encoding="utf-8") as license_file:
            license_text = license_file.read().split("\n")
    except FileNotFoundError:
        print("WARNING: LICENSE file missing.")

    # Line ranges of the LICENSE file shown for each flag, in priority order.
    license_sections = (
        (args.warranty, slice(588, 619)),
        (args.redistribution, slice(153, 405)),
        (args.license, slice(None)),
    )
    for requested, section in license_sections:
        if requested:
            if license_text is None:
                # The requested text cannot be shown; do not continue into
                # site generation for what was an information-only call.
                raise SystemExit(1)
            print("\n".join(license_text[section]))
            raise SystemExit(0)
    if args.contact:
        print("""CONTACT ME via
eMail: mail@luciaa.at
XMPP: schlecknits@xmpp.yepoleb.at
Matrix: @schlecknits:chat.ohaa.xyz
Fedi: @schlecknits@tyrol.social
Further contact data available at luciaa.at""")
        raise SystemExit(0)
    if verbose:
        print("NOTICE: Verbose mode activated")
        if version_notice:
            print(version_notice)

    # --- Site generation -----------------------------------------------
    apply_config()
    blog_combinations = collect_all_blog_combinations()
    blog_data = []
    blog_data_without_date = []
    # sorted() gives a reproducible order for entries that are not (or
    # equally) dated; the set itself has no stable iteration order.
    for combination in sorted(blog_combinations):
        current_data = collect_entry_data(combination)
        if current_data.date:
            blog_data.append(current_data)
        else:
            blog_data_without_date.append(current_data)
    if verbose and blog_data_without_date:
        print(f"NOTICE: The following entries do not contain a date and therefore will not be sorted: "
              f"{blog_data_without_date}")
    # NOTE(review): entries are ordered by their *formatted* date string, which
    # is only chronological for the YMD ordering - confirm whether sorting
    # should use the parsed datetime instead.
    blog_data = sorted(blog_data, key=lambda x: x.date, reverse=True)
    blog_data.extend(blog_data_without_date)

    # TODO: remove the temporary opt assignment and find a more permanent solution
    opt["date"] = format_datetime(datetime.now())
    opt["current_site"] = "blog"

    # TODO: replace this constant with a configurable variable which determines if tags are used
    # TODO: find out similarity between tags, if two are very similar give out a typo warning
    # print(f"WARNING: Tags {a} and {b} are very similar. This may be a typo.")
    use_tags = True
    tag_occurences = collect_tags(blog_data) if use_tags else {}
    if tag_occurences:
        generate_tag_overviews(tag_occurences)
    generate_blog_overview(blog_data, tag_occurences)
    generate_blog_entries(blog_data)