394 lines
16 KiB
Python
394 lines
16 KiB
Python
import re
|
|
import jinja2
|
|
import json
|
|
import pathlib
|
|
from datetime import datetime
|
|
import mistune
|
|
import argparse
|
|
|
|
# Globals
# Built-in defaults, grouped per section. apply_config() overrides them with
# the values found in configs/cgbe.json and writes them out as a fresh config
# file when none exists.
_PATH_DEFAULTS = {
    "entries_folder": "entry_data/",
    "templates_folder": "templates/",
    "templates": {
        "blog_entry": "blog_entry.html",
        "overview": "overview.html",
        "tag_overview": "tag_overview.html",
    },
    "generated_folder": "generated/",
    "generated": {
        "overview": "overview.html",
    },
}

# Options read by format_datetime(). The key "date_seperator" (sic) is part of
# the config file format, so its spelling must stay as it is.
_DATE_TIME_DEFAULTS = {
    "use_unix_time": False,
    "exclusively_use_unix_time": False,
    "date_ordering": "YMD",
    "date_seperator": "-",
    "show_time": False,
    "time_format": "24h",
    "show_seconds": False,
    "displayed_timezone": None,
}

# Fallback values for entry metadata.
_ENTRY_DEFAULTS = {
    "author": None,
    "date_time": None,
}

global_config = {
    "paths": _PATH_DEFAULTS,
    "date_time": _DATE_TIME_DEFAULTS,
    "defaults": _ENTRY_DEFAULTS,
}

# Module-wide switch for additional diagnostic output.
verbose = False

# Extra values handed to every template (filled in by the script entry point).
opt = {}
|
|
|
|
|
|
class Entry:
    """Plain data holder for one blog entry.

    Every field is optional and defaults to ``None``. ``html`` carries the
    rendered entry body, ``heading`` the title extracted from the Markdown
    source, and ``href`` the file name the generated page is written to.
    """

    def __init__(self, href=None, date=None, time=None, author=None, tags=None, content_warnings=None, heading=None,
                 html=None):
        # Link target / metadata
        self.href, self.date, self.time = href, date, time
        self.author, self.tags = author, tags
        self.content_warnings = content_warnings
        # Content
        self.heading, self.html = heading, html

    def __str__(self):
        """Return a multi-line, human-readable dump of the entry (``time`` is not included)."""
        metadata_line = (f"Hyperlink: {self.href}, Date: {self.date}, Author: {self.author}, Tags: {self.tags}, "
                         f"Content Warnings: {self.content_warnings}.")
        return "\n".join((metadata_line, f"Heading: {self.heading}", "Unformatted text:", f"{self.html}"))
|
|
|
|
|
|
# Folder the templates are loaded from.
# NOTE(review): this is resolved at import time from the built-in default in
# global_config, i.e. before the script entry point calls apply_config().
template_path = pathlib.Path(global_config["paths"]["templates_folder"])

# Shared Jinja2 environment used by render_template().
# NOTE(review): autoescape is disabled, so every value passed to a template is
# emitted verbatim. That is required for the pre-rendered entry HTML, but it
# also applies to headings, tags and author names - confirm those are trusted.
jenv = jinja2.Environment(
    loader=jinja2.FileSystemLoader(template_path),
    autoescape=False,
    trim_blocks=True,
    lstrip_blocks=True,
    keep_trailing_newline=False,
)
|
|
|
|
|
|
def render_template(template_name, output_path, **kwargs):
    """Render a template from the shared Jinja2 environment into a file.

    Args:
        template_name: Name of the template, resolved by ``jenv``'s loader.
        output_path: File to (over)write with the rendered result.
        **kwargs: Variables made available to the template.
    """
    template = jenv.get_template(template_name)
    # Render before opening the target so that a template error does not
    # leave a truncated, empty output file behind.
    rendered = template.render(**kwargs)
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent every character a blog entry may contain.
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(rendered)
|
|
|
|
|
|
def _merge_config(target, overrides):
    """Recursively copy ``overrides`` into ``target``; keys that are not overridden keep their value."""
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            _merge_config(target[key], value)
        else:
            target[key] = value


def apply_config():
    """Load ``configs/cgbe.json`` and merge it into ``global_config``.

    Nested sections are merged key by key, so a config file only needs to
    list the values it wants to change. When the file does not exist, the
    current ``global_config`` is written there instead (the ``configs``
    folder is created if necessary) and nothing else is changed.

    Raises:
        json.JSONDecodeError: If the config file is not valid JSON.
    """
    config_path = pathlib.Path("configs/cgbe.json")
    try:
        # Context manager: the original left this handle open.
        with open(config_path, "r", encoding="utf-8") as raw_json:
            config_data = json.load(raw_json)
    except FileNotFoundError:
        print("ERROR: Config file doesn't exist in expected location.")
        print("Writing new config file.")
        # The folder may be missing as well; without it the write below
        # would fail with another FileNotFoundError.
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as write_file:
            write_file.write(json.dumps(global_config, indent=4))
        print("SUCCESS: config file written")
        return

    _merge_config(global_config, config_data)
    if verbose:
        print("The following configuration was found and has been applied:")
        print(global_config)
|
|
|
|
|
|
def date_time_handling(str_date_time):
    """Parse an ISO 8601 string, falling back to the configured default.

    Args:
        str_date_time: Date/time string as accepted by
            ``datetime.fromisoformat``.

    Returns:
        The parsed ``datetime``. If parsing fails, the value selected by
        ``global_config["defaults"]["date_time"]`` is returned instead:
        ``"now"`` gives the current time, ``"0"`` or ``"unix_0"`` the Unix
        epoch. ``None`` is returned when no default is configured or the
        configured default is not one of these keywords.
    """
    try:
        return datetime.fromisoformat(str_date_time)
    except ValueError:
        print(
            f"ERROR: Provided datetime string invalid. Expected ISO8601 formatted datetime. Received string: "
            f"{str_date_time}")
    except TypeError:
        print(f"ERROR: Provided datetime isn't a string. Received type: {type(str_date_time)}")

    # Only reached when parsing failed.
    if verbose:
        print("Defaulting to provided default time")

    default_setting = global_config["defaults"]["date_time"]
    if not default_setting:
        if verbose:
            print("No default time was set. Date will be left empty")
        return None

    # str() so that a non-string default is reported as invalid instead of
    # crashing on .lower().
    keyword = str(default_setting).lower()
    if keyword == "now":
        date_time = datetime.now()
    elif keyword in ("0", "unix_0"):
        date_time = datetime.fromtimestamp(0)
    else:
        print(f"Invalid value {str(default_setting)} was provided.")
        return None

    if verbose:
        print(f"Default time was set to {keyword}. New ISO Datetime {date_time}")
    return date_time
|
|
|
|
|
|
# Attribute order of the date parts for every supported "date_ordering" value.
_DATE_PART_ORDERS = {
    "ymd": ("year", "month", "day"),
    "dmy": ("day", "month", "year"),
    "mdy": ("month", "day", "year"),
}
# Zero-padded width each date part is printed with.
_DATE_PART_WIDTHS = {"year": 4, "month": 2, "day": 2}


def _date_separators(configured):
    """Turn the configured separator(s) into exactly three strings, one placed after each date part."""
    if isinstance(configured, list):
        separators = list(configured[:3])  # further elements are ignored
    elif configured:
        # A single separator goes between the parts, not after the last one.
        separators = [configured, configured]
    else:
        separators = []
    separators += [None] * (3 - len(separators))
    return [str(separator or "") for separator in separators]


def format_datetime(date_time_to_format):
    """Format a ``datetime`` according to ``global_config["date_time"]``.

    Args:
        date_time_to_format: The ``datetime`` object to format.

    Returns:
        The Unix timestamp as a string when both ``use_unix_time`` and
        ``exclusively_use_unix_time`` are set. Otherwise the date in the
        configured ordering (falling back to YMD for unknown orderings),
        followed by the time when ``show_time`` is set and by
        ``displayed_timezone`` when one is configured.
    """
    settings = global_config["date_time"]

    # NOTE(review): use_unix_time without exclusively_use_unix_time has no
    # effect on the output - the original computed the stamp but never used it.
    if settings["use_unix_time"] and settings["exclusively_use_unix_time"]:
        # int() first to get rid of the fractional seconds.
        return str(int(date_time_to_format.timestamp()))

    separators = _date_separators(settings["date_seperator"])

    ordering = settings["date_ordering"]
    if isinstance(ordering, str):
        part_order = _DATE_PART_ORDERS.get(ordering.lower())
        if part_order is None:
            print(f'ERROR: Date format string of either "YMD", "DMY" or "MDY" was expected. Received {ordering}')
    else:
        part_order = None
        print(f"ERROR: Date format wasn't provided as str. Received type: {type(ordering)}")
    if part_order is None:
        print("Falling back to YMD formating.")
        part_order = _DATE_PART_ORDERS["ymd"]

    formatted_date_time = "".join(
        f"{getattr(date_time_to_format, part):0{_DATE_PART_WIDTHS[part]}d}{separator}"
        for part, separator in zip(part_order, separators)
    )

    if settings["show_time"]:
        time_str = f"{date_time_to_format.hour:02d}:{date_time_to_format.minute:02d}"
        if settings["show_seconds"]:
            time_str = f"{time_str}:{date_time_to_format.second:02d}"
        # The original built time_str but never added it to the result.
        formatted_date_time = f"{formatted_date_time} {time_str}"
        # TODO: Add check if time was also provided (Regex maybe?)

    if settings["displayed_timezone"]:
        formatted_date_time = f"{formatted_date_time} {settings['displayed_timezone']}"
    return formatted_date_time
|
|
|
|
|
|
def format_text(text_to_format):
    """Convert Markdown text to HTML with mistune and return the HTML."""
    # TODO: implement this with more options. For now, it's unused, instead will be processed in collect_entry_data.
    rendered_html = mistune.html(text_to_format)
    return rendered_html
|
|
|
|
|
|
def collect_all_blog_combinations():
    """Find the entries that have both a ``.md`` and a ``.json`` file.

    Scans the configured entries folder (non-recursively) and prints a
    notice for every file that lacks its counterpart.

    Returns:
        A set with the file stems (names without extension) of all complete
        ``.md``/``.json`` pairs.
    """
    entries_dir = pathlib.Path(global_config["paths"]["entries_folder"])
    markdown_stems = {path.stem for path in entries_dir.glob("*.md")}
    metadata_stems = {path.stem for path in entries_dir.glob("*.json")}

    orphaned_markdown = markdown_stems.difference(metadata_stems)
    if orphaned_markdown:
        print("NOTICE: For the following .md files there's .json missing:", orphaned_markdown)

    orphaned_metadata = metadata_stems.difference(markdown_stems)
    if orphaned_metadata:
        print("NOTICE: For the following .json files there's .md missing:", orphaned_metadata)

    # Everything that is present in both sets forms a usable pair.
    file_pairs = markdown_stems.intersection(metadata_stems)

    if verbose:
        print(f"The following file combinations were found and will be used for generation:{file_pairs}")

    return file_pairs
|
|
|
|
|
|
def collect_entry_data(pair_name):
    """Build the ``Entry`` for one ``.json``/``.md`` file pair.

    Args:
        pair_name: File stem shared by ``<pair_name>.json`` (metadata) and
            ``<pair_name>.md`` (content) in the configured entries folder.

    Returns:
        An ``Entry`` with the metadata, the heading taken from a leading
        ``# ...`` line (if the text starts with one) and the remaining
        Markdown rendered to HTML.

    Raises:
        KeyError: If the metadata has no ``href``.
    """
    entries_folder = pathlib.Path(global_config["paths"]["entries_folder"])

    # JSON config file meta data loading and formating
    with open(entries_folder / f"{pair_name}.json", "r", encoding="utf-8") as raw_json:
        metadata = json.load(raw_json)

    formated_datetime = None
    if metadata.get("date"):
        given_datetime = date_time_handling(metadata["date"])
        # date_time_handling() yields None when the date is unusable and no
        # default applies; formatting None would raise.
        if given_datetime is not None:
            formated_datetime = format_datetime(given_datetime)

    entry_data = Entry(
        href=metadata["href"],
        date=formated_datetime,
        # Entries without an author use the configured default author.
        author=metadata.get("author") or global_config["defaults"].get("author"),
        # Always a list so the tags can be iterated without a None check.
        tags=metadata.get("tags") or [],
        content_warnings=metadata.get("content_warnings"),
    )

    # Read the raw text once; the original opened the file twice and never
    # closed the first handle.
    with open(entries_folder / f"{pair_name}.md", "r", encoding="utf-8") as raw_text:
        text_with_heading = raw_text.read()

    # A main heading on the first line becomes the blog title used in the
    # generated overviews and is removed from the text to be formated.
    first_line = text_with_heading.partition("\n")[0]
    if heading_match := re.match(r" {,3}# +(.+)", first_line):
        entry_data.heading = heading_match.group(1)
        # re.match anchors at position 0, so the heading is exactly the
        # prefix of the text up to heading_match.end().
        entry_data.html = mistune.html(text_with_heading[heading_match.end():])
    else:
        entry_data.html = mistune.html(text_with_heading)
    return entry_data
|
|
|
|
|
|
def collect_tags(metadata):
    """Group blog entries by tag.

    Args:
        metadata: Iterable of entry objects exposing a ``tags`` attribute
            (an iterable of tags, or ``None``/empty for untagged entries).

    Returns:
        A dict mapping each tag to the list of entries carrying it, ordered
        from the most to the least used tag. Tags with the same count keep
        the order in which they were first seen.
    """
    found_tag_occurences = {}
    for data in metadata:
        # "or ()" lets entries without tags (None) pass instead of raising.
        for tag in data.tags or ():
            found_tag_occurences.setdefault(tag, []).append(data)
    return dict(sorted(found_tag_occurences.items(), key=lambda item: len(item[1]), reverse=True))
|
|
|
|
|
|
def generate_blog_overview(overview_data, tag_data=None):
    """Render the general overview page listing the blog entries.

    Args:
        overview_data: The entries to list, passed to the template as
            ``blog_data``.
        tag_data: Mapping of tag to entries, passed to the template as
            ``tag_occurences``. Optional; an empty mapping is used when
            omitted, so the function can be called when tags are disabled.
    """
    # NOTE(review): the output path is used as configured, i.e. relative to
    # the working directory and without the "generated_folder" prefix that
    # entries and tag pages get - confirm this is intended.
    render_template(
        global_config["paths"]["templates"]["overview"],
        global_config["paths"]["generated"]["overview"],
        blog_data=overview_data,
        tag_occurences=tag_data if tag_data is not None else {},
        opt=opt,
    )
|
|
|
|
|
|
def generate_tag_overviews(tag_data):
    """Render one overview page per tag into ``<generated_folder>/tags/``.

    Args:
        tag_data: Mapping of tag to the entries carrying it. Nothing is
            generated when it is empty or ``None``.
    """
    if not tag_data:
        return

    # Use the configured template name like generate_blog_overview() does;
    # fall back to the stock name if the config lacks the key.
    template_name = global_config["paths"]["templates"].get("tag_overview", "tag_overview.html")
    tags_folder = pathlib.Path(global_config["paths"]["generated_folder"]) / "tags"
    # open() does not create missing folders.
    tags_folder.mkdir(parents=True, exist_ok=True)

    for tag, occurences in tag_data.items():
        render_template(
            template_name,
            tags_folder / f"{tag}.html",
            tag=tag,
            occurences=occurences,
            overview_backlink=global_config["paths"]["generated"]["overview"],
            opt=opt,
        )
|
|
|
|
|
|
def generate_blog_entries(blog_entry_data):
    """Render every blog entry to ``<generated_folder>/<entry.href>``.

    Args:
        blog_entry_data: Iterable of entries; each is passed to the template
            as ``entry`` and its ``href`` determines the output file.
    """
    # Use the configured template name like generate_blog_overview() does;
    # fall back to the stock name if the config lacks the key.
    template_name = global_config["paths"]["templates"].get("blog_entry", "blog_entry.html")
    generated_folder = pathlib.Path(global_config["paths"]["generated_folder"])

    for entry in blog_entry_data:
        output_path = generated_folder / entry.href
        # open() does not create missing folders (href may contain sub-folders).
        output_path.parent.mkdir(parents=True, exist_ok=True)
        render_template(
            template_name,
            output_path,
            entry=entry,
            overview_backlink=global_config["paths"]["generated"]["overview"],
            opt=opt,
        )
|
|
|
|
|
|
if __name__ == "__main__":
|
|
version = ""
|
|
version_date = ""
|
|
try:
|
|
version_history = open("version_history.md")
|
|
split_version_history = version_history.read().split()
|
|
version = split_version_history[4]
|
|
version_date = split_version_history[5][1:-1]
|
|
except FileNotFoundError:
|
|
if verbose:
|
|
print("NOTICE: version_history.md was not found. Perhaps it has been removed or renamed. CGBE will be "
|
|
"unable to display version information")
|
|
# License information and argument parsing
|
|
print(f"""CatGirlBlogEngine (CGBE) {version} - {version_date}
|
|
|
|
Copyright (C) 2025 Lucia Zehentner
|
|
|
|
This program comes with ABSOLUTELY NO WARRANTY;
|
|
for details provide argument "-w".
|
|
This is free software, and you are welcome to redistribute it under certain
|
|
conditions; provide argument "-r" for details.
|
|
The full license can be displayed by providing the "-l" argument.
|
|
For contact data provide argument "-c".
|
|
""")
|
|
parser = argparse.ArgumentParser()
|
|
|
|
# Adding optional argument
|
|
parser.add_argument("-w", "--warranty", help="Display warranty information", action='store_true')
|
|
parser.add_argument("-r", "--redistribution", help="Display conditions of redistribution",
|
|
action='store_true')
|
|
parser.add_argument("-l", "--license", help="Display full license", action='store_true')
|
|
parser.add_argument("-c", "--contact", help="Display contact information", action='store_true')
|
|
parser.add_argument("-v", "--verbose", help="Meow a lot about literally everything!",
|
|
action='store_true')
|
|
|
|
args = parser.parse_args()
|
|
|
|
try:
|
|
license_text = open("LICENSE", "r").read().split("\n")
|
|
if args.warranty:
|
|
print(license_text[588:619])
|
|
exit(0)
|
|
if args.redistribution:
|
|
print(license_text[153:405])
|
|
exit(0)
|
|
if args.license:
|
|
print(license_text)
|
|
exit(0)
|
|
if args.contact:
|
|
print("""CONTACT ME via
|
|
eMail: mail@luciaa.at
|
|
XMPP: schlecknits@xmpp.yepoleb.at
|
|
Matrix: @schlecknits:chat.ohaa.xyz
|
|
Fedi: @schlecknits@tyrol.social
|
|
|
|
Further contact data available at luciaa.at""")
|
|
exit(0)
|
|
if args.verbose:
|
|
print("NOTICE: Verbose mode activated")
|
|
except FileNotFoundError:
|
|
print("WARNING: LICENSE file missing.")
|
|
|
|
apply_config()
|
|
blog_combinations = collect_all_blog_combinations()
|
|
blog_data = []
|
|
blog_data_without_date = []
|
|
for combination in blog_combinations:
|
|
current_data = collect_entry_data(combination)
|
|
if current_data.date:
|
|
blog_data.append(current_data)
|
|
else:
|
|
blog_data_without_date.append(current_data)
|
|
if args.verbose and blog_data_without_date:
|
|
print(f"NOTICE: The following entries do not contain a date and therefore will not be sorted: "
|
|
f"{blog_data_without_date}")
|
|
blog_data = sorted(blog_data, key=lambda x: x.date, reverse=True)
|
|
for data in blog_data_without_date:
|
|
blog_data.append(data)
|
|
|
|
# TODO: remove the temporary opt assignment and find a more permanent solution
|
|
opt["date"] = format_datetime(datetime.now())
|
|
opt["current_site"] = "blog"
|
|
|
|
if True:
|
|
# TODO: replace "if True" with a configurable variable which determines if tags are used
|
|
# TODO: find out similarity between tags, if two are very similar give out a typo warning
|
|
# print(f"WARNING: Tags {a} and {b} are very similar. This may be a typo.")
|
|
tag_occurences = collect_tags(blog_data)
|
|
if tag_occurences:
|
|
generate_tag_overviews(tag_occurences)
|
|
generate_blog_overview(blog_data, tag_occurences)
|
|
|
|
else:
|
|
generate_blog_overview(blog_data)
|
|
generate_blog_entries(blog_data)
|