"""CatGirlBlogEngine (CGBE): static blog generator.

Pairs of ``<name>.md`` / ``<name>.json`` files in the entries folder are
rendered through Jinja2 templates into blog entry pages, a general overview
and one overview page per tag.
"""
import re
import jinja2
import json
import pathlib
from datetime import datetime, timezone
import mistune
import argparse

# Globals
# Default configuration. Values found in configs/cgbe.json are merged over it
# by apply_config().
global_config = {
    "paths": {
        "entries_folder": "entry_data/",
        "templates_folder": "templates/",
        "templates": {
            "blog_entry": "blog_entry.html",
            "overview": "overview.html",
            "tag_overview": "tag_overview.html"
        },
        "generated_folder": "generated/",
        "generated": {
            "overview": "overview.html"
        }
    },
    "date_time": {
        "use_unix_time": False,
        "exclusively_use_unix_time": False,
        "date_ordering": "YMD",
        "date_seperator": "-",
        "show_time": False,
        "time_format": "24h",
        "show_seconds": False,
        "displayed_timezone": None
    },
    "defaults": {
        "author": None,
        "date_time": None
    }
}
verbose = False
opt = dict()

CONFIG_PATH = pathlib.Path("configs/cgbe.json")

# Supported date orderings mapped to the datetime attributes they emit, in order.
_DATE_ORDERINGS = {
    "ymd": ("year", "month", "day"),
    "dmy": ("day", "month", "year"),
    "mdy": ("month", "day", "year"),
}
# Zero-padded width used for each date component.
_DATE_FIELD_FORMATS = {"year": "04d", "month": "02d", "day": "02d"}

# NOTE(review): the original line breaks of the two texts below were lost in
# the whitespace-collapsed source; they have been reconstructed.
_BANNER_TEMPLATE = """CatGirlBlogEngine (CGBE) {version} - {version_date}
Copyright (C) 2025 Lucia Zehentner

This program comes with ABSOLUTELY NO WARRANTY; for details provide argument "-w".
This is free software, and you are welcome to redistribute it under certain conditions; provide argument "-r" for
details. The full license can be displayed by providing the "-l" argument. For contact data provide argument "-c".
"""

_CONTACT_TEXT = """CONTACT ME via
eMail: mail@luciaa.at
XMPP: schlecknits@xmpp.yepoleb.at
Matrix: @schlecknits:chat.ohaa.xyz
Fedi: @schlecknits@tyrol.social
Further contact data available at luciaa.at"""


class Entry:
    """Metadata and rendered HTML of a single blog entry.

    ``date`` holds the already formatted date string handed to the templates,
    ``date_time`` the parsed ``datetime`` it was formatted from (used for
    chronological sorting); both are ``None`` for entries without a date.
    """

    def __init__(self, href=None, date=None, time=None, author=None, tags=None, content_warnings=None,
                 heading=None, html=None, date_time=None):
        self.href = href
        self.date = date
        self.time = time
        self.author = author
        self.tags = tags
        self.content_warnings = content_warnings
        self.heading = heading
        self.html = html
        self.date_time = date_time

    def __str__(self):
        return f"Hyperlink: {self.href}, Date: {self.date}, Author: {self.author}, Tags: {self.tags}, " \
               f"Content Warnings: {self.content_warnings}.\nHeading: {self.heading}\nUnformatted text:\n{self.html}"


template_path = pathlib.Path(global_config["paths"]["templates_folder"])
jenv = jinja2.Environment(
    loader=jinja2.FileSystemLoader(template_path),
    autoescape=False,
    trim_blocks=True,
    lstrip_blocks=True,
    keep_trailing_newline=False)


def render_template(template_name, output_path, **kwargs):
    """Render ``template_name`` with ``kwargs`` and write the result to ``output_path``.

    Missing parent folders of ``output_path`` are created. The file is written
    as UTF-8 so the result does not depend on the platform's default encoding.
    """
    template = jenv.get_template(template_name)
    # Render before touching the file so a template error doesn't leave a truncated output behind.
    rendered = template.render(**kwargs)
    output_path = pathlib.Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(rendered, encoding="utf-8")


def _merge_config(base, override):
    """Recursively merge ``override`` into ``base`` in place; nested dicts are merged key by key."""
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            _merge_config(base[key], value)
        else:
            base[key] = value


def apply_config():
    """Merge ``configs/cgbe.json`` into ``global_config``.

    If the file doesn't exist, the current (default) configuration is written
    to that location instead, creating the ``configs`` folder when necessary.
    A malformed config file still raises ``json.JSONDecodeError``.
    """
    try:
        with open(CONFIG_PATH, "r", encoding="utf-8") as raw_json:
            config_data = json.load(raw_json)
    except FileNotFoundError:
        print("ERROR: Config file doesn't exist in expected location.")
        print("Writing new config file.")
        CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_PATH.write_text(json.dumps(global_config, indent=4), encoding="utf-8")
        print("SUCCESS: config file written")
        return

    _merge_config(global_config, config_data)
    # The Jinja environment is created at import time with the default templates folder; point it at
    # the configured folder. This is safe here because no template has been loaded yet.
    jenv.loader = jinja2.FileSystemLoader(pathlib.Path(global_config["paths"]["templates_folder"]))
    if verbose:
        print("The following configuration was found and has been applied:")
        print(global_config)


def _default_date_time():
    """Return the datetime described by the ``defaults.date_time`` config value, or ``None``.

    Accepted values are ``"now"`` (case-insensitive), ``"0"`` and ``"unix_0"`` (case-insensitive);
    an unset value or any other value yields ``None``.
    """
    default = global_config["defaults"]["date_time"]
    if not default:
        if verbose:
            print("No default time was set. Date will be left empty")
        return None

    keyword = str(default).lower()
    if keyword == "now":
        date_time = datetime.now()
    elif default == "0" or keyword == "unix_0":
        date_time = datetime.fromtimestamp(0)
    else:
        print(f"Invalid value {str(default)} was provided.")
        return None

    if verbose:
        print(f"Default time was set to {keyword}. New ISO Datetime {date_time}")
    return date_time


def date_time_handling(str_date_time):
    """Parse an ISO 8601 string into a ``datetime``.

    If the value isn't a valid ISO 8601 string, the configured default
    (``defaults.date_time``) is used instead. Returns ``None`` when neither
    yields a datetime.
    """
    try:
        return datetime.fromisoformat(str_date_time)
    except ValueError:
        print(
            f"ERROR: Provided datetime string invalid. Expected ISO8601 formatted datetime. Received string: "
            f"{str_date_time}")
    except TypeError:
        print(f"ERROR: Provided datetime isn't a string. Received type: {type(str_date_time)}")
    if verbose:
        print("Defaulting to provided default time")
    return _default_date_time()


def _date_separators():
    """Return exactly three separator strings (one after each date component) from the config.

    A single configured separator is placed between the components only; a list is used element by
    element. Missing or empty elements become the empty string, further list elements are ignored.
    """
    configured = global_config["date_time"]["date_seperator"]
    if isinstance(configured, list):
        separators = list(configured[:3])
    elif configured:
        separators = [configured, configured]
    else:
        separators = []
    separators += [None] * (3 - len(separators))
    return [str(separator or "") for separator in separators]


def format_datetime(date_time_to_format):
    """Format a ``datetime`` as a display string according to the ``date_time`` config section.

    Returns the UNIX timestamp as a string when both ``use_unix_time`` and
    ``exclusively_use_unix_time`` are set. Otherwise the date is formatted in the configured ordering
    (falling back to YMD for invalid orderings), followed by the time if ``show_time`` is set and the
    configured ``displayed_timezone`` label, if any.
    """
    config = global_config["date_time"]

    # handles UNIX timestamps if they are used
    # NOTE(review): with use_unix_time set but exclusively_use_unix_time unset, the timestamp has never
    # been part of the output; that behavior is kept.
    if config["use_unix_time"] and config["exclusively_use_unix_time"]:
        # int() gets rid of the fractional part
        return str(int(date_time_to_format.timestamp()))

    ordering = config["date_ordering"]
    if isinstance(ordering, str) and ordering.lower() in _DATE_ORDERINGS:
        ordering = ordering.lower()
    else:
        if isinstance(ordering, str):
            print(f"ERROR: Date format string of either \"YMD\", \"DMY\" or \"MDY\" was expected. Received "
                  f"{ordering}")
        else:
            print(f"ERROR: Date format wasn't provided as str. Received type: {type(ordering)}")
        print("Falling back to YMD formating.")
        ordering = "ymd"

    formatted_date_time = "".join(
        f"{format(getattr(date_time_to_format, field), _DATE_FIELD_FORMATS[field])}{separator}"
        for field, separator in zip(_DATE_ORDERINGS[ordering], _date_separators())
    )

    if config["show_time"]:
        # TODO: Add check if time was also provided (Regex maybe?)
        time_pattern = "%H:%M:%S" if config["show_seconds"] else "%H:%M"
        formatted_date_time = f"{formatted_date_time} {date_time_to_format.strftime(time_pattern)}"

    if config["displayed_timezone"]:
        formatted_date_time = f"{formatted_date_time} {config['displayed_timezone']}"
    return formatted_date_time


def format_text(text_to_format):
    """Convert Markdown text to HTML."""
    # TODO: implement this with more options. For now, it's unused, instead will be processed in collect_entry_data.
    return mistune.html(text_to_format)


def collect_all_blog_combinations():
    """Return the set of entry names that have both a ``.md`` and a ``.json`` file in the entries folder.

    Files lacking their counterpart are reported and left out.
    """
    folder = pathlib.Path(global_config["paths"]["entries_folder"])
    md_files = {md_file.stem for md_file in folder.glob("*.md")}
    json_files = {json_file.stem for json_file in folder.glob("*.json")}

    md_files_without = md_files - json_files
    if md_files_without:
        print("NOTICE: For the following .md files there's .json missing:", md_files_without)
    json_files_without = json_files - md_files
    if json_files_without:
        print("NOTICE: For the following .json files there's .md missing:", json_files_without)

    file_pairs = md_files & json_files
    if verbose:
        print(f"The following file combinations were found and will be used for generation:{file_pairs}")
    return file_pairs


def collect_entry_data(pair_name):
    """Build the ``Entry`` for ``<pair_name>.json`` / ``<pair_name>.md`` in the entries folder.

    ``href`` is required in the JSON metadata; ``date``, ``author``, ``tags`` and ``content_warnings``
    are optional. A missing author falls back to the configured ``defaults.author``. If the first line
    of the Markdown text is a top-level heading, it becomes the entry heading and is removed from the
    rendered text.
    """
    entries_folder = pathlib.Path(global_config["paths"]["entries_folder"])

    # JSON config file meta data loading and formating
    with open(entries_folder / f"{pair_name}.json", "r", encoding="utf-8") as raw_json:
        metadata = json.load(raw_json)

    parsed_datetime = None
    formated_datetime = None
    if metadata.get("date"):
        parsed_datetime = date_time_handling(metadata["date"])
        # date_time_handling returns None if neither the given date nor the default is usable
        if parsed_datetime is not None:
            formated_datetime = format_datetime(parsed_datetime)

    entry_data = Entry(href=metadata["href"],
                       date=formated_datetime,
                       author=metadata.get("author") or global_config["defaults"]["author"],
                       tags=metadata.get("tags"),
                       content_warnings=metadata.get("content_warnings"),
                       date_time=parsed_datetime)

    # extracting the entire raw text given
    text_with_heading = (entries_folder / f"{pair_name}.md").read_text(encoding="utf-8")

    # extracting of the main heading for use as the blog title in generated overviews. If there's a heading
    # remove it from the text to be formated. re.match anchors at the start of the text and "." doesn't
    # match newlines, so only the first line is considered.
    if heading_match := re.match(r" {,3}# +(.+)", text_with_heading):
        entry_data.heading = heading_match.group(1)
        entry_data.html = mistune.html(text_with_heading[heading_match.end():])
    else:
        entry_data.html = mistune.html(text_with_heading)
    return entry_data


def collect_tags(metadata):
    """Map each tag to the list of entries using it, ordered by descending number of entries.

    ``metadata`` is an iterable of ``Entry`` objects; entries without tags are skipped.
    """
    found_tag_occurences = {}
    for data in metadata:
        for tag in data.tags or ():
            found_tag_occurences.setdefault(tag, []).append(data)
    return dict(sorted(found_tag_occurences.items(), key=lambda item: len(item[1]), reverse=True))


def generate_blog_overview(overview_data, tag_data=None):
    """Render the general overview page of all blog entries.

    ``tag_data`` is the mapping returned by ``collect_tags`` and may be omitted when tags aren't used.
    """
    # NOTE(review): unlike entries and tag overviews, the overview is written relative to the working
    # directory, not to generated_folder. Kept as-is; confirm whether this is intended.
    render_template(global_config["paths"]["templates"]["overview"],
                    global_config["paths"]["generated"]["overview"],
                    blog_data=overview_data,
                    tag_occurences=tag_data,
                    opt=opt)


def generate_tag_overviews(tag_data):
    """Render one overview page per tag into ``<generated_folder>/tags/<tag>.html``."""
    if not tag_data:
        return
    tags_folder = pathlib.Path(global_config["paths"]["generated_folder"]) / "tags"
    for tag, occurences in tag_data.items():
        render_template(global_config["paths"]["templates"]["tag_overview"],
                        tags_folder / f"{tag}.html",
                        tag=tag,
                        occurences=occurences,
                        overview_backlink=global_config["paths"]["generated"]["overview"],
                        opt=opt)


def generate_blog_entries(blog_entry_data):
    """Render every entry into ``<generated_folder>/<entry.href>``."""
    generated_folder = pathlib.Path(global_config["paths"]["generated_folder"])
    for entry in blog_entry_data:
        render_template(global_config["paths"]["templates"]["blog_entry"],
                        generated_folder / entry.href,
                        entry=entry,
                        overview_backlink=global_config["paths"]["generated"]["overview"],
                        opt=opt)


def _read_version_info():
    """Return ``(version, version_date, file_found)`` read from ``version_history.md``."""
    try:
        tokens = pathlib.Path("version_history.md").read_text(encoding="utf-8").split()
    except FileNotFoundError:
        return "", "", False
    try:
        # 5th whitespace-separated token is the version, the 6th the date wrapped in one character
        # on each side (stripped here).
        return tokens[4], tokens[5][1:-1], True
    except IndexError:
        return "", "", True


def _handle_info_arguments(args):
    """Print the requested license/contact information and exit if any info argument was given."""
    try:
        license_text = pathlib.Path("LICENSE").read_text(encoding="utf-8").split("\n")
    except FileNotFoundError:
        license_text = None
        print("WARNING: LICENSE file missing.")

    # Line ranges of the relevant sections within the LICENSE file.
    excerpts = (
        (args.warranty, slice(588, 619)),
        (args.redistribution, slice(153, 405)),
        (args.license, slice(None)),
    )
    for requested, excerpt in excerpts:
        if requested:
            if license_text is None:
                raise SystemExit(1)
            print("\n".join(license_text[excerpt]))
            raise SystemExit(0)

    if args.contact:
        print(_CONTACT_TEXT)
        raise SystemExit(0)


def _chronological_key(entry):
    """Sort key for dated entries; naive datetimes are treated as UTC so they compare with aware ones."""
    moment = entry.date_time
    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)
    return moment


def main():
    """Command line entry point: show the banner, parse arguments and generate the blog."""
    global verbose

    version, version_date, version_file_found = _read_version_info()

    # License information and argument parsing
    print(_BANNER_TEMPLATE.format(version=version, version_date=version_date))

    parser = argparse.ArgumentParser()
    # Adding optional argument
    parser.add_argument("-w", "--warranty", help="Display warranty information", action='store_true')
    parser.add_argument("-r", "--redistribution", help="Display conditions of redistribution", action='store_true')
    parser.add_argument("-l", "--license", help="Display full license", action='store_true')
    parser.add_argument("-c", "--contact", help="Display contact information", action='store_true')
    parser.add_argument("-v", "--verbose", help="Meow a lot about literally everything!", action='store_true')
    args = parser.parse_args()

    _handle_info_arguments(args)

    # The helper functions read the module-level flag, so it has to be set from the parsed arguments.
    verbose = args.verbose
    if verbose:
        print("NOTICE: Verbose mode activated")
        if not version_file_found:
            print("NOTICE: version_history.md was not found. Perhaps it has been removed or renamed. CGBE will be "
                  "unable to display version information")

    apply_config()

    blog_data = []
    blog_data_without_date = []
    # sorted() keeps the order of undated entries stable between runs
    for combination in sorted(collect_all_blog_combinations()):
        current_data = collect_entry_data(combination)
        if current_data.date:
            blog_data.append(current_data)
        else:
            blog_data_without_date.append(current_data)

    if verbose and blog_data_without_date:
        print(f"NOTICE: The following entries do not contain a date and therefore will not be sorted: "
              f"{[entry.href for entry in blog_data_without_date]}")

    # Sort by the parsed datetime, newest first; the formatted string only sorts correctly for YMD.
    blog_data.sort(key=_chronological_key, reverse=True)
    blog_data.extend(blog_data_without_date)

    # TODO: remove the temporary opt assignment and find a more permanent solution
    opt["date"] = format_datetime(datetime.now())
    opt["current_site"] = "blog"

    # TODO: replace this with a configurable variable which determines if tags are used
    use_tags = True
    if use_tags:
        # TODO: find out similarity between tags, if two are very similar give out a typo warning
        # print(f"WARNING: Tags {a} and {b} are very similar. This may be a typo.")
        tag_occurences = collect_tags(blog_data)
        generate_tag_overviews(tag_occurences)
        generate_blog_overview(blog_data, tag_occurences)
    else:
        generate_blog_overview(blog_data)

    generate_blog_entries(blog_data)


if __name__ == "__main__":
    main()