commit a785270fda92ec283fbc80e94ee4ee03638495d6
Author: The Wobbler <emil@i21k.de>
Date:   Wed Apr 9 17:15:25 2025 +0200

    Got extraction of YAML frontmatter, headings, tags and wikilinks working.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d569ea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+metadata.json
+.idea
\ No newline at end of file
diff --git a/omg.py b/omg.py
new file mode 100644
index 0000000..477dda3
--- /dev/null
+++ b/omg.py
@@ -0,0 +1,205 @@
+#!/usr/bin/python3
+
+import os
+import yaml
+import json
+import collections.abc
+
+
+class OMG:
+    """
+    OMG - Obsidian Metadata Generator
+
+    Generates metadata for markdown files.
+    (Like the Obsidian-plugin "Metadata Extractor" does.)
+    """
+
+    def __init__(self, path: str | os.PathLike) -> None:
+        """
+        Generates metadata for markdown files located at the given path.
+        (Like the Obsidian-plugin "Metadata Extractor" does.)
+
+        :param PathLike path: The path where the markdown files are located
+        """
+
+        # fspath() accepts str and PathLike alike, joining with "" appends the missing trailing separator
+        self.path = os.path.join(os.fspath(path), "")
+        self.md_files = self._parse_all_files(self.path)
+
+    def _parse_all_files(self, root: str | os.PathLike, path: str | os.PathLike | None = None) -> list:
+        """
+        Recursively collects the metadata of all markdown files below ``path`` (defaults to ``root``).
+
+        :return: One dict per markdown file, its ``relativePath`` is relative to ``root``
+        """
+
+        if path is None:
+            path = root
+
+        md_files = []
+
+        for filename in sorted(os.listdir(path)):  # sorted, so the output order is reproducible
+            if filename in (".git", ".obsidian"):  # exclude .git and .obsidian
+                continue
+
+            filepath = os.path.join(path, filename)
+
+            if os.path.isdir(filepath):
+                md_files.extend(self._parse_all_files(root, filepath))  # recurse into subfolders
+                continue
+
+            if not filename.endswith(".md"):  # only parse markdown files
+                continue
+
+            file_metadata = {"fileName": filename[:-3], "relativePath": os.path.relpath(filepath, root)}
+            file_metadata.update(self._parse_yaml_frontmatter(filepath))  # add yaml frontmatter
+            file_metadata = recursive_update(file_metadata, self._parse_md_contents(filepath))
+            md_files.append(file_metadata)
+
+        return md_files
+
+    def _parse_yaml_frontmatter(self, path: str | os.PathLike) -> dict:
+        """
+        Reads the YAML frontmatter of a markdown file.
+
+        :return: The frontmatter with its ``tags`` as a list of lower-case strings, ``{}`` if there is none
+        """
+
+        with open(path, encoding="utf-8") as file:
+            if not file.read(4) == "---\n":  # file has no frontmatter
+                return {}
+
+            frontmatter = file.read().split("\n---\n")[0]
+
+        frontmatter_data = yaml.safe_load(frontmatter)
+
+        if not isinstance(frontmatter_data, dict):  # empty frontmatter or no key-value pairs
+            return {}
+
+        if "tags" in frontmatter_data:
+            tags = frontmatter_data["tags"]
+
+            if isinstance(tags, str):  # "tags: a, b" instead of a yaml list
+                tags = tags.replace(",", " ").split()
+            elif not isinstance(tags, list):  # "tags:" without a value or a single scalar
+                tags = [] if tags is None else [tags]
+
+            # always store a list, the tags found in the text get appended to it later
+            frontmatter_data["tags"] = [str(tag).lower() for tag in tags if tag is not None]
+
+        return frontmatter_data
+
+    def _parse_md_contents(self, path: str | os.PathLike) -> dict:
+        """
+        Extracts headings, tags and wikilinks from the text of a markdown file.
+
+        :return: Dict with the lists ``headings``, ``tags`` and ``links``
+        """
+
+        with open(path, encoding="utf-8") as file:
+            content = file.read()
+
+        file_metadata = {
+            "headings": [],
+            "tags": [],
+            "links": []
+        }
+
+        for line in content.split("\n"):
+            if line == "":
+                continue
+
+            # ==== headings ====
+            if line.startswith("#"):  # heading or tag
+                tokens = line.split()
+                hashtags = tokens[0]
+
+                # all chars are "#" and there is text after a space (is definitely a heading)
+                if len(set(hashtags)) == 1 and len(tokens) > 1:
+                    heading = {
+                        "heading": line[len(hashtags):].strip(),
+                        "level": min(len(hashtags), 6)
+                    }
+
+                    file_metadata["headings"].append(heading)
+
+            # ==== tags ====
+            tags = line.split(" #")[1:]  # everything following a " #" might be a tag
+
+            # tag at the very start of the line (the slice is "" if the line is a lone "#")
+            if line[0] == "#" and line[1:2] not in ("", "#", " "):
+                tags.insert(0, line[1:])
+
+            for tag in tags:
+                if tag == "" or tag[0] in (" ", "#"):
+                    continue
+
+                words = tag.strip("#").split()  # a tag ends at the first whitespace
+                if words:
+                    file_metadata["tags"].append(words[0].lower())
+
+            # ==== wikilinks ====
+            if "[[" not in line or "]]" not in line:
+                continue
+
+            for link in line.split("[["):
+                if "]]" not in link or link.startswith("#"):  # link has no end or is leading to a heading
+                    continue
+
+                tokens = link.split("]]")[0].split("|")  # ["relPath", "displayText"]
+                link = tokens[0].split("/")[-1]
+                relpath = os.path.relpath(os.path.join(path, "../" + tokens[0]), self.path) + ".md"
+
+                link_data = {"link": link, "relativePath": relpath}
+
+                if len(tokens) > 1 and not tokens[1] == link:
+                    link_data["displayText"] = tokens[1]
+
+                file_metadata["links"].append(link_data)
+
+        return file_metadata
+
+    def dump(self, path: str | os.PathLike | None = None, indent: int | str | None = 2) -> None:
+        """Writes the collected metadata as JSON to ``path`` (defaults to ``metadata.json`` in the vault)."""
+
+        if path is None:
+            path = os.path.join(self.path, "metadata.json")
+
+        with open(path, "w", encoding="utf-8") as file:
+            json.dump(self.md_files, file, indent=indent, default=str)  # yaml dates are dumped as strings
+
+
+def recursive_update(a: collections.abc.MutableMapping, b: collections.abc.Mapping) -> dict:
+    """
+    Improvement of the builtin function ``dict.update()`` which also updates sub-dicts and lists recursively.
+
+    Sub-dicts are merged key by key and lists are extended. If the value in ``a`` has another type than
+    the one in ``b`` (e.g. ``None`` or a string where ``b`` holds a list) it is overwritten, just like
+    ``dict.update()`` would do. ``a`` is modified in place.
+
+    :param dict a: Dict to update
+    :param dict b: Dict containing values to add
+    :return: The updated dict
+    """
+
+    for key, value in b.items():
+        current = a.get(key)
+
+        if isinstance(value, collections.abc.Mapping):
+            if isinstance(current, collections.abc.MutableMapping):
+                recursive_update(current, value)
+            else:
+                a[key] = recursive_update({}, value)  # copy, so b is not changed by later updates of a
+
+        elif isinstance(value, list) and isinstance(current, list):
+            current.extend(value)
+        else:
+            a[key] = list(value) if isinstance(value, list) else value  # copy lists for the same reason
+
+    return a
+
+
+if __name__ == "__main__":  # usage: omg.py [VAULT_PATH], without an argument the author's vault is used
+    import sys  # local import, only the script entry point needs argv
+    vault = OMG(sys.argv[1] if len(sys.argv) > 1 else "/home/emil/Dokumente/Obsidian/Gulm")
+    print(json.dumps(vault.md_files, indent=2, default=str))  # default=str: yaml dates are not JSON types
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4818cc5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+pyyaml
\ No newline at end of file