#!/usr/bin/python3
import collections.abc
import json
import os
import sys

import yaml


class OMG:
    """
    OMG - Obsidian Metadata Generator

    Generates metadata for markdown files.
    (Like the Obsidian plugin "Metadata Extractor" does.)
    """

    def __init__(self, path: str | os.PathLike) -> None:
        """
        Generates metadata for the markdown files located at the given path.
        (Like the Obsidian plugin "Metadata Extractor" does.)

        :param PathLike path: The path where the markdown files are located
        """
        self.path = os.fspath(path)
        if not self.path.endswith("/"):
            self.path += "/"
        self.md_files = self._parse_all_files(self.path)

    def _parse_all_files(self, root: str | os.PathLike,
                         path: str | os.PathLike | None = None) -> list:
        """Recursively collects the metadata of all markdown files below ``root``."""
        if path is None:
            path = root
        md_files = []
        for filename in os.listdir(path):
            if filename in [".git", ".obsidian"]:  # exclude .git and .obsidian
                continue
            filepath = path + filename
            if os.path.isdir(filepath):
                md_files.extend(self._parse_all_files(root, filepath + "/"))  # recurse into subfolders
                continue
            if not filename.endswith(".md"):  # only parse markdown files
                continue
            file_metadata = {
                "fileName": filename[:-3],
                "relativePath": os.path.relpath(filepath, root),
            }
            file_metadata.update(self._parse_yaml_frontmatter(filepath))  # add yaml frontmatter
            file_metadata = recursive_update(file_metadata, self._parse_md_contents(filepath))
            md_files.append(file_metadata)
        return md_files

    def _parse_yaml_frontmatter(self, path: str | os.PathLike) -> dict:
        """Returns the YAML frontmatter of the file as a dict (empty if there is none)."""
        with open(path) as file:
            if file.read(4) != "---\n":  # file has no frontmatter
                return {}
            frontmatter = file.read().split("\n---\n")[0]
        frontmatter_data = yaml.safe_load(frontmatter) or {}
        if isinstance(frontmatter_data.get("tags"), list):  # normalize tags to lowercase
            frontmatter_data["tags"] = [str(tag).lower() for tag in frontmatter_data["tags"]]
        return frontmatter_data

    def _parse_md_contents(self, path: str | os.PathLike) -> dict:
        """Extracts headings, inline tags, markdown links and wikilinks from the file."""
        with open(path) as file:
            content = file.read()
        file_metadata = {
            "headings": [],
            "tags": [],
            "links": []
        }
        for line in content.split("\n"):
            if line == "":
                continue

            # ==== headings ====
            if line.startswith("#"):  # heading or tag
                tokens = line.split()
                hashtags = tokens[0]
                # all chars are "#" and there is text after a space (is definitely a heading)
                if len(set(hashtags)) == 1 and len(tokens) > 1:
                    heading = {
                        "heading": line[len(hashtags):].strip(),
                        "level": min(len(hashtags), 6)
                    }
                    file_metadata["headings"].append(heading)

            # ==== tags ====
            tags = []
            if " #" in line:
                tags = line.split(" #")[1:]
            if line[0] == "#" and len(line) > 1 and line[1] not in ["#", " "]:
                tags.append(line[1:])  # tag at the very start of the line
            for tag in tags:
                if tag == "" or tag[0] in [" ", "#"]:
                    continue
                tag = tag.strip("#")
                if " " in tag:
                    tag = tag.split()[0]  # cut off everything after the tag itself
                if tag != "":
                    file_metadata["tags"].append(tag.lower())

            # ==== markdown links ====
            if "[" in line and "]" in line and "(" in line and ")" in line:
                for link in line.split("["):
                    if "]" not in link:
                        continue
                    tokens = link.split("]")
                    display_text = tokens[0]
                    url = tokens[1]
                    if url == "" or url[0] != "(" or url[-1] != ")":  # link has no proper url enclosure
                        continue
                    url_tokens = url[1:-1].split()
                    if not url_tokens:  # empty target, e.g. "[text]()"
                        continue
                    url = url_tokens[0]  # drop an optional link title
                    relpath = os.path.relpath(os.path.join(path, "../" + url), self.path).strip()
                    if not relpath.endswith(".md"):
                        relpath += ".md"
                    link_data = {
                        "link": url.split("/")[-1].strip(),
                        "relativePath": relpath,
                        "displayText": display_text,
                    }
                    file_metadata["links"].append(link_data)

            # ==== wikilinks ====
            if "[[" not in line or "]]" not in line:
                continue
            for link in line.split("[["):
                if "]]" not in link or link.startswith("#"):  # link has no end or leads to a heading
                    continue
                link = link.split("]]")[0]
                tokens = link.split("|")  # [target, optional display text]
                link = tokens[0].split("/")[-1].strip().replace("\\", "")
                relpath = os.path.relpath(
                    os.path.join(path, "../" + tokens[0]), self.path
                ).strip().replace("\\", "")
                if not relpath.endswith(".md"):
                    relpath += ".md"
                link_data = {"link": link, "relativePath": relpath}
                if len(tokens) > 1 and tokens[1] != link:
                    link_data["displayText"] = tokens[1]
                file_metadata["links"].append(link_data)
        return file_metadata

    def dump(self, path: str | os.PathLike | None = None, indent: int | None = 2) -> None:
        """
        Writes the collected metadata as JSON.

        :param PathLike path: Output file, defaults to ``<vault>/metadata.json``
        :param int indent: Indentation passed on to ``json.dump()``
        """
        if path is None:
            path = self.path + "metadata.json"
        with open(path, "w") as file:
            json.dump(self.md_files, file, indent=indent)


def recursive_update(a: collections.abc.Mapping, b: collections.abc.Mapping) -> dict:
    """
    Like the builtin method ``dict.update()``, but merges nested dicts and
    extends lists instead of replacing them.

    :param dict a: Dict to update
    :param dict b: Dict containing values to add
    :return: The updated dict
    """
    for key, value in b.items():
        if isinstance(value, collections.abc.Mapping) and key in a:
            a[key] = recursive_update(a[key], value)
        elif isinstance(value, list) and key in a:
            a[key].extend(value)
        else:
            a[key] = value
    return a


if __name__ == "__main__":
    if len(sys.argv) > 1:
        vault_path = sys.argv[1]
        metadata_path = sys.argv[2] if len(sys.argv) > 2 else None
        metadata = OMG(vault_path)
        metadata.dump(metadata_path)
    else:
        print(f"usage: {sys.argv[0]} <vault_path> [<metadata_path>]", file=sys.stderr)
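

# Usage sketch (illustrative only): the file name "omg.py" and the "./vault"
# path below are assumptions, not part of this script.
#
#   $ python3 omg.py ./vault                   # writes ./vault/metadata.json
#   $ python3 omg.py ./vault ./meta.json       # writes ./meta.json instead
#
# Or from Python:
#
#   from omg import OMG
#   OMG("./vault").dump()                      # default output location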