Got extraction of YAML frontmatter, headings, tags and wikilinks working.
commit a785270fda
3 changed files with 208 additions and 0 deletions
omg.py (new file, 205 lines)

@@ -0,0 +1,205 @@
#!/usr/bin/python3

import os
import yaml
import json
import collections.abc


class OMG:
    """
    OMG - Obsidian Metadata Generator

    Generates metadata for markdown files.
    (Like the Obsidian plugin "Metadata Extractor" does.)
    """

    def __init__(self, path: str | os.PathLike) -> None:
        """
        Generates metadata for markdown files located at the given path.
        (Like the Obsidian plugin "Metadata Extractor" does.)

        :param PathLike path: The path where the markdown files are located
        """

        self.path = os.fspath(path)  # normalize PathLike objects to a plain string

        if not self.path.endswith("/"):
            self.path += "/"

        self.md_files = self._parse_all_files(self.path)

    def _parse_all_files(self, root: str | os.PathLike, path: str | os.PathLike | None = None) -> list:
        if path is None:
            path = root

        md_files = []

        for filename in os.listdir(path):
            if filename in [".git", ".obsidian"]:  # exclude .git and .obsidian
                continue

            filepath = path + filename

            if os.path.isdir(filepath):
                md_files.extend(self._parse_all_files(root, filepath + "/"))  # recurse into subfolders
                continue

            if not filename.endswith(".md"):  # only parse markdown files
                continue

            file_metadata = {"fileName": filename[:-3], "relativePath": os.path.relpath(filepath, root)}
            file_metadata.update(self._parse_yaml_frontmatter(filepath))  # add yaml frontmatter

            file_metadata = recursive_update(file_metadata, self._parse_md_contents(filepath))

            md_files.append(file_metadata)

        return md_files

    def _parse_yaml_frontmatter(self, path: str | os.PathLike) -> dict:
        with open(path) as file:
            frontmatter_header = file.read(4)
            if not frontmatter_header == "---\n":  # file has no frontmatter
                return {}

            frontmatter = file.read().split("\n---\n")[0]

        frontmatter_data = yaml.safe_load(frontmatter) or {}  # empty frontmatter parses to None

        tags_lower = []
        for tag in frontmatter_data.get("tags", []):  # tolerate notes without a tags key
            tags_lower.append(tag.lower())

        frontmatter_data["tags"] = tags_lower

        return frontmatter_data
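
    # Example (illustrative) of the frontmatter layout this method expects at
    # the very top of a note; all keys are passed through as-is, only the
    # optional "tags" list is lowercased:
    #   ---
    #   tags: [Obsidian, Python]
    #   aliases: [omg]
    #   ---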

    def _parse_md_contents(self, path: str | os.PathLike) -> dict:
        with open(path) as file:
            content = file.read()

        file_metadata = {
            "headings": [],
            "tags": [],
            "links": []
        }

        for line in content.split("\n"):
            if line == "":
                continue

            # ==== headings ====
            if line.startswith("#"):  # heading or tag
                tokens = line.split()
                hashtags = tokens[0]

                # all chars are "#" and there is text after a space (is definitely a heading)
                if len(set(hashtags)) == 1 and len(tokens) > 1:
                    heading_text = line[len(hashtags):].strip()
                    heading = {
                        "heading": heading_text,
                        "level": min(len(hashtags), 6)
                    }

                    file_metadata["headings"].append(heading)

            # ==== tags ====
            tags = []

            if " #" in line:
                tags = line.split(" #")
                tags = tags[1:]

            if line[0] == "#":
                if len(line) > 1 and line[1] not in ["#", " "]:  # line starts with a tag, not a heading
                    end = line.find(" #")  # cut before the next tag, if any
                    tags.append(line[1:end] if end != -1 else line[1:])

            for tag in tags:
                if tag == "" or tag[0] in [" ", "#"]:
                    continue

                tag = tag.strip("#")

                if " " in tag:
                    tag = tag.split()[0]

                if not tag == "":
                    file_metadata["tags"].append(tag.lower())

            # ==== wikilinks ====
            if "[[" not in line or "]]" not in line:
                continue

            links = line.split("[[")

            for link in links:
                if "]]" not in link or link.startswith("#"):  # link has no end or leads to a heading
                    continue

                link = link.split("]]")[0]

                tokens = link.split("|")  # ["target", "display text"]

                link = tokens[0].split("/")[-1]
                relpath = os.path.relpath(os.path.join(path, "../" + tokens[0]), self.path) + ".md"

                link_data = {"link": link, "relativePath": relpath}

                if len(tokens) > 1 and not tokens[1] == link:
                    link_data["displayText"] = tokens[1]

                file_metadata["links"].append(link_data)

        return file_metadata
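
    # Example (illustrative): for a note in the vault root containing the lines
    #   ## Project notes
    #   Some text with a #Demo tag and a [[Folder/Other note|alias]].
    # this method returns
    #   {"headings": [{"heading": "Project notes", "level": 2}],
    #    "tags": ["demo"],
    #    "links": [{"link": "Other note",
    #               "relativePath": "Folder/Other note.md",
    #               "displayText": "alias"}]}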

    def dump(self, path: str | os.PathLike | None = None, indent: int | str | None = 2) -> None:
        if path is None:
            path = self.path + "metadata.json"

        with open(path, "w") as file:
            json.dump(self.md_files, file, indent=indent)
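
    # Usage sketch (illustrative, the path is a placeholder): collect a vault's
    # metadata and write it to <vault>/metadata.json, similar to the output of
    # the Obsidian plugin "Metadata Extractor":
    #   OMG("/path/to/vault").dump()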


def recursive_update(a: collections.abc.Mapping, b: collections.abc.Mapping) -> dict:
    """
    Variant of the builtin ``dict.update()`` that also updates sub-dicts and lists recursively.

    :param dict a: Dict to update
    :param dict b: Dict containing values to add
    :return: The updated dict
    """

    for key, value in b.items():
        if isinstance(value, collections.abc.Mapping):
            if key in a:
                a[key] = recursive_update(a[key], value)

            else:
                a[key] = value

        elif isinstance(value, list):
            if key in a:
                a[key].extend(value)

            else:
                a[key] = value

        else:
            a[key] = value

    return a
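
# Example (illustrative): nested mappings are merged and lists are extended
# instead of being replaced:
#   recursive_update({"tags": ["a"], "meta": {"x": 1}},
#                    {"tags": ["b"], "meta": {"y": 2}})
#   -> {"tags": ["a", "b"], "meta": {"x": 1, "y": 2}}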


if __name__ == "__main__":
    bla = OMG("/home/emil/Dokumente/Obsidian/Gulm")

    print(json.dumps(bla.md_files, indent=2))
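
    # Illustrative output: a JSON list with one entry per markdown file, each
    # carrying "fileName", "relativePath", any frontmatter keys, plus the
    # extracted "headings", "tags" and "links" (see _parse_md_contents above).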