A basic utility for indexing gemini capsules
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

217 lines
8.2 KiB

#!/usr/bin/env python3
import argparse
import glob
import os
import os.path
import stat
from itertools import islice
import yaml
# Every indexed post across all tags; filled by inventory_posts() and
# sorted and read by write_gemlog_index().
gemindex = []
# Scratch list of the posts for the tag currently being processed;
# inventory_posts() clears it after each tag index has been written.
tag_index = []
# extract titles from first heading in file
def get_title(filename):
    """Return the text of the first heading line in a gemtext file.

    A heading is any line that starts with '#'. Every leading '#' and the
    surrounding whitespace are removed, so "## My post" yields "My post".

    Args:
        filename: Path of the file to scan.

    Returns:
        The heading text (an empty string for a bare "#" line), or None
        when the file contains no heading line at all.
    """
    # Gemtext is UTF-8; do not depend on the locale's default encoding.
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            if line.startswith("#"):
                # lstrip() cannot run off the end of the string, unlike
                # indexing line[0] on a final "###" line without a newline.
                return line.lstrip("#").strip()
    return None
# return true if world readable
def is_world_readable(filename):
    """Report whether the "others" read permission bit is set on a file.

    The result is the masked mode bits rather than a strict bool: a
    non-zero (truthy) int when the bit is set, 0 otherwise.
    """
    mode = os.stat(filename).st_mode
    return mode & stat.S_IROTH
# extract dates from filenames
def get_date(filename):
    """Return the first ten characters of the file's base name.

    NOTE(review): presumably posts are named with a leading ISO date
    ("YYYY-MM-DD-..."); nothing here validates that - confirm against the
    capsule's naming convention.
    """
    _, name = os.path.split(filename)
    return name[:10]
# find path relative to target
def relate_paths(source, target):
    """Return the path of target relative to the directory holding source.

    Args:
        source: Path of the file the link will be written into.
        target: Path of the file being linked to.
    """
    source_dir = os.path.dirname(source)
    return os.path.relpath(target, start=source_dir)
# generate absolute url for posts (to use in feed)
def absolution(url, index_path, post_path):
    """Build the URL of a post by appending its relative path to url.

    The relative path is measured from the common prefix of index_path and
    post_path; url is used as given (no separator is inserted).

    NOTE(review): os.path.commonprefix compares character by character, so
    the prefix may end in the middle of a path component - confirm this is
    acceptable for the directory layouts in use.
    """
    shared = os.path.commonprefix((index_path, post_path))
    relative = os.path.relpath(post_path, start=shared)
    return "".join((url, relative))
# generate an atom feed from indexes
def atomizer(cap, entries, tag, gemlog=False):
    """Write an Atom feed (atom.xml) for one tag or for the whole gemlog.

    Args:
        cap: Capsule settings; reads 'gemroot', 'baseURL', 'author',
            'email' and 'license'.
        entries: Post dicts with 'title', 'date' and 'path' keys. The first
            entry's date becomes the feed's <updated> value and only the
            first ten entries are written, so callers pass them newest
            first.
        tag: Settings of the index the feed belongs to; reads 'title' and
            either 'index' (gemlog=True) or 'folder'.
        gemlog: When True the feed is placed in the directory of
            tag['index']; otherwise in tag['folder'].
    """
    # Imported locally so the fix needs no change to the file-level imports.
    from datetime import datetime, timezone
    from xml.sax.saxutils import escape

    def text(value):
        """Escape a string for use as XML character data."""
        return escape(value)

    def attr(value):
        """Escape a string for use inside a single-quoted XML attribute."""
        return escape(value, {"'": "&apos;"})

    gr = os.path.expanduser(cap['gemroot'])
    url = cap['baseURL']
    if gemlog:
        subdir = os.path.dirname(tag['index'])
        # dirname() drops the trailing separator ("log/index.gmi" -> "log");
        # restore it, otherwise the feed would be written to "logatom.xml".
        if subdir and not subdir.endswith("/"):
            subdir += "/"
    else:
        subdir = tag['folder']
    local_path = gr + subdir
    gemini_path = url + subdir
    atompath = local_path + "atom.xml"
    atomurl = gemini_path + "atom.xml"

    if entries:
        updated = entries[0]['date']
    else:
        # A feed without posts still needs an <updated> element; fall back
        # to today's date (UTC) instead of failing on entries[0].
        updated = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    feed_header = "<?xml version='1.0' encoding='UTF-8'?>\n<feed xmlns='http://www.w3.org/2005/Atom'>\n"
    feed_id = " <id>" + text(gemini_path) + "</id>\n"
    feed_title = " <title>" + text(tag['title']) + "</title>\n"
    feed_updated = " <updated>" + text(updated) + "T00:00:00+00:00" + "</updated>\n"
    feed_link = " <link href='" + attr(atomurl) + "' rel='self'/>\n <link href='" + attr(gemini_path) + "' rel='alternate'/>\n"
    feed_generator = " <generator uri='gemini://namu.blue/~mieum/git/twindexer/'>twindexer</generator>\n"
    feed_author = " <author>\n <name>" + text(cap['author']) + "</name>\n <email>" + text(cap['email']) + "</email>\n <uri>" + text(url) + "</uri>\n </author>\n"
    feed_rights = " <rights>" + text(cap['license']) + "</rights>\n"
    feed_preamble = [feed_header, feed_id, feed_title, feed_updated, feed_link, feed_author, feed_rights, feed_generator]

    print("(twindexer) Writing feed for '" + tag['title'] + "'")
    # The XML declaration promises UTF-8, so write UTF-8 regardless of locale.
    with open(atompath, 'w', encoding='utf-8') as atom:
        atom.writelines(feed_preamble)
        for entry in islice(entries, 10):
            entry_url = absolution(url, gr, entry['path'])
            atom.write(" <entry>\n <title>" + text(entry['title']) + "</title>\n <updated>" + text(entry['date']) + "T00:00:00+00:00" + "</updated>\n <id>" + text(entry_url) + "</id>\n <link href='" + attr(entry_url) + "' rel='alternate'/>\n </entry>\n")
        atom.write("</feed>")
# find all gemtext files in a directory, excluding its index file
def find_files(directory):
    """List the *.gmi and *.gemini files directly inside directory.

    The directory's own index.gmi / index.gemini is left out. All .gmi
    matches come before all .gemini matches.
    """
    found = []
    for suffix in ("gmi", "gemini"):
        index_file = os.path.join(directory, "index." + suffix)
        matches = glob.glob(os.path.join(directory, "*." + suffix))
        found += [match for match in matches if match != index_file]
    return found
# sort dictionaries by date
def sort_time(d):
    """Sort key: return the 'date' value of a post dictionary."""
    date = d['date']
    return date
def inventory_posts(cap, tl):
    """Collect the posts of every tag and write each tag's index.

    For each tag in tl, every world-readable gemtext file in the tag's
    folder is recorded in the module-level gemindex and tag_index lists.
    The tag's index is then written from tag_index (newest first) and
    tag_index is cleared for the next tag.

    Args:
        cap: Capsule settings; reads 'gemroot' here and is passed on to
            write_tag_index().
        tl: Mapping of tag name to tag settings; each value provides
            'folder' and 'tag'.
    """
    print("(twindexer) Taking inventory")
    gr = os.path.expanduser(cap['gemroot'])
    for settings in tl.values():
        path = gr + settings['folder']
        for post in find_files(path):
            if not is_world_readable(post):
                # Files that are not world readable are skipped.
                continue
            filename = os.path.basename(post)
            linkline = {
                'path': path + filename,
                'date': get_date(post),
                'tag': settings['tag'],
                # A post without a heading has no title (None); fall back to
                # its file name so the index writers can still build a line.
                'title': get_title(post) or filename,
            }
            gemindex.append(linkline)
            tag_index.append(linkline)
        # sort and write tag index and clear the list
        tag_index.sort(key=sort_time, reverse=True)
        write_tag_index(cap, tag_index, settings)
        tag_index.clear()
# write indexes for individual tags
def write_tag_index(cap, posts, tag):
    """Write the gemtext index page of one tag, and its feed if enabled.

    The page consists of the optional header, the title heading, one link
    line per post, an optional feed link and the optional footer.

    Args:
        cap: Capsule settings; reads 'gemroot' and is passed to atomizer().
        posts: Post dicts with 'path', 'date' and 'title' keys, written in
            the order given.
        tag: Tag settings; reads 'folder', 'index' and 'title', plus the
            optional 'header', 'footer', 'separator' and 'feed'.
    """
    gr = os.path.expanduser(cap['gemroot'])
    path = gr + tag['folder']
    index = gr + tag['index']
    # The separator is the same for every post, so resolve it once.
    sep = tag.get('separator', "-")

    def write_fragment(outfile, key):
        """Write optional tag[key]: a file's contents if it names a file, else the text itself."""
        if key not in tag:
            return
        source = os.path.expanduser(tag[key])
        if os.path.isfile(source):
            with open(source, 'r', encoding='utf-8') as infile:
                outfile.write(infile.read())
        else:
            outfile.write(tag[key])

    print("(twindexer) Writing index for '" + tag['title'] + "'")
    # Gemtext is UTF-8; do not depend on the locale's default encoding.
    with open(index, 'w', encoding='utf-8') as outfile:
        write_fragment(outfile, 'header')
        if 'title' in tag:
            outfile.write("# " + tag['title'] + "\n\n\n")
        for post in posts:
            postpath = relate_paths(index, post['path'])
            outfile.write("=> " + postpath + " " + post['date'] + " " + sep + " " + post['title'] + "\n")
        # Equality with True is kept on purpose: a quoted string such as
        # "false" is truthy but must not enable the feed.
        if tag.get('feed') == True:
            atomizer(cap, posts, tag)
            feed_path = relate_paths(index, os.path.expanduser(path + "atom.xml"))
            outfile.write("\n=> " + feed_path + " feed\n")
        write_fragment(outfile, 'footer')
def write_gemlog_index(cap, gl, tl):
    """Write the main gemlog index page, and its feed if enabled.

    Sorts the module-level gemindex newest first, then writes the optional
    header, the title heading, a "Tags:" list linking to every tag folder,
    one link line per post, an optional feed link and the optional footer.

    Args:
        cap: Capsule settings; reads 'gemroot' and is passed to atomizer().
        gl: Gemlog settings; reads 'index' and 'title', plus the optional
            'header', 'footer' and 'feed'.
        tl: Mapping of tag name to tag settings; each value provides
            'folder', 'tag' and 'title'.
    """
    print("(twindexer) Writing index for '" + gl['title'] + "'")
    gemindex.sort(key=sort_time, reverse=True)
    gr = os.path.expanduser(cap['gemroot'])
    gempath = gr + gl['index']

    def write_fragment(outfile, key):
        """Write optional gl[key]: a file's contents if it names a file, else the text itself."""
        if key not in gl:
            return
        source = os.path.expanduser(gl[key])
        if os.path.isfile(source):
            with open(source, 'r', encoding='utf-8') as infile:
                outfile.write(infile.read())
        else:
            outfile.write(gl[key])

    # Gemtext is UTF-8; do not depend on the locale's default encoding.
    with open(os.path.expanduser(gempath), 'w', encoding='utf-8') as outfile:
        write_fragment(outfile, 'header')
        if 'title' in gl:
            outfile.write("# " + gl['title'] + "\n\n\n")
        outfile.write("Tags:" + "\n")
        for settings in tl.values():
            tagpath = relate_paths(gempath, gr + settings['folder']) + "/"
            outfile.write("=> " + tagpath + " " + settings['tag'] + " " + settings['title'] + "\n")
        # Blank line between the tag list and the post list.
        outfile.write("\n")
        for post in gemindex:
            postpath = relate_paths(gempath, post['path'])
            outfile.write("=> " + postpath + " " + post['date'] + " " + post['tag'] + " " + post['title'] + "\n")
        # Equality with True is kept on purpose: a quoted string such as
        # "false" is truthy but must not enable the feed.
        if gl.get('feed') == True:
            atomizer(cap, gemindex, gl, gemlog=True)
            outfile.write("\n=> atom.xml feed\n")
        write_fragment(outfile, 'footer')
def main():
    """Parse the command line, load the YAML configuration and build all indexes.

    Exits via SystemExit after printing a hint when no configuration is
    given with '-c' or when the given path is not a file.
    """
    parser = argparse.ArgumentParser(description='A simple gemlog indexer')
    parser.add_argument('-c', dest='config', type=str, help="specify path of configuration file")
    options = parser.parse_args()

    if not options.config:
        print("""
T W I N D E X E R
^^^^^^^^^^^^^^^^^
A simple tag-based gemlog indexer.
Please specify which configuration
to run using the '-c' flag.
""")
        raise SystemExit

    config_path = os.path.expanduser(options.config)
    if not os.path.isfile(config_path):
        print("Hmm, are you sure that's your config?")
        raise SystemExit

    with open(config_path) as handle:
        settings = yaml.safe_load(handle)
    capsule = settings['capsule']
    gemlog = settings['gemlog']
    taglist = settings['taglist']

    # populate lists with posts, and write tag indexes/feeds
    inventory_posts(capsule, taglist)
    # sort and write gemlog index/feed
    write_gemlog_index(capsule, gemlog, taglist)
    print("(twindexer) Done! Capsule is ready for launch!")
# Run the indexer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()