#!/usr/bin/python3 """ A custom CGI for searching pelican blogs with suitable templates using xapian. To create the index: env BLOG_DIR=path_to_blog blogsearch Note that the database will be created within the document dir and is thus world-readable with common setups. I don't consider that a problem. Make sure BLOG_DIR is also set in whatever executes the CGI (e.g., using SetEnv in apache). To have persistent indexes only updated as necessary, have DELETE_OUTPUT_DIRECTORY = False in pelican's publishconf.py. You'll also need the articlemtime.py plugin for incremental index updates. See https://blog-g.vo.org/a-local-search-engine-for-pelican-based-blogs-en.html for more deployment hints. Dependencies: python3-bs4, python3-xapian, python3-lxml """ import cgi import fnmatch import functools import json import os import re import sys import bs4 import xapian IS_CGI = "SERVER_SOFTWARE" in os.environ DB_NAME = ".xapian_db" ################ Micro templating start def escapePCDATA(val): if val is None: return "" return str(val ).replace("&", "&" ).replace('<', '<' ).replace('>', '>' ).replace("\0", "&x00;") def escapeAttrVal(val): """returns val with escapes for double-quoted attribute values. """ if val is None: return '""' return escapePCDATA(val).replace('"', '"') class Template(object): """a *very* basic and ad-hoc template engine. It works on HTML strings, with the following constructs expanded: * $[key] -- value for key, escaped for double-quoted att values * $(key) -- value for key, escaped for PCDATA * $|func| -- replace with the value of func(vars) * $!raw! -- value for key, non-escaped (other template ops are expanded) * $$ -- a $ char. Use either unicode strings or plain ASCII """ def __init__(self, source): self.source = str(source) def render(self, vars): """returns a string with the template filled using vars. vars is a dictionary mapping keys to unicode-able objects. You'll get back a unicode string that you must encode before spitting it out to the web. """ return re.sub(r"\$\$", "$", re.sub(r"\$\|([a-zA-Z0-9_]+)\|", lambda mat: globals()[mat.group(1)](vars), re.sub(r"\$$([a-zA-Z0-9_]+)$", lambda mat: escapePCDATA(vars.get(mat.group(1), "")), re.sub(r"\$\[([a-zA-Z0-9_]+)\]", lambda mat: escapeAttrVal(vars.get(mat.group(1), "")), re.sub(r"\$!([a-zA-ZÄÖÜäöüß0-9_]+)!", lambda mat: str(vars.get(mat.group(1), "")), self.source))))) def serve(self, vars): """emits a basic CGI response for this template. """ payload = self.render(vars ).replace("\n", "\r\n").encode("utf-8") sys.stdout.buffer.write(( "content-type: text/html;charset=utf-8\r\n" "content-length: %d\r\n\r\n"%len(payload)).encode("ascii")) sys.stdout.buffer.write(payload) ################ My templates, pre-compiled RESP_TEMPLATE = Template(""" Search...

$(title)

Failed

Sorry

Whatever you tried, it didn't work out:

$(msg)

Nothing, sorry. " '' 'Use wildcards or go to the main page.