# radiant2txt (Source)

#! /usr/bin/env python
"Convert a RadiantCMS SQLite3 db file into separate page and header text files"
import optparse
import os
import sqlite3

# Command line: one positional argument (the SQLite database file) and an
# optional output directory.
op = optparse.OptionParser(usage="%prog [-o OUTDIR] DBFILE")
op.add_option("-o", "--out", dest="OUTDIR", default="out",
              help="directory the .md files are written to [default: %default]")
opts, args = op.parse_args()

if not args:
    # Without this check a missing argument surfaces as a bare IndexError on
    # args[0]; op.error() prints the usage line and exits instead.
    op.error("missing argument: path to the SQLite3 database file")
if not os.path.isfile(args[0]):
    # sqlite3.connect() creates a new, empty database when the path does not
    # exist, which would only fail later with "no such table".
    op.error("database file not found: %s" % args[0])
if not os.path.isdir(opts.OUTDIR):
    # The export opens files inside OUTDIR, so make sure it exists up front.
    os.makedirs(opts.OUTDIR)

conn = sqlite3.connect(args[0])
# sqlite3.Row lets the export code address columns by name (row["slug"]).
conn.row_factory = sqlite3.Row
c = conn.cursor()
import unicodedata
def norm(s):
    """Return the text *s* reduced to plain ASCII.

    The string is NFD-normalized so that accented letters decompose into a
    base letter followed by combining marks; everything that is not ASCII
    (the marks and any other non-ASCII character) is then dropped.

    The encoded result is decoded back to text so it can be concatenated
    with ordinary strings; on Python 3 the bare ``encode`` result is
    ``bytes`` and ``"..." + norm(x)`` would raise ``TypeError``.
    """
    ascii_bytes = unicodedata.normalize("NFD", s).encode("ascii", "ignore")
    return ascii_bytes.decode("ascii")
import datetime
def date(s):
    """Return the date part of a "YYYY-MM-DD HH:MM:SS" timestamp in ISO form.

    A falsy *s* (``None`` or an empty string) gives an empty string.
    """
    if not s:
        return ""
    stamp = datetime.datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
    return stamp.date().isoformat()
import textwrap, re
class DocWrapper(textwrap.TextWrapper):
    """Wrap text in a document, processing each paragraph individually.

    A plain TextWrapper treats its whole input as one paragraph. This
    subclass splits the text at blank lines, wraps every paragraph on its
    own and keeps one empty line between paragraphs, so that ``fill()``
    returns the paragraphs still separated by a blank line.
    """

    # A paragraph edge is a newline, optional whitespace, and another
    # newline. The capturing group makes split() keep the separators in its
    # result so wrap() can turn each of them into an empty output line.
    _PARA_EDGE = re.compile(r"(\n\s*\n)", re.MULTILINE)

    def __init__(self, width=120, break_long_words=False, **kwargs):
        """Create the wrapper.

        The defaults match the settings this script has always used
        (120 columns, long words left intact); any other TextWrapper
        keyword option is passed through unchanged.
        """
        # Explicit base-class call rather than super(): TextWrapper is an
        # old-style class on Python 2, where super() does not work.
        textwrap.TextWrapper.__init__(
            self, width=width, break_long_words=break_long_words, **kwargs)

    def wrap(self, text):
        """Override textwrap.TextWrapper to process 'text' properly when
        multiple paragraphs present.

        Returns the list of wrapped lines, with an empty string standing in
        for every paragraph separator.
        """
        wrapped_lines = []
        for para in self._PARA_EDGE.split(text):
            if para.isspace():
                # Whitespace-only chunk: a paragraph separator. One empty
                # entry is enough because fill() joins the list with "\n".
                wrapped_lines.append("")
            else:
                wrapped_lines.extend(textwrap.TextWrapper.wrap(self, para))
        return wrapped_lines
dw = DocWrapper()

# Write one "<slug>.md" file per row of the pages table: a metadata header
# followed by the wrapped text of every page part belonging to that page.
for page in conn.execute("SELECT * FROM pages"):
    # A slug of "/" (presumably the site root) cannot be used as a file
    # name, so that page is written as "index".
    pagename = page["slug"] if page["slug"] != "/" else "index"
    outfile = os.path.join(opts.OUTDIR, "%s.md" % pagename)
    with open(outfile, "w") as f:
        # Metadata block: ".. key: value" lines inside an HTML comment.
        f.write("<!-- \n")
        f.write(".. title: " + norm(page["title"]) + "\n")
        f.write(".. slug: " + pagename + "\n")
        if page["published_at"]:
            f.write(".. date: " + page["published_at"] + "\n")
        else:
            # Fixed fallback date for pages without a publication date, so
            # that exactly one ".. date:" line is written per file.
            f.write(".. date: 2008-06-01 12:00:00\n")
        f.write(".. type: text\n")
        f.write(".. category: blog\n")
        # Close the comment opened above; left open, it would swallow the
        # page text that follows. The blank line separates header and body.
        f.write("-->\n\n")

        parts = conn.execute(
            "SELECT * FROM page_parts WHERE page_id = ? ORDER BY page_parts.name",
            (page["id"],))
        for part in parts:
            text = dw.fill(norm(part["content"]))
            if text:
                f.write(text + "\n")