Malline:Hakusana/kokoaminen

Wikikirjastosta

Mallineella merkityt hakusanat voi koota hakemistoksi seuraavalla Python-ohjelmalla.

Paketit mwparserfromhell ja pywikibot pitää asentaa ensin.

#!/usr/bin/env python3

import html
import re
import sys

import mwparserfromhell
import pywikibot

# Usage message; the script passes it to sys.exit() when the command line
# does not consist of exactly one argument or when that argument is "-h".
usage = f"Käyttö: {sys.argv[0]} <kirjan nimi>"

class Link:
    """A wiki link whose display text may carry inline HTML-style tags.

    Attributes:
        text: The label exactly as given, tags included.
        target: The label with simple lowercase tags (``<code>``, ``</i>``,
            ...) removed, used as the link target.

    Only ``<`` ordering is defined (by ``text``). Equality is left as
    identity on purpose, so two separately built links never compare equal.
    """

    def __init__(self, target):
        """Store *target* as the display text and derive the tag-free target."""
        self.text = target
        # Fast path: only run the regex when a tag could be present.
        if "<" in target:
            self.target = re.sub(r'</?[a-z]+>', '', target)
        else:
            self.target = target

    def __repr__(self):
        return f"{type(self).__name__}({self.text!r})"

    def __str__(self):
        """Return the ``target|text`` form used inside ``[[...]]``."""
        return f"{self.target}|{self.text}"

    def __lt__(self, other):
        """Order links by display text; defer for anything that is not a Link."""
        if not isinstance(other, Link):
            # Let Python raise its normal TypeError instead of failing with
            # AttributeError on e.g. ``None`` (used for "no section").
            return NotImplemented
        return self.text < other.text


def _section_heading(section):
    """Return a Link for the heading that opens *section*, or None.

    The lead section has no heading, and it is empty when the page starts
    directly with a heading, so the node list must be checked before use.
    """
    nodes = section.nodes
    if nodes and isinstance(nodes[0], mwparserfromhell.nodes.heading.Heading):
        return Link(nodes[0].title.strip())
    return None


def _clean_keyword(raw):
    """Remove <nowiki> tags from *raw* and decode its HTML entities."""
    return html.unescape(re.sub(r"</?nowiki>", r"", raw))


def _page_entries(page):
    """Yield one index entry per {{hakusana}} use on *page*.

    Each entry is ``(keyword, book, page Link, section Link or None)``.
    The page title must contain a "/" separating the book from the page.
    """
    book, book_page = page.title().split("/", maxsplit=1)
    wikicode = mwparserfromhell.parse(page.text)
    for section in wikicode.get_sections(levels=[2], include_headings=True, include_lead=True):
        sect_head = _section_heading(section)
        for tpl in section.ifilter_templates():
            # matches() ignores first-letter case and surrounding whitespace,
            # the same way MediaWiki resolves template names.
            if not tpl.name.matches("hakusana"):
                continue
            if not tpl.has(1):
                # A bare {{hakusana}} has nothing to index; report and go on
                # instead of aborting the whole run.
                print(f"Varoitus: sivulla {page.title()} on hakusana-malline ilman parametria",
                      file=sys.stderr)
                continue
            keyword = _clean_keyword(str(tpl.get(1).value))
            yield keyword, book, Link(book_page), sect_head


def _print_index(index):
    """Print sorted *index* entries as a wikitext definition list."""
    prev = None
    for keyword, book, book_page, sect_head in index:
        # Leading list markup or quote runs would be interpreted by the
        # wiki parser after the ";" marker, so neutralise them.
        if re.match(r"[*:#]|('''*)", keyword):
            keyword = f"<nowiki>{keyword}</nowiki>"
        # Emit the term line only once per run of identical keywords.
        if keyword != prev:
            print(f";{keyword}")
        if sect_head is not None:
            print(f":[[{book}/{book_page}]] » [[{book}/{book_page.target}#{sect_head}]]")
        else:
            print(f":[[{book}/{book_page}]]")
        prev = keyword


def main():
    """Collect the {{hakusana}} keywords of one book and print the index.

    The book name is the single command-line argument; only main-namespace
    pages whose title starts with "<book>/" are scanned.
    """
    if len(sys.argv) != 2 or sys.argv[1] == '-h':
        sys.exit(usage)

    book_prefix = sys.argv[1] + "/"

    site = pywikibot.Site('fi', 'wikibooks')
    keyword_template = pywikibot.Page(site, "Malline:hakusana")
    index = []
    for page in site.page_embeddedin(keyword_template, namespaces=[0], content=True):
        if page.title().startswith(book_prefix):
            index.extend(_page_entries(page))

    # Sort by keyword, book and page label. The sort is stable, so entries
    # from the same page keep their document order, and the key never
    # compares a section Link with None.
    index.sort(key=lambda entry: (entry[0], entry[1], entry[2].text))

    _print_index(index)


if __name__ == "__main__":
    main()