create index.py

Rende possibile l'indicizzazione dei libri.

#!/usr/bin/env python
#! -*- coding: utf-8 -*-

"""Build a keyword index for the pages of a category.

The script reads every article of the category named by the first argument,
searches each page for the template {{IndexItem|<arg>}},
and writes the resulting index onto the page named by the second argument.
"""

__author__ = "Pietrodn (http://it.wikipedia.org/wiki/Utente:Pietrodn)"
__date__ = "19 August 2006"
__version__ = "$ Revision: 1.0 $"
__credits__ = """*My father Giuseppedn (http://it.wikipedia.org/wiki/Utente:Giuseppedn), for the idea.
*Guido van Rossum, for Python, a very good programming language.
*Andre Engels (http://en.wikipedia.org/wiki/User:Andre_Engels), for the pywikipedia framework.
*Jimbo Wales, for the Wikipedia."""

import wikipedia # wikipediabot module
import catlib # support for categories
import re # regex module
import sys

def elaborapagine(pagine):
    """Map every index keyword to the titles of the pages that contain it.

    ``pagine`` is an iterable of page objects offering ``exists()``,
    ``get()`` (returning the wikitext) and ``title()``.  Pages that do not
    exist are skipped.

    Returns a dictionary whose keys are the arguments found in
    ``{{IndexItem|...}}`` templates and whose values are lists of page
    titles in visiting order.  A title is listed once for every occurrence
    of the keyword on that page.
    """
    # Backslashes are doubled so the literal holds no invalid string escapes;
    # the resulting regular expression is unchanged.
    modello = re.compile(u"\\{\\{IndexItem\\|(.*?)\\}\\}")
    mydict = dict()
    for pagina in pagine:
        if not pagina.exists():
            continue
        # Scan the wikicode of the page for key words.
        for chiave in modello.findall(pagina.get()):
            # setdefault avoids rebuilding the key list and copying the
            # value list for every single keyword.
            mydict.setdefault(chiave, []).append(pagina.title())
    return mydict

def generaoutput(mydict):
    """Format the keyword dictionary as a list of wikitext index lines.

    ``mydict`` maps each key word to the list of page titles containing it.
    Every key word yields one line of the form
    ``*keyword: »[[Title#keyword|1]] »[[Title#keyword|2]] ...`` where the
    link labels number the pages starting from 1.

    Returns the lines sorted case-insensitively.  Nothing is written to the
    wiki here; the caller is responsible for saving the result.
    """
    # 'linee' is the lines' list to insert in the index's wikicode.
    linee = []
    for chiave, titoli in mydict.items():
        riferimenti = [
            u" \u00BB[[%s#%s|%u]]" % (titolo, chiave, n + 1)
            for n, titolo in enumerate(titoli)
        ]
        linee.append((u"*%s:" % chiave) + u"".join(riferimenti))
    # A key function gives the same ordering as the former comparison
    # function and also works on interpreters without cmp-style sorting.
    linee.sort(key=lambda linea: linea.lower())
    return linee

def caseinsensitivecompare(x, y):
    """Compare two strings ignoring case.

    Returns -1, 0 or 1 when the lower-cased ``x`` is respectively smaller
    than, equal to, or greater than the lower-cased ``y``.
    """
    a = x.lower()
    b = y.lower()
    # Equivalent to cmp(a, b) without depending on the cmp builtin.
    return (a > b) - (a < b)

def main():
    """Build the index described by the command-line arguments.

    The first argument names the category whose articles are scanned; the
    second names the page that receives the generated index.  After the
    user confirms, the index page is overwritten with the new index.

    Exits with status 1 when fewer than two arguments are given or when
    either the category or the index page does not exist.
    """
    try:
        args = wikipedia.handleArgs()
        if len(args) < 2:
            wikipedia.output(u"There aren't enough arguments.")
            sys.exit(1)
        mainPage = catlib.Category(wikipedia.getSite(), args[0])
        indexPage = wikipedia.Page(wikipedia.getSite(), args[1])
        # NOTE(review): the index page must already exist here, although the
        # prompt below says "(if it exists)" -- confirm which is intended.
        if not (mainPage.exists() and indexPage.exists()):
            wikipedia.output(u"The specified pages don't exist.")
            sys.exit(1)
        # Get the pages of the book from the category given as first argument.
        pagine = mainPage.articles()
        mydict = elaborapagine(pagine)
        linee = generaoutput(mydict)
        # One index entry per line, each terminated by a newline.
        definitivestring = u''.join(linea + u'\n' for linea in linee)
        choice = wikipedia.inputChoice(u'Can I override the index page (if it exists)?',  ['Yes', 'No'], ['y', 'N'], 'N')
        if choice in ['Y', 'y']:
            # Set the edit summary.
            wikipedia.setAction(u'Making index')
            # Write the wikitext into the page given as second argument.
            indexPage.put(definitivestring)
    finally:
        # Always release the framework, also on the early exits above.
        wikipedia.stopme()


if __name__ == '__main__':
    main()

categorizzalibro.py

Aggiunge template e categoria ai moduli di un libro.

# -*- coding: utf-8 -*-

import wikipedia
import re

# Prepend the book template and append the book category to every module
# linked from the book's main page.  The book title is the first argument.
args = wikipedia.handleArgs()
try:
    if not args:
        # Without a book title there is nothing to process.
        wikipedia.output(u'Specificare il titolo del libro come primo argomento.')
        raise SystemExit(1)
    titololibro = args[0]
    # Greedy prefix: the group captures what follows the last '/'.
    regex = re.compile(u'.*/(.*)')
    for i in wikipedia.Page(wikipedia.getSite(), titololibro).linkedPages():
        # Skip missing pages, pages outside the book and the cover page.
        if (not i.exists()) or (titololibro not in i.title()) or (u'Copertina' in i.title()):
            continue
        mtch = regex.search(i.title())
        if mtch is None:
            # No '/' in the title: not a sub-page, so not a module.
            continue
        titolomodulo = mtch.group(1)
        oldtext = i.get()
        newtext = u'{{' + titololibro + u'}}\n\n' + oldtext + u'\n\n[[Categoria:' + titololibro + u'|' + titolomodulo + u']]'
        print(i)
        wikipedia.showDiff(oldtext, newtext)
        choice = wikipedia.inputChoice(u'Posso procedere?',  [u'Yes', u'No'], [u'y', u'N'], u'N')
        if choice in [u'Y', u'y']:
            wikipedia.setAction(u'Template + Categoria')
            i.put(newtext)
finally:
    # Always release the framework, even when an error interrupts the run.
    wikipedia.stopme()

genera_elenchi_books.py

Aggiorna automaticamente gli elenchi generati offline, accedendo ad un database.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, wikipedia, catlib, MySQLdb

def main():
    """Refresh the offline-generated lists from the database.

    For every article in "Categoria:Elenchi generati offline" the SQL found
    in the page's ``|query=`` block is run against the database, the
    ``|elenco=`` parameter is replaced by the first column of the result
    rows (one per line) and the ``|data=`` parameter by substituted
    date magic words.  Pages without a query block are skipped.

    Command-line arguments:
      -db:NAME   database to query (default: itwikibooks)
      -always    update every page without asking for confirmation
    """
    args = wikipedia.handleArgs()
    always = False
    database = 'itwikibooks'
    for currentArgument in args:
        if currentArgument.startswith("-db:"):
            # Everything after the "-db:" prefix is the database name.
            database = currentArgument[4:]
        if currentArgument.startswith("-always"):
            always = True

    cat_elenchi = catlib.Category(wikipedia.getSite(code='it', fam='wikibooks'), "Categoria:Elenchi generati offline")
    queryreg = re.compile(r'\|query\s*=\s*<syntaxhighlight lang="sql">(.*?)</syntaxhighlight>', re.DOTALL)
    elencoreg = re.compile(r"\|\s*elenco\s*=\s*(.*?)\s*\}\}", re.DOTALL)
    datareg = re.compile(r"\|\s*data\s*=\s*(.*)")
    db = MySQLdb.connect(host='localhost', user='DumpBot', passwd='', db=database)

    try:
        for i in cat_elenchi.articles():
            wikipedia.output(">>>>> " + i.title() + " <<<<<")
            oldtxt = i.get()

            match = queryreg.search(oldtxt)
            if match is None:
                # A page lacking the query block cannot be refreshed.
                wikipedia.output(u"Nessuna query trovata: pagina saltata.")
                continue
            # NOTE(review): the SQL is taken verbatim from the wiki page and
            # executed as-is; confirm the database account is read-only.
            query = match.group(1).encode('utf-8')
            wikipedia.output(query)

            if always:
                choice = 'y'
            else:
                choice = wikipedia.inputChoice(u"Procedo?",  ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['A', 'a']:
                    # "All" answers yes now and for every following page.
                    always = True
                    choice = 'y'
            if choice not in ['Y', 'y']:
                continue

            cur = db.cursor()
            try:
                cur.execute(query)
                results = cur.fetchall()
            finally:
                cur.close()

            # Only the first column of each row goes into the list.
            nuovoelenco = ''.join(riga[0] + '\n' for riga in results)
            sostituto = unicode("|elenco=\n" + nuovoelenco + "\n}}", 'utf-8')
            # A function replacement inserts the text literally, so
            # backslashes in the database rows are not read as escapes
            # or group references.
            newtxt = elencoreg.sub(lambda _m: sostituto, oldtxt)

            newtxt = datareg.sub("|data={{subst:CURRENTDAY}} {{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}", newtxt)
            wikipedia.showDiff(oldtxt, newtxt)
            wikipedia.setAction(u'Aggiorno elenco')
            i.put(newtxt)
    finally:
        # Release the connection whatever happens while processing pages.
        db.close()

if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()