# Progetto:Bot/Programmi in Python per i bot/ia.py
#
# From Wikisource.
#!/usr/bin/python
# -*- coding: utf-8  -*-

#import pywikibot as bot
import re
from internetarchive import *
import pickle
from os import remove
from shutil import copy
# Session object created once, at import time, by get_session() (presumably
# provided by the star import from internetarchive above).
# NOTE(review): `session` is not referenced by any function visible in this
# file -- confirm whether it is still needed.
session=get_session()

def grab_item(id):
    """Return a reduced copy of the metadata of the archive item ``id``.

    Only the fields listed in ``wanted`` are copied, and only when the
    source item actually defines them, so any of them may be missing from
    the returned dict.
    """
    wanted = (
        "title", "creator", "year", "subject", "description",
        "source", "language", "licenseurl", "mediatype",
    )
    source_meta = get_item(id).metadata
    return {field: source_meta[field] for field in wanted if field in source_meta}

def uploadPdf(pdf, meta_file="upload.txt"):
    """Upload ``pdf`` as a new archive item described by a metadata file.

    ``meta_file`` (default ``"upload.txt"``, UTF-8) holds one ``key:value``
    pair per line, for example::

        identifier:sinoalconfineitws
        title:Sino al confine
        creator:Grazia Deledda
        description:Novel by Grazia Deledda, Fratelli Treves Editori, Milano, 1910
        subject:itwikisource item,italian literature,novels
        date:1910
        language:ita
        licenseurl:http://creativecommons.org/publicdomain/mark/1.0/
        mediatype:texts
        collection:opensource

    ``subject``, ``creator`` and ``language`` become lists when they contain
    commas. Blank lines (including a trailing newline) are ignored, and
    surrounding whitespace is stripped from keys and values.

    Returns ``(identifier, metadata_dict)`` after a successful upload, or
    ``None`` when an item with that identifier already exists.

    Raises ``KeyError`` if the file has no ``identifier`` line and
    ``ValueError`` if a non-blank line contains no ``:``.
    """
    import io  # local import: the module header does not import io

    multi_valued = ("subject", "creator", "language")

    # The context manager closes the metadata file even if reading fails.
    with io.open(meta_file, encoding="utf-8") as handle:
        lines = handle.read().split("\n")

    metadati = {}
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines and a trailing newline
        if ":" not in line:
            raise ValueError("Malformed metadata line (missing ':'): %r" % line)
        key, value = line.split(":", 1)
        key = key.strip()
        value = value.strip()
        if key in multi_valued and "," in value:
            value = [part.strip() for part in value.split(",")]
        metadati[key] = value

    # The identifier names the item; it is not sent as a metadata field.
    item = metadati.pop("identifier")

    if get_item(item).exists:
        print("Item  " + item + "  exists")
        return

    # The file is uploaded under the name "<identifier>.pdf"; a temporary
    # copy with that name is created and always removed afterwards, even
    # when the upload raises.
    temp_pdf = item + ".pdf"
    copy(pdf, temp_pdf)
    print("L'item viene caricato")
    try:
        upload(item, temp_pdf, metadati, verbose=True)
    finally:
        remove(temp_pdf)
    return (item, metadati)

def metadata(item):
    """Overwrite the metadata of the existing archive item ``item``.

    The new values come from the ``key:value`` text embedded below;
    ``subject``, ``creator`` and ``language`` are sent as lists when their
    value contains commas. Nothing is modified (only a message is printed)
    when the item does not exist. Always returns ``None``.
    """
    meta=u'''title:La via del male
creator:Grazia Deledda
description:Romanzo di Grazia Deledda, Nuova Antologia, Roma, 1906
subject:itwikisource item,italian literature,novels
date:1906
language:ita
licenseurl:http://creativecommons.org/publicdomain/mark/1.0/
mediatype:texts
collection:opensource'''
    multi_valued = ("subject", "creator", "language")

    metadati = {}
    for line in meta.split("\n"):
        pieces = line.split(":", 1)
        key, value = pieces[0], pieces[1]
        if key in multi_valued and "," in value:
            value = value.split(",")
        metadati[key] = value

    # Guard clause: there is nothing to modify on a missing item.
    if not get_item(item).exists:
        print("L'item non esiste")
        return

    print("Modifico i metadati")
    modify_metadata(item, metadati, verbose=True)
    return

def uploadDjvu(id, djvu):
    """Upload ``djvu`` as the new item ``id + "_djvu"``.

    The new item reuses the metadata that grab_item() extracts from the
    source item ``id``, with a note linking back to the source appended to
    the description. Returns ``None``.

    The source item may lack a ``description`` or ``title`` (grab_item only
    copies fields that exist), so both are handled explicitly: a missing
    description becomes just the note, and a missing title falls back to
    the identifier as link text.
    """
    meta = grab_item(id)
    newId = id + "_djvu"

    note = 'Derived from files into <a href="https://archive.org/details/%s">%s</a>' % (
        id, meta.get("title", id))

    description = meta.get("description")
    if description is None:
        meta["description"] = note
    elif isinstance(description, list):
        # Multi-valued field: `+=` with a string would extend the list one
        # character at a time, so the note is added as a separate entry.
        meta["description"] = description + [note]
    else:
        meta["description"] = description + "<br>" + note

    upload(newId, djvu, meta, verbose=True)
    return

def fixMeta():
    """Load the "listaOpal" pickle and prompt the user for a place name.

    NOTE(review): this function appears to be unfinished. It prompts for
    input and compiles a regular expression, but never uses the compiled
    pattern, never modifies the loaded data and always returns None.
    """
    # The empty folder argument makes carica_pcl open "listaOpal.pcl"
    # relative to the current working directory.
    t=carica_pcl("listaOpal","")
    for i in t:
        # NOTE(review): the loop variable `i` is never used; the test and the
        # prompt index the whole list `t` (t[4], t[2]) on every iteration.
        # Possibly i[4] / i[2] were intended -- confirm against the data.
        if t[4]=="xxxx":
            place=raw_input(t[2]+": ")
            # NOTE(review): `place` is interpolated into the pattern without
            # re.escape(), and the compiled pattern `pl` is never used.
            pl=re.compile(r"%s: *([^,]+), (\d\d\d\d)" % (place))
            for j in range(len(t)):
                # Placeholder body: the loop exits on its first iteration.
                break
    return

# Returns the list of identifiers of the items found by the query.
def search(query):
    """Run ``query`` through search_items() and collect each hit's identifier."""
    return [hit["identifier"] for hit in search_items(query)]

# Loads the metadata of one or more items as a list of metadata dicts.
def grab_metadata(l):
    """Return the metadata of every item identifier in ``l``.

    ``l`` may be a single identifier (``str`` or ``unicode``) or any
    iterable of identifiers. The result is always a new list, in input
    order, holding the ``metadata`` attribute of each item.

    The argument is no longer overwritten in place, so the caller's list of
    identifiers stays intact, and tuples or other iterables are accepted.
    """
    if isinstance(l, basestring):
        l = [l]
    return [get_item(identifier).metadata for identifier in l]

# Returns the metadata of the items obtained with a query.
def search_metadata(query):
    """Search for ``query`` and return the metadata of every matching item."""
    return grab_metadata(search(query))

# Takes a metadata dict and turns it into editable UTF-8 text.
def dict2txt(metadata):
    """Serialize ``metadata`` as UTF-8 encoded ``key:value`` lines.

    The fields in ``leading`` come first, in that fixed order; every other
    field follows in the dict's own iteration order, except the fields in
    ``skipped``, which are left out. List values are joined with commas.
    """
    leading = ["identifier", "title", "description", "subject",
               "publisher", "city", "year"]
    skipped = leading + ["addeddate", "scanner", "curation", "ocr",
                         "publicdate", "uploader", "ppi", "mediatype"]

    def as_line(key):
        # One "key:value" line; only exact lists are comma-joined.
        value = metadata[key]
        if type(value) is list:
            value = ",".join(value)
        return key + ":" + value

    lines = [as_line(key) for key in leading if key in metadata]
    lines.extend(as_line(key) for key in metadata if key not in skipped)
    return "\n".join(lines).encode("utf-8")

# Takes UTF-8 or unicode text in dict2txt format and turns it into a
# metadata-style dict.
def txt2dict(testo):
    """Parse ``key:value`` lines (as produced by dict2txt) into a dict.

    ``testo`` may be a UTF-8 byte string or unicode text. Keys and values
    are stripped of surrounding whitespace. Any value containing commas is
    split into a list of stripped parts, except ``description``, which is
    free text and is always kept as a single string.

    Returns the dict, or ``False`` (after printing a message) when the text
    cannot be decoded or a line has no ``:`` separator.
    """
    try:
        if isinstance(testo, bytes):
            testo = testo.decode("utf-8")
        lines = testo.strip().split("\n")
    except (UnicodeDecodeError, AttributeError):
        # Undecodable bytes, or an argument that is not text at all.
        print("Il testo deve essere utf-8 o unicode")
        return False

    result = {}
    for line in lines:
        pieces = line.split(":", 1)
        if len(pieces) != 2:
            print("Ogni riga deve avere un carattere : ")
            return False
        key = pieces[0].strip()
        value = pieces[1].strip()
        # Compare the key of the current line; the old code compared the
        # whole first row (a list) to "description", which never matched,
        # so descriptions containing commas were split too.
        if key != "description" and "," in value:
            value = [part.strip() for part in value.split(",")]
        result[key] = value
    return result

                
    
    
    
            
        

    
# utilities
# New version, handles nested tags; x is the "generic" part of the opening
# tag (e.g. {{ when searching for {{Intestazione| )
def find_stringa(stringa, idi, idf, dc=0, x=None, side="left"):
    """Return the first span of ``stringa`` delimited by ``idi`` ... ``idf``.

    Parameters:
        stringa: text to search.
        idi: opening delimiter.
        idf: closing delimiter.
        dc: 0 returns the content between the delimiters; any other value
            returns the span with both delimiters included.
        x: optional generic opening marker used to balance nested tags. The
            span is extended to further ``idf`` occurrences until it holds
            no more ``x`` than ``idf``.
        side: "right" starts from the last occurrence of ``idi``; any other
            value starts from the first one.

    Returns "" when ``idi`` is absent, when no ``idf`` follows it, or when
    the nesting cannot be balanced. Previously a missing ``idf`` longer
    than one character yielded a meaningless slice instead of "".
    """
    start = stringa.rfind(idi) if side == "right" else stringa.find(idi)
    if start == -1:
        return ""

    close = stringa.find(idf, start + len(idi))
    if close == -1:
        return ""
    end = close + len(idf)

    if x is not None:
        # Extend the span one closing delimiter at a time until every
        # nested opening marker has a matching closing delimiter.
        while stringa[start:end].count(x) > stringa[start:end].count(idf):
            close = stringa.find(idf, end)
            if close == -1:
                return ""  # unbalanced: ran out of closing delimiters
            end = close + len(idf)

    if dc == 0:
        return stringa[start + len(idi):end - len(idf)]
    return stringa[start:end]

def produci_lista(testo, idi, idf, dc=1, inizio=None):
    """Collect every ``idi`` ... ``idf`` span found in ``testo``, in order.

    Each span located by find_stringa() is removed from a working copy of
    the text before the next search. With ``dc`` equal to 0 the delimiters
    are stripped from each collected element; otherwise they are kept.
    ``inizio`` is passed to find_stringa() as its nesting marker ``x``.
    """
    remaining = testo[:]
    found = []
    while True:
        span = find_stringa(remaining, idi, idf, 1, inizio)
        if span == "":
            break
        # Drop the first occurrence so the next search moves forward.
        remaining = remaining.replace(span, "", 1)
        found.append(span if dc != 0 else find_stringa(span, idi, idf, 0, inizio))
    return found

def carica_pcl(nome_file, folder="dati/"):
    """Load and return the object pickled in ``folder + nome_file + ".pcl"``.

    ``folder`` is prepended verbatim, so it must end with a path separator
    (or be empty to read from the current directory).
    """
    nome_file = folder + nome_file + ".pcl"
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # written by this tool or another trusted source.
    # The default (text) mode is kept on purpose so that it matches the "w"
    # mode used by salva_pcl; switching to binary needs both changed together.
    # The context manager closes the file even when pickle.load raises.
    with open(nome_file) as f:
        return pickle.load(f)

def salva_pcl(variabile, nome_file="dato", folder="dati/"):
    """Pickle ``variabile`` into ``folder + nome_file + ".pcl"``.

    ``folder`` is prepended verbatim, so it must end with a path separator
    (or be empty to write into the current directory). Prints a
    confirmation message and returns ``None``.
    """
    nome_file = folder + nome_file + ".pcl"
    # The "w" mode is kept on purpose so that it matches the default mode
    # carica_pcl reads with. The context manager closes (and flushes) the
    # file even when pickle.dump raises.
    with open(nome_file, "w") as f:
        pickle.dump(variabile, f)
    print("Variabile salvata nel file " + nome_file)
    return

'{}'