User:Red panda bot/source

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, unicode_literals

import base64
import hashlib
import io
import re
import json

import pywikibot
from pywikibot import config, textlib
from pywikibot.comms.http import fetch
from pywikibot.specialbots import UploadRobot
from pywikibot.tools import PY2

try:
    import flickrapi  # see: http://stuvel.eu/projects/flickrapi
except ImportError as e:
    flickrapi = e
from scripts import flickrripper

if not PY2:
    from urllib.parse import urlencode
else:
    from urllib import urlencode
#import xml
    
#print(xml.etree.ElementTree.dump(photoInfo.find('photo')))
#print(xml.etree.ElementTree.dump(photoInfo.find('photo').find('tags')))

#result=[]
#for tag in photoInfo.find('photo').find('tags').findall('tag'):
#    result.append(tag.attrib['raw'].lower())
#print(result)

#print(flickrripper.getFilename(photoInfo))
#print(dir(flickrripper))

#see https://www.flickr.com/services/api/flickr.interestingness.getList.html
def interestingness(flickr, date, per_page='1'):
    """Query Flickr's interestingness list and return the parsed reply.

    flickr   -- API client object providing ``do_flickr_call``
    date     -- forwarded unchanged as the ``date`` argument of the call
    per_page -- forwarded unchanged as the ``per_page`` argument

    The call asks for JSON output with the ``license`` extra; the bytes
    returned by the client are decoded as UTF-8 and parsed with ``json``.
    """
    request = {
        'date': date,
        'format': 'json',
        'extras': 'license',
        'per_page': per_page,
    }
    raw_reply = flickr.do_flickr_call('flickr.interestingness.getList',
                                      **request)
    return json.loads(raw_reply.decode('UTF-8'))
    
def insertTags(insert_str, str, pos):
    """Insert a string in front of the first occurrence of a marker.

    insert_str -- the text to insert
    str        -- the original string (the parameter name shadows the
                  builtin; it is kept for backward compatibility)
    pos        -- the marker text; ``insert_str`` is placed immediately
                  before its first occurrence in ``str``

    Returns the new string. Raises ValueError when the marker is empty or
    does not occur in ``str``.

    The marker may be longer than one character.
    """
    if not pos:
        # An empty marker never matched in the original implementation.
        raise ValueError('empty marker')
    # str.index raises ValueError when the marker is absent.
    marker_at = str.index(pos)
    return str[:marker_at] + insert_str + str[marker_at:]
 
def isAllowedLicense(license):
    """
    Fork flickrripper.py

    Check whether a Flickr license id is acceptable for upload.

    license -- the Flickr license id, as an int or a numeric string

    Returns True only when ``flickrripper.flickr_allowed_license`` marks the
    id as allowed. Ids that are missing from that table are treated as not
    allowed.
    """
    # Use .get() so that an id the table does not know about skips this one
    # photo instead of aborting the whole run with a KeyError.
    allowed = flickrripper.flickr_allowed_license.get(int(license), False)
    return bool(allowed)
        
def getFlinfoDescription(photo_id):
    """
    Fork flickrripper.py

    Request the raw description for a Flickr photo from the Flinfo script
    at tools.wmflabs.org/redpanda and return the text of the response.

    TODO: Add exception handling, try a couple of times
    """
    query = urlencode({'id': photo_id, 'raw': 'on'})
    flinfo_url = 'https://tools.wmflabs.org/redpanda/flinfo/flinfo.php?%s' % query
    response = fetch(flinfo_url)
    return response.text

    
    
def buildDescription(photoInfo, flinfoDescription='', flickrreview=False, reviewer='',
                     addCategory=''):
    """Fork flickrripper.py

    Build the final description for the image.

    The description is based on the info from flickrinfo and improved:

      * the value of the ``|Date=`` line is wrapped in ``{{Taken on|...}}``
      * ``{{flickrreview}}`` is filled in when both ``flickrreview`` and
        ``reviewer`` are set
      * ``addCategory`` is appended verbatim
      * ``{{subst:chc}}`` is appended unless ``{{subst:unc}}`` is present
      * the photo's Flickr tags are added as an ``other_fields`` entry
        right after the ``|other_versions=`` line

    photoInfo         -- photo info object, passed to flickrripper.getTags
    flinfoDescription -- description text as returned by Flinfo
    flickrreview      -- whether to complete the flickrreview template
    reviewer          -- reviewer name used in the flickrreview template
    addCategory       -- wikitext appended after the description

    Returns the resulting description text.
    """
    # Normalise line endings first, so that the line-based patterns below
    # never capture a stray '\r' (e.g. inside the {{Taken on}} template).
    description = flinfoDescription.replace('\r\n', '\n')

    # The description may lack a |Date= line (it does for the default empty
    # description), so the match has to be checked before it is used.
    date_match = re.search(r'\|Date=(.*)\n', description)
    if date_match and date_match.group(1):
        date_line = '|Date={{Taken on|%s}}\n' % date_match.group(1)
        # A callable replacement is inserted literally; a plain string would
        # have its backslashes interpreted as escapes/group references.
        description = re.sub(r'\|Date=.*\n', lambda _m: date_line,
                             description)

    if flickrreview and reviewer:
        description = description.replace(
            '{{flickrreview}}',
            '{{flickrreview|%s|'
            '{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-'
            '{{subst:CURRENTDAY2}}}}' % reviewer)

    if addCategory:
        description = description + addCategory

    if '{{subst:unc}}' not in description:
        # Request category check
        description = description + '{{subst:chc}}\n'

    tags = flickrripper.getTags(photoInfo)
    if tags:
        tags_field = (
            '|other_fields={{Information field|Name=Flickr tags'
            '|Value=<code>%s</code>}}\n' % '</code>, <code>'.join(tags))
        versions_block = '|other_versions=\n%s}}' % tags_field
        # Tags are free text from Flickr and may contain backslashes, so the
        # replacement is again supplied through a callable.
        description = re.sub(r'\|other_versions=.*?\n}}',
                             lambda _m: versions_block, description)
    return description
    
#description = buildDescription(photoInfo, flinfoDescription=flickrripper.getFlinfoDescription(photo_id='495671756'), flickrreview=False, reviewer='',
#                     addCategory='[[Category:Flickr files uploaded by ' + config.usernames['commons']['commons'] + ']]\n[[Category:Photos in Flickr Explore]]\n')
    
#print(description) 
    
# Flickr license ids for which every upload is additionally listed on the
# tracking page "User:Red panda bot/license<id>".
_TRACKED_LICENSES = ('7', '8', '10')


def _logTrackedUpload(site, license_id, filename):
    """Append a link to the uploaded file to the tracking page of its license."""
    page = pywikibot.Page(site, u"User:Red panda bot/license" + license_id)
    page.text = page.text + u"* [[:File:" + filename + ']]\n'
    page.save(u"[[User:Red panda bot|BOT]]: + [[:File:" + filename + ']]')


def processPhoto(flickr, license, photo_id='', flickrreview=False, reviewer='',
                 addCategory=''):
    """Fork flickrripper.py

    Process a single Flickr photo.

    For each image:
      * Check the license
      * Check if it isn't already on Commons
      * Build suggested filename
      * Pull description from Flinfo
      * Upload the image by URL
      * For licenses 7, 8 and 10, list the file on the matching
        "User:Red panda bot/license<id>" page

    flickr       -- Flickr API client, passed to flickrripper.getPhoto
    license      -- Flickr license id of the photo (int or numeric string)
    photo_id     -- Flickr id of the photo
    flickrreview -- passed through to buildDescription
    reviewer     -- passed through to buildDescription
    addCategory  -- passed through to buildDescription

    Returns 1 when the photo was uploaded, otherwise 0 (license not allowed,
    no photo id, duplicate already on Commons, or upload not completed).
    """
    if not isAllowedLicense(license):
        pywikibot.output('Invalid license')
        return 0

    if not photo_id:
        # Without an id there is nothing to fetch; previously this case fell
        # through to code using variables that had never been assigned.
        pywikibot.output('No photo id given')
        return 0

    pywikibot.output(str(photo_id))
    (photoInfo, photoSizes) = flickrripper.getPhoto(flickr, photo_id)
    # Get the url of the largest photo
    photoUrl = flickrripper.getPhotoUrl(photoSizes)
    # Should download the photo only once
    photo = flickrripper.downloadPhoto(photoUrl)

    # Don't upload duplicate images, should add override option
    duplicates = flickrripper.findDuplicateImages(photo)
    if duplicates:
        pywikibot.output('Found duplicate image at {}'
                         .format(duplicates.pop()))
        # TODO: consider adding [[Category:Photos in Flickr Explore]] to the
        # existing file page instead of only reporting the duplicate.
        return 0

    filename = flickrripper.getFilename(photoInfo)
    flinfoDescription = getFlinfoDescription(photo_id)
    photoDescription = buildDescription(photoInfo, flinfoDescription,
                                        flickrreview, reviewer,
                                        addCategory)
    pywikibot.output(filename)
    bot = UploadRobot(photoUrl,
                      description=photoDescription,
                      useFilename=filename,
                      keepFilename=True,
                      verifyDescription=False,
                      uploadByUrl=True)
    # upload_image returns the filename on success and None otherwise, so a
    # failed upload must neither be counted nor listed on a tracking page.
    if not bot.upload_image(debug=False):
        return 0

    # Normalise to text so that int and string license ids behave the same.
    license_id = str(license)
    if license_id in _TRACKED_LICENSES:
        site = pywikibot.Site('commons', 'commons')
        _logTrackedUpload(site, license_id, filename)
    return 1


#=======================================

# Wikitext appended to the description of every upload.
addCategory='[[Category:Flickr files uploaded by ' + config.usernames['commons']['commons'] + ']]\n[[Category:Photos in Flickr Explore]]\n'
totalPhotos = 0
uploadedPhotos = 0
flickrreview=False
reviewer=''

# The guarded import at the top of the file stores the ImportError in
# ``flickrapi`` instead of raising it. Re-raise it here so that a missing
# library is reported as such rather than as an AttributeError below.
if isinstance(flickrapi, ImportError):
    raise flickrapi

flickr = flickrapi.FlickrAPI(config.flickr['api_key'], config.flickr['api_secret'])

# The day to process is given as -start:YYYY-MM-DD
# (original author's note: begin in 2004-01-07).
date = None
for arg in pywikibot.handleArgs():
    if arg.startswith('-start:'):
        date = arg[len('-start:'):]

if not date:
    # Previously a missing -start: argument ended in a NameError.
    pywikibot.output('Missing required argument: -start:YYYY-MM-DD')
else:
    data = interestingness(flickr, date, per_page='500')
    for photo in data['photos']['photo']:
        photo_id = photo['id']
        license = photo['license']
        uploadedPhotos += processPhoto(flickr, license, photo_id, flickrreview,
                                       reviewer, addCategory)
        totalPhotos += 1
    pywikibot.output('Finished running')
    pywikibot.output('Total photos: ' + str(totalPhotos))
    pywikibot.output('Uploaded photos: ' + str(uploadedPhotos))
    pywikibot.output('Date: ' + date)


#print(data['photos']['photo'])