#!/usr/bin/env python # # FlickrTouchr - a simple python script to grab all your photos from flickr, # dump into a directory - organised into folders by set - # along with any favourites you have saved. # # You can then sync the photos to an iPod touch. # # Version: 1.2 # # Original Author: colm - AT - allcosts.net - Colm MacCarthaigh - 2008-01-21 # # Modified by: Dan Benjamin - http://hivelogic.com # # Modified again by Ben Barker to allow for videos and images, plus saving # files with their titles as filenames # # License: Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html # import xml.dom.minidom import webbrowser import urlparse import urllib2 import unicodedata import cPickle import glob import md5 import sys import os import re import commands API_KEY = "e224418b91b4af4e8cdb0564716fa9bd" SHARED_SECRET = "7cddb9c9716501a0" # # Utility functions for dealing with flickr authentication # def getText(nodelist): rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc = rc + node.data return rc.encode("utf-8") # # Get the frob based on our API_KEY and shared secret # def getfrob(): # Create our signing string string = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob" hash = md5.new(string).digest().encode("hex") # Formulate the request url = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob" url += "&api_key=" + API_KEY + "&api_sig=" + hash #print url try: # Make the request and extract the frob response = urllib2.urlopen(url) # Parse the XML dom = xml.dom.minidom.parse(response) # get the frob frob = getText(dom.getElementsByTagName("frob")[0].childNodes) # Free the DOM dom.unlink() # Return the frob return frob except: raise "Could not retrieve frob" # # Login and get a token # def froblogin(frob, perms): string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms hash = md5.new(string).digest().encode("hex") # Formulate the request url = "http://api.flickr.com/services/auth/?" url += "api_key=" + API_KEY + "&perms=" + perms url += "&frob=" + frob + "&api_sig=" + hash # Tell the user what's happening print "In order to allow FlickrTouchr to read your photos and favourites" print "you need to allow the application. Please press return when you've" print "granted access at the following url (which should have opened" print "automatically)." print print url print print "Waiting for you to press return" # We now have a login url, open it in a web-browser #webbrowser.open_new(url) # Wait for input sys.stdin.readline() print "Thanks" # Now, try and retrieve a token string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken" hash = md5.new(string).digest().encode("hex") # Formulate the request url = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken" url += "&api_key=" + API_KEY + "&frob=" + frob url += "&api_sig=" + hash # See if we get a token try: # Make the request and extract the frob response = urllib2.urlopen(url) #print response # Parse the XML dom = xml.dom.minidom.parse(response) # get the token and user-id token = getText(dom.getElementsByTagName("token")[0].childNodes) nsid = dom.getElementsByTagName("user")[0].getAttribute("nsid") # Free the DOM dom.unlink() # Return the token and userid return (nsid, token) except: raise "Login failed" # # Sign an arbitrary flickr request with a token # def flickrsign(url, token): #query = urlparse.urlparse(url).query query = urlparse.urlparse(url)[4] query += "&api_key=" + API_KEY + "&auth_token=" + token params = query.split('&') # Create the string to hash string = SHARED_SECRET # Sort the arguments alphabettically params.sort() for param in params: string += param.replace('=', '') hash = md5.new(string).digest().encode("hex") # Now, append the api_key, and the api_sig args url += "&api_key=" + API_KEY + "&auth_token=" + token + "&api_sig=" + hash # Return the signed url return url # # Grab the photo from the server # def getphoto(id, token, filename, dir, url, media, format): if media == "photo": #if False: #If we already have it, then skip if os.path.exists(dir+"/"+filename+"."+format): #print "Photo "+filename+" from set "+dir+" already exists. Skipping..." return filename+"."+format #dom.unlink() print "Downloading "+filename+"."+format+" from set "+dir+" ..." # Grab the image file. Use wget if we can try: cmd = 'wget --progress=bar '+url+' --output-document="'+ dir+'/'+filename+'.'+format+'"' os.system(cmd) except: req = urllib2.Request(url) response = urllib2.urlopen(req) data = response.read() # Save the file - writing data as binary (wb) fh = open(dir+"/"+filename+"."+format, "wb") fh.write(data) fh.close() return filename #formatand original url do not work for videos. Oh well. We will get it the slow way. if media == "video": try: # Contruct a request to find the sizes url = "http://api.flickr.com/services/rest/?method=flickr.photos.getSizes" url += "&photo_id=" + id # Sign the request url = flickrsign(url, token) #print url # Make the request response = urllib2.urlopen(url) # Parse the XML dom = xml.dom.minidom.parse(response) # Get the list of sizes sizes = dom.getElementsByTagName("size") # Grab the original if it exists label=sizes[-1].getAttribute("label") check = re.match(".*Original",str(label)) imgurl = sizes[-1].getAttribute("source") cmd="curl -I '"+imgurl+"' 2> /dev/null | grep -o '&fn=[^&]*' | tr -d '&fn=' | grep -o \\\..* | tr -d '.'" print cmd type=commands.getoutput(cmd) #print type # Free the DOM memory dom.unlink() if os.path.exists(dir+"/"+filename+"."+type): print "Video "+filename+"."+type+" already exists. Skipping..." return filename print "Downloading "+filename+" from set "+dir+" ..." try: cmd = "wget --progress=bar "+imgurl+" --output-document='"+ dir+"/"+filename+"."+type+"'" print cmd os.system(cmd) except: #print imgurl #Get the actual data data = response.read() # Save the file - writing data as binary (wb) fh = open(dir+"/"+filename+".mov", "wb") fh.write(data) fh.close() return filename except: print "Failed to retrieve photo id " + id ######## Main Application ########## if __name__ == '__main__': # The first, and only argument needs to be a directory try: os.chdir(sys.argv[1]) except: print "usage: %s directory" % sys.argv[0] sys.exit(1) # First things first, see if we have a cached user and auth-token try: cache = open("touchr.frob.cache", "r") config = cPickle.load(cache) cache.close() # We don't - get a new one except: (user, token) = froblogin(getfrob(), "read") config = { "version":1 , "user":user, "token":token } # Save it for future use cache = open("touchr.frob.cache", "w") cPickle.dump(config, cache) cache.close() # Now, construct a query for the list of photo sets url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList" url += "&user_id=" + config["user"] url = flickrsign(url, config["token"]) #print url # get the result response = urllib2.urlopen(url) #print response # Parse the XML dom = xml.dom.minidom.parse(response) # Get the list of Sets sets = dom.getElementsByTagName("photoset") # For each set - create a url urls = [] for set in sets: pid = set.getAttribute("id") dir = getText(set.getElementsByTagName("title")[0].childNodes) dir = unicodedata.normalize('NFKD', dir.decode("utf-8", "ignore")).encode('ASCII', 'ignore') # Normalize to ASCII # Build the list of photos url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos" url += "&extras=media,original_format,url_sq,url_t,url_s,url_m,url_o&photoset_id=" + pid #print url # Append to our list of urls urls.append( (url , dir) ) # Free the DOM memory dom.unlink() # Add the photos which are not in any set url = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet&extras=media,original_format,url_sq,url_t,url_s,url_m,url_o" urls.append( (url, "No_Set") ) # Add the user's Favourites url = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList&extras=media,original_format,url_sq,url_t,url_s,url_m,url_o" urls.append( (url, "Favourites") ) # Time to get the photos inodes = {} for (setUrl , dir) in urls: #print "entering directory for set "+dir # Create the directory try: os.makedirs(dir) except: pass # Get 500 results per page setUrl += "&per_page=500" pages = page = 1 while page <= pages: request = setUrl + "&page=" + str(page) #print request # Sign the url request = flickrsign(request, config["token"]) #print "two" # Make the request response = urllib2.urlopen(request) #print "three" # Parse the XML try: dom = xml.dom.minidom.parse(response) except: page = page+1 continue # Get the total try: pages = int(dom.getElementsByTagName("photos")[0].getAttribute("pages")) except: try: pages = int(dom.getElementsByTagName("photoset")[0].getAttribute("pages")) except: pages = pages+1 continue pass pass # Grab the photos for photo in dom.getElementsByTagName("photo"): # Grab the id photoid = photo.getAttribute("id") #print photoid # The target target = photo.getAttribute("title").encode("utf8") #print target # The media type media= photo.getAttribute("media").encode("utf8") #print media # The media type format= photo.getAttribute("originalformat").encode("utf8") #print format #The original URL -get the largest available: #(url_sq, url_t, url_s, url_m, url_o) try: url= photo.getAttribute("url_o").encode("utf8") except: try: url= photo.getAttribute("url_m").encode("utf8") except: try: url= photo.getAttribute("url_a").encode("utf8") except: try: url= photo.getAttribute("url_t").encode("utf8") except: url= photo.getAttribute("url_sq").encode("utf8") inodes[photoid] = getphoto(photo.getAttribute("id"), config["token"], target, dir, url, media, format) # Move on the next page page = page + 1