#!/usr/bin/env python
 
#
# FlickrTouchr - a simple python script to grab all your photos from flickr,
# dump into a directory - organised into folders by set -
# along with any favourites you have saved.
#
# You can then sync the photos to an iPod touch.
#
# Version: 1.2
#
# Original Author: colm - AT - allcosts.net - Colm MacCarthaigh - 2008-01-21
#
# Modified by: Dan Benjamin - http://hivelogic.com
#
# Modified again by Ben Barker to allow for videos and images, plus saving
# files with their titles as filenames
#
# License: Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html
#
 
import commands
import cPickle
import glob
import md5
import os
import re
import subprocess
import sys
import unicodedata
import urllib2
import urlparse
import webbrowser
import xml.dom.minidom

API_KEY = "e224418b91b4af4e8cdb0564716fa9bd"
SHARED_SECRET = "7cddb9c9716501a0"
 
#
# Utility functions for dealing with flickr authentication
#
def getText(nodelist):
    """Return the text of the TEXT_NODE entries in nodelist, UTF-8 encoded.

    Only direct text nodes are used; element children are skipped, so text
    nested deeper in the tree is not collected.
    """
    pieces = (child.data for child in nodelist
              if child.nodeType == child.TEXT_NODE)
    return "".join(pieces).encode("utf-8")
 
#
# Get the frob based on our API_KEY and shared secret
#
def getfrob():
    """Request a frob from flickr.auth.getFrob and return it as a string.

    The request is signed with the md5 of SHARED_SECRET followed by the
    argument names and values (api_key, method).

    Raises:
        RuntimeError: if the request, the XML parsing or the lookup of the
            <frob> element fails.
    """
    # Create our signing string
    signing = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob"
    api_sig = md5.new(signing).hexdigest()

    # Formulate the request
    # NOTE(review): Flickr is believed to accept only HTTPS API calls
    # nowadays - confirm before relying on this plain-HTTP endpoint.
    url = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob"
    url += "&api_key=" + API_KEY + "&api_sig=" + api_sig

    try:
        # Make the request and parse the XML reply
        response = urllib2.urlopen(url)
        try:
            dom = xml.dom.minidom.parse(response)
        finally:
            response.close()

        # Extract the frob, always freeing the DOM afterwards
        try:
            return getText(dom.getElementsByTagName("frob")[0].childNodes)
        finally:
            dom.unlink()

    except Exception as e:
        # A real exception class is required here: raising a plain string
        # is itself a TypeError on Python 2.6+ and hides this message.
        raise RuntimeError("Could not retrieve frob: %s" % e)
 
#
# Login and get a token
#
def froblogin(frob, perms):
    """Have the user authorise the frob, then exchange it for an auth token.

    Prints the authorisation URL, waits for the user to press return on
    stdin, and then calls flickr.auth.getToken.

    Args:
        frob: frob string, as returned by getfrob().
        perms: permission level to request (the script passes "read").

    Returns:
        A (nsid, token) tuple: the user's id and the auth token.

    Raises:
        RuntimeError: if the token request fails or the reply carries no
            <token> / <user> element.
    """
    signing = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms
    api_sig = md5.new(signing).hexdigest()

    # Formulate the login url
    url = "http://api.flickr.com/services/auth/?"
    url += "api_key=" + API_KEY + "&perms=" + perms
    url += "&frob=" + frob + "&api_sig=" + api_sig

    # Tell the user what's happening.  The automatic browser launch
    # (webbrowser.open_new) is disabled in this version, so the user has to
    # open the printed url by hand.
    print("In order to allow FlickrTouchr to read your photos and favourites")
    print("you need to allow the application. Please press return when you've")
    print("granted access at the following url (which should have opened")
    print("automatically).")
    print("")
    print(url)
    print("")
    print("Waiting for you to press return")

    # Wait for input
    sys.stdin.readline()
    print("Thanks")

    # Now, try and retrieve a token
    signing = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken"
    api_sig = md5.new(signing).hexdigest()

    # Formulate the request
    url = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken"
    url += "&api_key=" + API_KEY + "&frob=" + frob
    url += "&api_sig=" + api_sig

    try:
        # Make the request and parse the XML reply
        response = urllib2.urlopen(url)
        try:
            dom = xml.dom.minidom.parse(response)
        finally:
            response.close()

        # Get the token and user-id, always freeing the DOM afterwards
        try:
            token = getText(dom.getElementsByTagName("token")[0].childNodes)
            nsid = dom.getElementsByTagName("user")[0].getAttribute("nsid")
        finally:
            dom.unlink()
    except Exception as e:
        # A real exception class is required here: raising a plain string
        # is itself a TypeError on Python 2.6+ and hides this message.
        raise RuntimeError("Login failed: %s" % e)

    return (nsid, token)
 
#
# Sign an arbitrary flickr request with a token
#
def flickrsign(url, token):
    """Return url with api_key, auth_token and api_sig arguments appended.

    The signature is the md5 hex digest of SHARED_SECRET followed by the
    name and value of every query argument (including api_key and
    auth_token), ordered by argument name.

    Args:
        url: request url that already carries a query string
            (e.g. "...?method=..."); the new arguments are appended with "&".
        token: auth token obtained from froblogin().
    """
    auth_args = "api_key=" + API_KEY + "&auth_token=" + token
    query = urlparse.urlparse(url)[4] + "&" + auth_args

    # Split each argument at its FIRST "=" only, so a value that itself
    # contains "=" is signed unmodified, and sort by name rather than by the
    # raw "name=value" text.
    pairs = []
    for param in query.split("&"):
        if param:
            name, _, value = param.partition("=")
            pairs.append((name, value))
    pairs.sort()

    signing = SHARED_SECRET + "".join(name + value for (name, value) in pairs)
    api_sig = md5.new(signing).hexdigest()

    # Now, append the api_key, auth_token and api_sig args
    return url + "&" + auth_args + "&api_sig=" + api_sig
 
#
# Grab the photo from the server
#
def _fetch_to_file(url, path):
    """Download url into path, preferring wget and falling back to urllib2.

    If the fallback fails too, any partial file is removed (so a later run
    does not mistake it for a finished download) and the error propagates.
    """
    # wget receives an argument list, not a shell command line, so quotes,
    # spaces or $(...) in a title or url are never interpreted by a shell.
    try:
        status = subprocess.call(
            ["wget", "--progress=bar", "--output-document=" + path, url])
    except OSError:
        # wget is not installed or cannot be executed
        status = -1
    if status == 0:
        return

    try:
        response = urllib2.urlopen(url)
        try:
            # Save the file - writing data as binary (wb)
            fh = open(path, "wb")
            try:
                fh.write(response.read())
            finally:
                fh.close()
        finally:
            response.close()
    except Exception:
        if os.path.exists(path):
            os.remove(path)
        raise


def _video_extension(video_url):
    """Return the file extension of the video at video_url, or "" if unknown.

    The download name is read from the "&fn=<name>" argument found in the
    headers printed by "curl -I"; the extension is the text after its last dot.
    """
    proc = subprocess.Popen(["curl", "-I", video_url],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    headers = proc.communicate()[0]
    found = re.search(r"&fn=([^&\s]*)", headers)
    if found is None or "." not in found.group(1):
        return ""
    return found.group(1).rpartition(".")[2]


def getphoto(id, token, filename, dir, url, media, format):
    """Download one photo or video into dir unless it is already there.

    Args:
        id: flickr photo id.
        token: auth token used to sign the getSizes call (videos only).
        filename: photo title, used as the file name without extension.
            "/" is replaced by "_" and an empty title falls back to the id.
        dir: existing directory to save into.
        url: direct download url (used for photos only).
        media: "photo" or "video"; anything else is ignored.
        format: file extension for photos.

    Returns:
        filename + "." + format when the photo already exists, the bare
        filename after a download or for a video, and None when the download
        fails or media is neither "photo" nor "video".
    """
    # Titles come straight from flickr: keep them inside dir and non-empty.
    filename = filename.replace("/", "_").replace(os.sep, "_")
    if not filename:
        filename = str(id)

    if media == "photo":
        path = dir + "/" + filename + "." + format

        # If we already have it, then skip
        if os.path.exists(path):
            return filename + "." + format

        print("Downloading " + filename + "." + format + " from set " + dir + " ...")
        try:
            _fetch_to_file(url, path)
        except Exception as e:
            print("Failed to retrieve photo id " + id + ": " + str(e))
            return None
        return filename

    # format and the original url do not work for videos, so look the
    # download url up through flickr.photos.getSizes instead.
    if media == "video":
        try:
            # Construct and sign a request to find the sizes
            request = "http://api.flickr.com/services/rest/?method=flickr.photos.getSizes"
            request += "&photo_id=" + id
            response = urllib2.urlopen(flickrsign(request, token))
            try:
                dom = xml.dom.minidom.parse(response)
            finally:
                response.close()

            # Use the last <size> entry (presumably the largest/original -
            # TODO confirm against the getSizes documentation).
            try:
                video_url = dom.getElementsByTagName("size")[-1].getAttribute("source")
            finally:
                # Free the DOM memory
                dom.unlink()

            # Default to "mov" when the headers do not reveal the extension.
            extension = _video_extension(video_url) or "mov"
            path = dir + "/" + filename + "." + extension

            if os.path.exists(path):
                print("Video " + filename + "." + extension + " already exists. Skipping...")
                return filename

            print("Downloading " + filename + " from set " + dir + " ...")
            _fetch_to_file(video_url, path)
            return filename

        except Exception:
            print("Failed to retrieve photo id " + id)

    return None
    
######## Main Application ##########
# Size-specific url attributes requested through "extras", largest first.
_URL_ATTRIBUTES = ("url_o", "url_m", "url_s", "url_t", "url_sq")


def _load_config():
    """Return the {"version", "user", "token"} dict, logging in if needed.

    The dict is cached in "touchr.frob.cache" in the current directory.
    """
    # NOTE: unpickling executes whatever the file contains; the cache is
    # only as trustworthy as the directory the script is pointed at.
    try:
        cache = open("touchr.frob.cache", "r")
        try:
            return cPickle.load(cache)
        finally:
            cache.close()
    except Exception:
        # Missing or unreadable cache - fall through to a fresh login
        pass

    (user, token) = froblogin(getfrob(), "read")
    config = {"version": 1, "user": user, "token": token}

    # Save it for future use
    cache = open("touchr.frob.cache", "w")
    try:
        cPickle.dump(config, cache)
    finally:
        cache.close()
    return config


def _page_count(dom):
    """Return the "pages" attribute of <photos> or <photoset>, else None."""
    for tag in ("photos", "photoset"):
        elements = dom.getElementsByTagName(tag)
        if elements:
            try:
                return int(elements[0].getAttribute("pages"))
            except ValueError:
                pass
    return None


def _largest_url(photo):
    """Return the first non-empty size url of photo (UTF-8), or ""."""
    # getAttribute returns "" for a missing attribute instead of raising,
    # so availability has to be tested explicitly.
    for attribute in _URL_ATTRIBUTES:
        value = photo.getAttribute(attribute)
        if value:
            return value.encode("utf8")
    return ""


def main():
    """Download every set, the unsorted photos and the favourites."""
    # The first, and only argument needs to be a directory
    try:
        os.chdir(sys.argv[1])
    except (IndexError, OSError):
        print("usage: %s directory" % sys.argv[0])
        sys.exit(1)

    # First things first, get the cached (or a new) user and auth-token
    config = _load_config()

    extras = "&extras=media,original_format,url_sq,url_t,url_s,url_m,url_o"

    # Now, construct a query for the list of photo sets
    url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList"
    url += "&user_id=" + config["user"]
    response = urllib2.urlopen(flickrsign(url, config["token"]))
    try:
        dom = xml.dom.minidom.parse(response)
    finally:
        response.close()

    # For each set - create a (url, directory) pair
    urls = []
    try:
        for photoset in dom.getElementsByTagName("photoset"):
            pid = photoset.getAttribute("id")
            title = getText(photoset.getElementsByTagName("title")[0].childNodes)
            # Normalize to ASCII
            directory = unicodedata.normalize(
                'NFKD', title.decode("utf-8", "ignore")).encode('ASCII', 'ignore')
            if not directory:
                # Nothing survived the ASCII conversion; an empty name would
                # make files land in "/", so use the set id instead.
                directory = str(pid)

            # Build the list of photos
            url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos"
            url += extras + "&photoset_id=" + pid
            urls.append((url, directory))
    finally:
        # Free the DOM memory
        dom.unlink()

    # Add the photos which are not in any set
    url = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet" + extras
    urls.append((url, "No_Set"))

    # Add the user's Favourites
    url = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList" + extras
    urls.append((url, "Favourites"))

    # Time to get the photos
    inodes = {}
    for (set_url, directory) in urls:
        # Create the directory
        try:
            os.makedirs(directory)
        except OSError:
            # Typically the directory is left over from an earlier run
            pass

        # Get 500 results per page
        set_url += "&per_page=500"
        pages = page = 1

        while page <= pages:
            # Sign and make the request
            request = flickrsign(set_url + "&page=" + str(page), config["token"])
            response = urllib2.urlopen(request)

            # Parse the XML; an unparsable page is skipped
            try:
                try:
                    dom = xml.dom.minidom.parse(response)
                finally:
                    response.close()
            except Exception:
                page = page + 1
                continue

            try:
                # Get the total number of pages.  Without one (e.g. an
                # error reply) skip this page; advancing "page" rather than
                # "pages" is what lets the loop terminate.
                total = _page_count(dom)
                if total is None:
                    page = page + 1
                    continue
                pages = total

                # Grab the photos
                for photo in dom.getElementsByTagName("photo"):
                    photoid = photo.getAttribute("id")
                    target = photo.getAttribute("title").encode("utf8")
                    media = photo.getAttribute("media").encode("utf8")
                    fmt = photo.getAttribute("originalformat").encode("utf8")

                    # The largest available url
                    url = _largest_url(photo)
                    if not fmt and url:
                        # No original format in the reply: take the
                        # extension of the url that will be downloaded.
                        fmt = os.path.splitext(urlparse.urlparse(url)[2])[1].lstrip(".")

                    inodes[photoid] = getphoto(photoid, config["token"], target,
                                               directory, url, media, fmt)
            finally:
                dom.unlink()

            # Move on the next page
            page = page + 1


if __name__ == '__main__':
    main()