From a9901c02b97b3411dc6ad0f7a9d9c59e6a3b24de Mon Sep 17 00:00:00 2001 From: luxagraf Date: Mon, 4 May 2015 12:56:51 -0400 Subject: updated Flickr crawler to use flickrapi --- app/photos/retriever.py | 527 ++++++++++++++++++++++++------------------------ 1 file changed, 268 insertions(+), 259 deletions(-) (limited to 'app/photos') diff --git a/app/photos/retriever.py b/app/photos/retriever.py index 1a0757e..f5cae68 100644 --- a/app/photos/retriever.py +++ b/app/photos/retriever.py @@ -1,3 +1,4 @@ +import json import datetime import os import io @@ -15,9 +16,9 @@ from photos.models import Photo, PhotoGallery # from https://github.com/alexis-mignon/python-flickr-api # terribly documented, but offers a good clean OOP approach if you're willing to figure it out... import flickr_api +import flickrapi -# Required PIL classes may or may not be available from the root namespace -# depending on the installation +# Required PIL classes may or may not be available from the root namespace depending on the installation try: import Image import ImageFile @@ -41,274 +42,282 @@ EXIF_PARAMS = { 'DateTimeOriginal': '2013:09:03 22:44:25' } +class SyncFlickr(): + + def __init__(self): + self.flickr = flickrapi.FlickrAPI(settings.FLICKR_API_KEY, settings.FLICKR_API_SECRET,format='parsed-json') + + + def sync_sets(self, *args, **kwargs): + p = self.flickr.photosets.getList(user_id='85322932@N00') + disregard = [ + 'POTD 2008', + 'Snow Day', + 'Wedding', + 'Some random stuff', + 'Lilah & Olivia', + '6 months+', + '6-9 months', + '9-18 months', + ] + for photoset in p['photosets']['photoset']: + if photoset['title']['_content'] in disregard: + pass + else: + try: + row = PhotoGallery.objects.get(set_id__exact=photoset['id']) + print(('%s %s %s' % ('already have', row.set_title, 'moving on...'))) + # okay it already exists, but is it up-to-date? + self.get_photos_in_set(photoset['id'],row) + except ObjectDoesNotExist: + s = PhotoGallery.objects.get_or_create( + set_id=force_text(photoset['id']), + set_title=force_text(photoset['title']['_content']), + set_desc=force_text(photoset['description']['_content']), + set_slug=slugify(force_text(photoset['title']['_content'])[:40]), + primary=force_text(photoset['primary']), + pub_date=datetime.datetime.fromtimestamp(float(photoset['date_create'])) + ) + + #get_photos_in_set(photoset, s) + #create the gallery thumbnail image: + #photo = Photo.objects.get(flickr_id__exact=str(photoset['primary'])) + #make_gallery_thumb(photo, s) + -def sync_flickr_photos(*args, **kwargs): - flickr_api.set_keys(api_key=settings.FLICKR_API_KEY, api_secret=settings.FLICKR_API_SECRET) - flickr_api.set_auth_handler("app/photos/flickrauth") - user = flickr_api.test.login() - photos = user.getPhotos(extras="date_upload,date_taken,geo") - # reverse! reverse! - photos.reverse() - for photo in photos: - info = photo.getInfo() + + def get_photos_in_set(self, flickr_id, photoset): + photos = self.flickr.photosets.getPhotos(photoset_id=flickr_id) + for photo in photos['photoset']['photo']: + try: + p = Photo.objects.get(flickr_id__exact=str(photo['id'])) + except ObjectDoesNotExist: + p = self.get_photo(photo['id']) + if p.is_public: + pass #photoset.photos.add(p) + #slideshow_image(p, 1000, 800, 95) + print(p) + + def get_photo(self, photo_id): + photo = self.flickr.photos.getInfo(photo_id=photo_id) + info = photo['photo'] try: - row = Photo.objects.get(flickr_id=info['id'], flickr_secret=info['secret']) - print(('already have ' + info['id'] + ' moving on')) - except ObjectDoesNotExist: - get_photo(photo) - - -def get_photo(photo): - info = photo.getInfo() - geo = photo.getLocation() - location, region = get_geo(float(geo['latitude']), float(geo['longitude'])) - exif = exif_handler(photo.getExif()) - p, created = Photo.objects.get_or_create( - title=info['title'], - flickr_id=info['id'], - flickr_owner=info['owner']['id'], - flickr_server=info['server'], - flickr_secret=info['secret'], - flickr_originalsecret=info['originalsecret'], - flickr_farm=info['farm'], - pub_date=flickr_datetime_to_datetime(info['taken']), - description=info['description'], - exif_aperture=exif['FNumber'], - exif_make=exif['Make'], - exif_model=exif['Model'], - exif_exposure=exif['ExposureTime'], - exif_iso=exif['ISO'], - exif_lens=exif['LensModel'], - exif_focal_length=exif['FocalLength'], - exif_date=flickr_datetime_to_datetime(exif["DateTimeOriginal"].replace(':', '-', 2)), - lat=float(geo['latitude']), - lon=float(geo['longitude']), - region=region, - location=location, - ) - if created: - for tag in info['tags']: - p.tags.add(tag['raw']) - p.save() - make_local_copies(p) - #retina image: - #slideshow_image(p, 2000, 1600, 75) - #normal image - print((p.title)) - return p - - -def sync_sets(*args, **kwargs): - flickr_api.set_keys(api_key=settings.FLICKR_API_KEY, api_secret=settings.FLICKR_API_SECRET) - flickr_api.set_auth_handler("app/photos/flickrauth") - user = flickr_api.test.login() - photosets = user.getPhotosets() - # reverse! reverse! - photosets.reverse() - disregard = [ - 'POTD 2008', - 'Snow Day', - 'Wedding', - 'Some random stuff', - 'Lilah & Olivia', - '6 months+', - '6-9 months', - '9-18 months', - ] - for photoset in photosets: - if photoset['title'] in disregard: - pass - else: + geo = self.flickr.photos.geo.getLocation(photo_id=photo_id) + location, region = self.get_geo(float(geo['photo']['location']['latitude']), float(geo['photo']['location']['longitude'])) + except KeyError: + print("no effing geodata asshat") + exif = self.exif_handler(self.flickr.photos.getExif(photo_id=photo_id)['photo']['exif']) + p, created = Photo.objects.get_or_create( + title=info['title']['_content'], + flickr_id=info['id'], + flickr_owner=info['owner']['nsid'], + flickr_server=info['server'], + flickr_secret=info['secret'], + flickr_originalsecret=info['originalsecret'], + flickr_farm=info['farm'], + pub_date=self.flickr_datetime_to_datetime(exif["DateTimeOriginal"].replace(':', '-', 2)), + description=info['description']['_content'], + exif_aperture=exif['FNumber'], + exif_make=exif['Make'], + exif_model=exif['Model'], + exif_exposure=exif['ExposureTime'], + exif_iso=exif['ISO'], + exif_lens=exif['LensModel'], + exif_focal_length=exif['FocalLength'], + exif_date=self.flickr_datetime_to_datetime(exif["DateTimeOriginal"].replace(':', '-', 2)), + lat=float(geo['photo']['location']['latitude']), + lon=float(geo['photo']['location']['longitude']), + region=region, + location=location, + ) + if created: + for tag in info['tags']['tag']: + p.tags.add(tag['raw']) + p.save() + + local = FlickrImage() + local.make_local_copies(p) + #retina image: + #slideshow_image(p, 2000, 1600, 75) + #normal image + print("grabbing... "+p.title) + return p + + + def sync_flickr_photos(self, *args, **kwargs): + photos = self.flickr.people.getPhotos(user_id="85322932@N00", extras="date_upload,date_taken,geo") + for photo in photos['photos']['photo']: try: - row = PhotoGallery.objects.get(set_id__exact=photoset['id']) - print(('%s %s %s' % ('already have', row.set_title, 'moving on...'))) - # okay it already exists, but is it up-to-date? - #get_photos_in_set(row,set.id) + row = Photo.objects.get(flickr_id=photo['id'], flickr_secret=photo['secret']) + print('already have ' + photo['id'] + ' moving on') except ObjectDoesNotExist: - s = PhotoGallery.objects.create( - set_id=force_text(photoset['id']), - set_title=force_text(photoset['title']), - set_desc=force_text(photoset['description']), - set_slug=slugify(force_text(photoset['title'])), - primary=force_text(photoset['primary']), - pub_date=datetime.datetime.fromtimestamp(float(photoset['date_create'])) - ) - - get_photos_in_set(photoset, s) - #create the gallery thumbnail image: - photo = Photo.objects.get(flickr_id__exact=str(photoset['primary'])) - make_gallery_thumb(photo, s) + p = self.get_photo(photo['id']) + -def get_photos_in_set(flickr_photoset, photoset): - for photo in flickr_photoset.getPhotos(): + """ + ################################################ + ## Various meta data and geo helper functions ## + ################################################ + """ + + def exif_handler(self, data): + converted = {} try: - p = Photo.objects.get(flickr_id__exact=str(photo['id'])) - except ObjectDoesNotExist: - p = get_photo(photo) - if p.is_public: - photoset.photos.add(p) - slideshow_image(p, 1000, 800, 95) + for t in data: + converted[t['tag']] = t['raw']['_content'] + except: + pass + for k, v in list(EXIF_PARAMS.items()): + if k not in converted: + converted[k] = v + return converted -################################################ -## Various meta data and geo helper functions ## -################################################ + def flickr_datetime_to_datetime(self, fdt): + from datetime import datetime + from time import strptime + date_parts = strptime(fdt, '%Y-%m-%d %H:%M:%S') + return datetime(*date_parts[0:6]) -def exif_handler(data): - converted = {} - try: - for t in data: - converted[t['tag']] = t['raw'] - except: - pass - for k, v in list(EXIF_PARAMS.items()): - if k not in converted: - converted[k] = v - return converted - - -def flickr_datetime_to_datetime(fdt): - from datetime import datetime - from time import strptime - date_parts = strptime(fdt, '%Y-%m-%d %H:%M:%S') - return datetime(*date_parts[0:6]) - - -def get_geo(lat, lon): - from locations.models import Location, Region - from django.contrib.gis.geos import Point - pnt_wkt = Point(lon, lat) - try: - location = Location.objects.get(geometry__contains=pnt_wkt) - except Location.DoesNotExist: - location = None - try: - region = Region.objects.get(geometry__contains=pnt_wkt) - except Region.DoesNotExist: - region = None - return location, region - - -####################################################################### -## Photo retrieval functions to pull down images from Flickr servers ## -####################################################################### - - -def slideshow_image(photo, max_width, max_height, quality): - slide_dir = settings.IMAGES_ROOT + '/slideshow/' + photo.pub_date.strftime("%Y") - if not os.path.isdir(slide_dir): - os.makedirs(slide_dir) - - # Is it a retina image or not? - if max_width >= 1001 or max_height >= 801: - filename = '%s/%sx2.jpg' % (slide_dir, photo.flickr_id) - else: - filename = '%s/%s.jpg' % (slide_dir, photo.flickr_id) - - flickr_photo = photo.get_original_url() - fname = urllib.request.urlopen(flickr_photo) - im = io.StringIO(fname.read()) # constructs a StringIO holding the image - img = Image.open(im) - cur_width, cur_height = img.size - #if image landscape - if cur_width > cur_height: - new_width = max_width - #check to make sure we aren't upsizing - if cur_width > new_width: - ratio = float(new_width) / cur_width - x = (cur_width * ratio) - y = (cur_height * ratio) - resized = img.resize((int(x), int(y)), Image.ANTIALIAS) - resized.save(filename, 'JPEG', quality=quality, optimize=True) + def get_geo(self, lat, lon): + from locations.models import Location, Region + from django.contrib.gis.geos import Point + pnt_wkt = Point(lon, lat) + try: + location = Location.objects.get(geometry__contains=pnt_wkt) + except Location.DoesNotExist: + location = None + try: + region = Region.objects.get(geometry__contains=pnt_wkt) + except Region.DoesNotExist: + region = None + return location, region + + + + + + +class FlickrImage(): + """ + ## Photo retrieval functions to pull down images from Flickr servers ## + """ + + def slideshow_image(self, photo, max_width, max_height, quality): + slide_dir = settings.IMAGES_ROOT + '/slideshow/' + photo.pub_date.strftime("%Y") + if not os.path.isdir(slide_dir): + os.makedirs(slide_dir) + + # Is it a retina image or not? + if max_width >= 1001 or max_height >= 801: + filename = '%s/%sx2.jpg' % (slide_dir, photo.flickr_id) else: - img.save(filename) - else: - #image portrait - new_height = max_height - #check to make sure we aren't upsizing - if cur_height > new_height: - ratio = float(new_height) / cur_height - x = (cur_width * ratio) - y = (cur_height * ratio) - resized = img.resize((int(x), int(y)), Image.ANTIALIAS) - resized.save(filename, 'JPEG', quality=quality, optimize=True) + filename = '%s/%s.jpg' % (slide_dir, photo.flickr_id) + + flickr_photo = photo.get_original_url() + fname = urllib.request.urlopen(flickr_photo) + im = io.StringIO(fname.read().decode('UTF-8')) # constructs a StringIO holding the image + img = Image.open(im) + cur_width, cur_height = img.size + #if image landscape + if cur_width > cur_height: + new_width = max_width + #check to make sure we aren't upsizing + if cur_width > new_width: + ratio = float(new_width) / cur_width + x = (cur_width * ratio) + y = (cur_height * ratio) + resized = img.resize((int(x), int(y)), Image.ANTIALIAS) + resized.save(filename, 'JPEG', quality=quality, optimize=True) + else: + img.save(filename) else: - img.save(filename) - photo.slideshowimage_width = photo.get_width - photo.slideshowimage_height = photo.get_height - photo.slideshowimage_margintop = photo.get_margin_top - photo.slideshowimage_marginleft = photo.get_margin_left - photo.save() - #now resize the local copy - - -def make_local_copies(photo): - orig_dir = settings.IMAGES_ROOT + '/flickr/full/' + photo.pub_date.strftime("%Y") - if not os.path.isdir(orig_dir): - os.makedirs(orig_dir) - full = photo.get_original_url() - fname = urllib.request.urlopen(full) - im = io.StringIO(fname.read()) # constructs a StringIO holding the image - img = Image.open(im) - local_full = '%s/%s.jpg' % (orig_dir, photo.flickr_id) - img.save(local_full) - #save large size - large_dir = settings.IMAGES_ROOT + '/flickr/large/' + photo.pub_date.strftime("%Y") - if not os.path.isdir(large_dir): - os.makedirs(large_dir) - large = photo.get_large_url() - fname = urllib.request.urlopen(large) - im = io.StringIO(fname.read()) # constructs a StringIO holding the image - img = Image.open(im) - local_large = '%s/%s.jpg' % (large_dir, photo.flickr_id) - if img.format == 'JPEG': - img.save(local_large) - #save medium size - med_dir = settings.IMAGES_ROOT + '/flickr/med/' + photo.pub_date.strftime("%Y") - if not os.path.isdir(med_dir): - os.makedirs(med_dir) - med = photo.get_medium_url() - fname = urllib.request.urlopen(med) - im = io.StringIO(fname.read()) # constructs a StringIO holding the image - img = Image.open(im) - local_med = '%s/%s.jpg' % (med_dir, photo.flickr_id) - img.save(local_med) - - -def make_gallery_thumb(photo, set): - crop_dir = settings.IMAGES_ROOT + '/gallery_thumbs/' - if not os.path.isdir(crop_dir): - os.makedirs(crop_dir) - remote = photo.get_original_url() - print(remote) - fname = urllib.request.urlopen(remote) - im = io.StringIO(fname.read()) # constructs a StringIO holding the image - img = Image.open(im) - #calculate crop: - cur_width, cur_height = img.size - new_width, new_height = 291, 350 - ratio = max(float(new_width) / cur_width, float(new_height) / cur_height) - x = (cur_width * ratio) - y = (cur_height * ratio) - xd = abs(new_width - x) - yd = abs(new_height - y) - x_diff = int(xd / 2) - y_diff = int(yd / 2) - box = (int(x_diff), int(y_diff), int(x_diff + new_width), int(y_diff + new_height)) - - # create resized file - resized = img.resize((int(x), int(y)), Image.ANTIALIAS).crop(box) - # save resized file - resized_filename = '%s/%s.jpg' % (crop_dir, set.id) - try: + #image portrait + new_height = max_height + #check to make sure we aren't upsizing + if cur_height > new_height: + ratio = float(new_height) / cur_height + x = (cur_width * ratio) + y = (cur_height * ratio) + resized = img.resize((int(x), int(y)), Image.ANTIALIAS) + resized.save(filename, 'JPEG', quality=quality, optimize=True) + else: + img.save(filename) + photo.slideshowimage_width = photo.get_width + photo.slideshowimage_height = photo.get_height + photo.slideshowimage_margintop = photo.get_margin_top + photo.slideshowimage_marginleft = photo.get_margin_left + photo.save() + #now resize the local copy + + + def make_local_copies(self,photo): + orig_dir = settings.IMAGES_ROOT + '/flickr/full/' + photo.pub_date.strftime("%Y") + if not os.path.isdir(orig_dir): + os.makedirs(orig_dir) + full = photo.get_original_url() + fname = urllib.request.urlopen(full) + im = io.StringIO(fname.read().decode('UTF-8')) # constructs a StringIO holding the image + img = Image.open(im) + local_full = '%s/%s.jpg' % (orig_dir, photo.flickr_id) + img.save(local_full) + #save large size + large_dir = settings.IMAGES_ROOT + '/flickr/large/' + photo.pub_date.strftime("%Y") + if not os.path.isdir(large_dir): + os.makedirs(large_dir) + large = photo.get_large_url() + fname = urllib.request.urlopen(large) + im = io.StringIO(fname.read().decode('UTF-8')) # constructs a StringIO holding the image + img = Image.open(im) + local_large = '%s/%s.jpg' % (large_dir, photo.flickr_id) if img.format == 'JPEG': - resized.save(resized_filename, 'JPEG', quality=95, optimize=True) - else: - resized.save(resized_filename) - except IOError as e: - if os.path.isfile(resized_filename): - os.unlink(resized_filename) - raise e - # os.unlink(img) + img.save(local_large) + #save medium size + med_dir = settings.IMAGES_ROOT + '/flickr/med/' + photo.pub_date.strftime("%Y") + if not os.path.isdir(med_dir): + os.makedirs(med_dir) + med = photo.get_medium_url() + fname = urllib.request.urlopen(med) + im = io.StringIO(fname.read().decode('UTF-8')) # constructs a StringIO holding the image + img = Image.open(im) + local_med = '%s/%s.jpg' % (med_dir, photo.flickr_id) + img.save(local_med) + + + def make_gallery_thumb(self, photo, set): + crop_dir = settings.IMAGES_ROOT + '/gallery_thumbs/' + if not os.path.isdir(crop_dir): + os.makedirs(crop_dir) + remote = photo.get_original_url() + print(remote) + fname = urllib.request.urlopen(remote) + im = io.StringIO(fname.read().decode('UTF-8')) # constructs a StringIO holding the image + img = Image.open(im) + #calculate crop: + cur_width, cur_height = img.size + new_width, new_height = 291, 350 + ratio = max(float(new_width) / cur_width, float(new_height) / cur_height) + x = (cur_width * ratio) + y = (cur_height * ratio) + xd = abs(new_width - x) + yd = abs(new_height - y) + x_diff = int(xd / 2) + y_diff = int(yd / 2) + box = (int(x_diff), int(y_diff), int(x_diff + new_width), int(y_diff + new_height)) + + # create resized file + resized = img.resize((int(x), int(y)), Image.ANTIALIAS).crop(box) + # save resized file + resized_filename = '%s/%s.jpg' % (crop_dir, set.id) + try: + if img.format == 'JPEG': + resized.save(resized_filename, 'JPEG', quality=95, optimize=True) + else: + resized.save(resized_filename) + except IOError as e: + if os.path.isfile(resized_filename): + os.unlink(resized_filename) + raise e + os.unlink(img) -- cgit v1.2.3-70-g09d2