I am just starting out with Scrapy, and I'm trying to rename every image I download to the value of item['title'].
Here is my spider:
import scrapy
# NOTE(review): the spider instantiates `mybotitem`, so that is the name
# imported here; confirm it matches the item class declared in items.py.
from botname.items import mybotitem


class botname(scrapy.Spider):
    """Spider that yields one item per page with a title and image URLs.

    Each yielded item carries:
      * ``title``      - list of text nodes found under ``<h5>`` elements
      * ``image_urls`` - list of ``data-zoom-image`` attribute values of
        ``<img>`` elements
    """

    name = "bot"
    # Scrapy expects a list of domain strings here; a bare string would be
    # iterated character by character.
    allowed_domains = ["example.com"]
    # Pages 1 and 2. ``range`` works on both Python 2 and Python 3.
    start_urls = [
        "http://example.com&pageno=%s" % page for page in range(1, 3)
    ]

    def parse(self, response):
        """Build and yield an item for every ``<html>`` node in *response*.

        Args:
            response: the downloaded page handed over by Scrapy.

        Yields:
            An item with ``title`` and ``image_urls`` populated.
        """
        for sel in response.xpath('//html'):
            item = mybotitem()
            # The downloaded files are meant to be named after this title.
            item['title'] = sel.xpath('//h5/text()').extract()
            item['image_urls'] = sel.xpath('//img/@data-zoom-image').extract()
            yield item
And here is the pipeline class:
import logging

from scrapy import Request
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline

logger = logging.getLogger(__name__)


class mybotpipeline(ImagesPipeline):
    """Image pipeline that stores each download under the item's title.

    The class derives from ``ImagesPipeline`` because ``file_path``,
    ``get_media_requests`` and ``item_completed`` are hooks of Scrapy's media
    pipelines; on a plain ``object`` subclass they are never called.
    ``process_item`` is deliberately not overridden: the inherited
    implementation is what schedules the downloads and invokes these hooks.
    """

    def file_path(self, request, response=None, info=None, item=None):
        """Return the storage path (relative to the images store) for *request*.

        Args:
            request: the image request; its ``meta['title']`` is the title
                list attached in ``get_media_requests``.
            response: passed through by Scrapy, unused here.
            info: passed through by Scrapy, unused here.
            item: optional; accepted so the override stays compatible with
                Scrapy versions that pass the item to this hook.

        Returns:
            ``'full/<title>'`` using the first title, or the default
            ``ImagesPipeline`` path when no title is available.
        """
        titles = request.meta.get('title')
        if not titles:
            # No title was scraped for this item: keep Scrapy's default name
            # instead of failing on an empty list.
            return super(mybotpipeline, self).file_path(
                request, response=response, info=info
            )
        image_guid = titles[0]
        logger.debug(image_guid)
        # NOTE: the path depends only on the title, so several images that
        # belong to the same item map to the same file, and no file
        # extension is appended.
        return 'full/%s' % (image_guid)

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['image_urls']``.

        The item's title travels in ``request.meta`` so that ``file_path``
        can read it back; without it ``file_path`` has no title to use.
        """
        for image_url in item['image_urls']:
            yield Request(image_url, meta={'title': item['title']})

    def item_completed(self, results, item, info):
        """Record the stored paths on the item once all downloads finished.

        Args:
            results: iterable of ``(ok, data)`` pairs; ``data['path']`` is
                read for every pair whose ``ok`` is truthy.
            item: the item the downloads belong to.
            info: passed through by Scrapy, unused here.

        Returns:
            The item with ``image_paths`` set.

        Raises:
            DropItem: if none of the downloads succeeded.
        """
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("item contains no images")
        item['image_paths'] = image_paths
        return item
Comments
Post a Comment