Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::connector::HarvestManUrlConnector::__save_url_file (   self,
  urlobj 
) [private]

Download the data for the url described by <urlobj> and write it to
the local file named by urlobj.get_full_filename().

Definition at line 878 of file connector.py.

00878                                      :
        """ Fetch the url described by <urlobj> and save it to its local file.

        The url is taken from urlobj.get_full_url() and the target file
        name from urlobj.get_full_filename().

        Return value:
          5 - self.connect() returned 5, or the word filter or the
              html filter rejected the url
          3 - the data manager reports the cached copy as up to date
              and verified; nothing is written
          4 - the data manager wrote the file from its cache
          otherwise the result of self.__write_url(filename), or 0
          when dmgr.create_local_directory(urlobj) returns non-zero.
        """

        url = urlobj.get_full_url()

        # Only the value 5 is checked below; any other result of
        # connect() is ignored by this method.
        # NOTE(review): self.__data is read further down and is
        # presumably filled in by connect() -- confirm.
        res = self.connect(url, urlobj, True, self._cfg.retryfailed)

        # If it was a rules violation, skip it
        if res==5:
            return res
        
        dmgr=GetObject('datamanager')
        
        # Default result, returned unchanged if the local directory
        # step at the end of this method does not succeed.
        retval=0
        # Apply word filter (never applied to the url flagged as starturl)
        if not urlobj.starturl:
            if urlobj.is_webpage() and not GetObject('ruleschecker').apply_word_filter(self.__data):
                extrainfo("Word filter prevents download of url =>", url)
                return 5

        # If no need to save html files return from here
        if urlobj.is_webpage() and not self._cfg.html:
            extrainfo("Html filter prevents download of url =>", url)
            return 5
        
        # Find out if we need to update this file
        # by checking with the cache.
        filename = urlobj.get_full_filename()
        # Get last modified time
        timestr = self.get_last_modified_time()
        update, fileverified = False, False
        
        # -1 means "no usable last-modified time"; it is kept if the
        # time string cannot be parsed below.
        lmt = -1
        if timestr:
            try:
                # NOTE(review): the format string says GMT, but
                # time.mktime() interprets the struct_time as local
                # time, so lmt is offset by the local UTC offset --
                # confirm this matches how cached values are stored.
                lmt = time.mktime( strptime(timestr, "%a, %d %b %Y %H:%M:%S GMT"))
            except ValueError, e:
                debug(e)

            # If parsing failed, no cache check is made at all and
            # execution falls through to the write step below.
            if lmt != -1:
                # url and filename are fetched again here although both
                # were already obtained above.
                url, filename = urlobj.get_full_url(), urlobj.get_full_filename()
                update, fileverified = dmgr.is_url_uptodate(url, filename, lmt, self.__data)
                # No need to download
                if update and fileverified:
                    extrainfo("Project cache is uptodate =>", url)
                    return 3
        else:
            # No last-modified time available: ask the data manager to
            # check the cache using the content length instead.
            update, fileverified = dmgr.is_url_cache_uptodate(url, filename, self.get_content_length(), self.__data)
            # No need to download
            if update and fileverified:
                extrainfo("Project cache is uptodate =>", url)
                return 3
        
        # If cache is up to date, but someone has deleted
        # the downloaded files, instruct data manager to
        # write file from the cache.
        # If write_file_from_cache() returns a false value, fall
        # through and write the file in the normal way below.
        if update and not fileverified:
            if dmgr.write_file_from_cache(url):
                return 4
            
        # A return value of 0 from create_local_directory() is treated
        # as success.
        if dmgr.create_local_directory(urlobj) == 0:
            extrainfo('Writing file ', filename)
            retval=self.__write_url( filename )
        else:
            # NOTE(review): this branch is reached when
            # create_local_directory() returns non-zero, although the
            # message talks about getting data.
            extrainfo("Error in getting data for", url)
            
        return retval


Generated by  Doxygen 1.6.0   Back to index