Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::datamgr::harvestManDataManager::dump_headers (   self  ) 

Dump the headers of the downloaded web pages
into a DBM file. New in version 1.4.5.

Definition at line 677 of file datamgr.py.

def dump_headers(self):
    """Dump the headers of the downloaded web pages into a DBM file.

    New in 1.4.5.

    The database is created in the project directory as
    <project>-headers.dbm. It is a shelve keyed by each URL object's
    full URL, and the stored value is whatever that object's
    get_url_content_info() returns. URL objects that evaluate false,
    or that have no header info, are skipped.

    Returns 0 if a database file is found on disk after writing.
    Returns -1 if no database file is found, or if the configured
    'urlheadersformat' is anything other than 'dbm'.
    """

    # Only the 'dbm' format is supported; reject everything else up front.
    if self._cfg.urlheadersformat != 'dbm':
        extrainfo("Error: Unrecognized format",self._cfg.urlheadersformat,"for dumping url headers")
        return -1

    import shelve

    # File is created in projectdir as
    # <project>-headers.dbm .
    dbmfile = os.path.join(self._cfg.projdir, "".join((self._cfg.project,'-headers.dbm')))
    extrainfo("Writing url headers database",dbmfile,"...")
    shelf = shelve.open(dbmfile)

    # Always close the shelf, even if walking the links or storing an
    # entry raises, so the underlying DBM file is released.
    try:
        links_dict = self.get_links_dictionary()
        for links in links_dict.values():
            for urlobj in links:
                if not urlobj:
                    continue
                url = urlobj.get_full_url()
                # Get headers
                headers = urlobj.get_url_content_info()
                if headers:
                    shelf[url] = headers
    finally:
        shelf.close()

    # Depending on the DBM backend picked by shelve, an extension may
    # have been appended to the file name (and more than one file may
    # have been created), so accept the common suffixed names as well
    # as the exact name.
    for suffix in ('', '.db', '.dat', '.dir', '.pag'):
        if os.path.isfile(dbmfile + suffix):
            extrainfo("Wrote url headers database",dbmfile)
            return 0

    return -1
    


Generated by  Doxygen 1.6.0   Back to index