Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::urlparser::HarvestManUrlParser::resolve_protocol (   self  ) 

Resolve the protocol of the URL.

Definition at line 180 of file urlparser.py.

def resolve_protocol(self):
        """ Resolve the protocol of the url.

        The checks are tried in this order:

        1. The url starts with one of the keys of self.protocol_map.
           self.protocol is set to that key and self.port to the value
           mapped to it.
        2. The url starts with 'www.' or 'www<digit>.'. 'http://' is
           assumed and prefixed to self.url.
        3. The url starts with '//'. The protocol of the base url
           (or 'http://' if there is none) replaces the leading '//'.
        4. Otherwise self.protocol is set to the protocol of the base
           url (or 'http://' if there is none) and self.defproto is
           set to True; self.url is left untouched.

        self.port is assigned only in case 1.

        Returns True for cases 1-3, and False for case 4, i.e. when a
        default protocol had to be assumed.
        """

        url2 = self.url.lower()

        # A protocol only counts when it is at the start of the url.
        # Searching the whole string would misclassify relative urls
        # that merely embed another url (e.g. 'redirect?to=http://x')
        # as absolute ones. Leading whitespace is ignored for this check.
        head = url2.lstrip()
        for proto, port in self.protocol_map.items():
            if head.startswith(proto):
                self.protocol = proto
                self.port = port
                return True

        # Fix: Use regex for detecting WWW urls.
        # Check for WWW urls. These can begin
        # with a 'www.' or 'www' followed by
        # a single number (www1, www3 etc).
        if re.match(r'^www(\d?)\.', url2):
            self.protocol = 'http://'
            self.url = "".join((self.protocol, self.url))
            return True

        # From here on no protocol can be derived from the url text
        # itself, so pick it from the base url if that has one,
        # else fall back to plain http.
        if self.baseurl and self.baseurl.protocol:
            fallback = self.baseurl.protocol
        else:
            fallback = 'http://'

        # Urls relative to server might
        # begin with a //. Then prefix the protocol
        # string to them (it takes the place of the '//').
        if self.url.startswith('//'):
            self.protocol = fallback
            self.url = "".join((self.protocol, self.url[2:]))
            return True

        # None of these: protocol not resolved, so record that the
        # protocol in use is only an assumed default.
        self.protocol = fallback
        self.defproto = True

        return False
        


Generated by  Doxygen 1.6.0   Back to index