Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::urlparser::HarvestManUrlParser::resolveurl (   self  ) 

Resolves the URL, determining its protocol, port, domain, etc.
Also resolves relative paths and builds a local file name
for the URL based on the root directory path.

Definition at line 226 of file urlparser.py.

def resolveurl(self):
    """Resolve ``self.url`` into a path string and pass it to the helpers.

    The protocol is resolved first via ``self.resolve_protocol()``.

    * If no protocol is found, the url is treated as relative:
      ``self.isrel`` is set, ``self.isrels`` is set when the url starts
      with ``/``, leading ``.`` / ``..`` / empty components are counted in
      ``self.rindex`` (the non-empty ones are appended to ``self.rpath``),
      and the remaining components form the path.
    * Otherwise the path is the url with the protocol string removed.

    For cgi urls (``self.cgi`` true) a trailing separator is appended.
    The resulting path is given to ``self.compute_dirpaths()``, after
    which ``self.compute_domain_and_port()`` is called.

    Raises:
        HarvestManUrlParserError: if the url is empty, or if the url is
            relative and ``self.baseurl`` is empty.
    """

    if not self.url:
        raise HarvestManUrlParserError('Error: Zero Length Url')

    proto = self.resolve_protocol()

    paths = ''

    if not proto:
        # Could not resolve protocol, must be a relative url
        if not self.baseurl:
            raise HarvestManUrlParserError('Base url should not be empty for relative urls')

        # Set url-relative flag
        self.isrel = True
        # Is relative to server?
        if self.url[0] == '/':
            self.isrels = True

        # Split paths
        relpaths = self.url.split(self.URLSEP)

        # Skip over the leading ".", ".." and empty components, counting
        # them in rindex; the first ordinary component ends the scan.
        self.rindex = 0
        for ritem in relpaths:
            if ritem in (self.DOT, self.DOTDOT, ""):
                self.rindex += 1
                # Only "." and ".." are recorded; empty components
                # (from a leading or doubled separator) are dropped.
                if ritem:
                    self.rpath.append(ritem)
            else:
                # Everything from the first ordinary component onwards
                # is the path. It is non-empty here because ritem is.
                paths = self.URLSEP.join(relpaths[self.rindex:])

                # Trim if the relative path ends with /
                # like href = /img/abc.gif/
                if paths.endswith('/'):
                    paths = paths[:-1]
                break
    else:
        # Absolute path, so 'paths' is the part of it minus the protocol
        # part. Only the first occurrence is removed so that a protocol
        # string appearing later in the url (e.g. inside a query string)
        # is left intact.
        # NOTE(review): assumes self.protocol is set by resolve_protocol()
        # to the protocol text as it appears in the url -- confirm.
        paths = self.url.replace(self.protocol, '', 1)

    # Now compute local directory/file paths

    # For cgi paths, add a url separator at the end
    if self.cgi:
        paths = "".join((paths, self.URLSEP))

    self.compute_dirpaths(paths)
    self.compute_domain_and_port()


Generated by  Doxygen 1.6.0   Back to index