Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::rules::harvestManRulesChecker::__apply_server_filter (   self,
  urlObj 
) [private]

Check whether any configured server filter matches the server (domain)
of this url. Returns 1 if the url is blocked by the filters and 0 if it
is allowed.

Definition at line 358 of file rules.py.

def __apply_server_filter(self, urlObj):
    """Check the server (domain) of a url against the server filters.

    Every pattern in self._configobj.serverinclfilter and
    self._configobj.serverexclfilter is searched, ignoring case, in
    the value returned by urlObj.get_domain().

    Return 1 if the url is blocked and 0 if it is allowed:

    - No server filters configured: 0.
    - An inclusion and an exclusion filter both match: the one that
      appears first in self._configobj.allserverfilters wins; if
      either one cannot be located there, the url is allowed (0).
    - Otherwise, if inclusion filters are configured their verdict is
      returned (0 when one matched, 1 when none matched); else the
      verdict of the exclusion filters is returned (1 when one
      matched, 0 when none matched).

    Whenever an exclusion filter matches, the full url is also passed
    to self.add_to_filter(), regardless of the final verdict.
    """

    server = urlObj.get_domain()

    serverinclfilter = self._configobj.serverinclfilter
    serverexclfilter = self._configobj.serverexclfilter

    if not serverexclfilter and not serverinclfilter:
        return 0

    url = urlObj.get_full_url()

    # Verdict codes: -1 = no filter of this kind configured,
    # 0 = allowed, 1 = blocked.
    inclcheck, exclcheck = -1, -1
    matchincl, matchexcl = None, None

    # We always check inclusion filter first since it is
    # normally more specific than exclusion filter. Someone
    # can request to not fetch any url containing /images/
    # in the path, but still fetch the particular path
    # /preferred/images. It will work only if we check for
    # inclusion first and exclusion later.
    if serverinclfilter:
        # Blocked unless one of the inclusion patterns matches.
        inclcheck = 1
        for f in serverinclfilter:
            if re.search(f, server, re.IGNORECASE):
                extrainfo('Go-through filter for url ', url, 'found')
                matchincl = f
                inclcheck = 0
                break

    if serverexclfilter:
        # Allowed unless one of the exclusion patterns matches.
        # (This used to start at 1, which blocked every url as soon
        # as any exclusion filter was configured.)
        exclcheck = 0
        for f in serverexclfilter:
            if re.search(f, server, re.IGNORECASE):
                extrainfo('No-pass filter for url ', url, 'found')
                matchexcl = f
                self.add_to_filter(url)
                exclcheck = 1
                break

    if inclcheck == 1:
        extrainfo("Inclfilter does not allow this url", url)
    if exclcheck == 0:
        extrainfo("Exclfilter allows this url", url)

    # If exclfilter and inclfilter return different results
    # (exclfilter denies, inclfilter allows) we check the order of
    # the filters in the global filter. Whichever comes first has
    # precedence.
    if inclcheck == 0 and exclcheck == 1:
        globalfilter = self._configobj.allserverfilters
        try:
            indexincl = globalfilter.index(matchincl)
        except (ValueError, AttributeError):
            # Pattern not listed, or no usable global filter list.
            indexincl = -1
        try:
            indexexcl = globalfilter.index(matchexcl)
        except (ValueError, AttributeError):
            indexexcl = -1

        if indexincl == -1 or indexexcl == -1:
            # Cannot establish an order: allow the url.
            return 0
        if indexincl < indexexcl:
            # inclusion filter has precedence
            return inclcheck
        # exclusion filter has precedence
        return exclcheck

    # No conflict: the inclusion verdict wins when inclusion filters
    # are configured, otherwise fall back to the exclusion verdict.
    if inclcheck != -1:
        return inclcheck
    if exclcheck != -1:
        return exclcheck
    # none matched, allow it
    return 0


Generated by  Doxygen 1.6.0   Back to index