Logo Search packages:      
Sourcecode: harvestman version File versions  Download package

def HarvestMan::rules::harvestManRulesChecker::__apply_url_filter (   self,
  url 
) [private]

Check whether any configured filter matches the url.
Returns 1 to block the url and 0 to allow it.

Definition at line 274 of file rules.py.

def __apply_url_filter(self, url):
    """ See if we have a filter matching the url.

    Return 1 for blocking the url and 0 for allowing it.

    Inclusion filters are consulted first, then exclusion filters. When
    an inclusion filter allows the url and an exclusion filter blocks it,
    the filter that appears earlier in the global filter list
    (self._configobj.allfilters) decides the outcome. If either matching
    filter cannot be located in that list, the url is allowed.

    Side effect: a url matched by an exclusion filter is passed to
    self.add_to_filter(), regardless of the final verdict.
    """

    inclfilter = self._configobj.inclfilter
    exclfilter = self._configobj.exclfilter

    # neither filters are enabled, return 0
    if not inclfilter and not exclfilter:
        return 0

    # We always check inclusion filter first since it is
    # normally more specific than exclusion filter. Someone
    # can request to not fetch any url containing /images/
    # in the path, but still fetch the particular path
    # /preferred/images. It will work only if we check for
    # inclusion first and exclusion later.
    #
    # -1 means "filter type not enabled"; otherwise 0 allows, 1 blocks.
    inclcheck, exclcheck = -1, -1
    # The filter objects that matched, kept so that their relative
    # order in the global filter list can be compared later.
    matchincl, matchexcl = None, None

    if inclfilter:
        # With inclusion filters enabled, block unless one matches.
        inclcheck = 1
        for f in inclfilter:
            if f.search(url):
                extrainfo('Go-through filter for url ', url, 'found')
                matchincl = f
                inclcheck = 0
                break

    if exclfilter:
        # With exclusion filters enabled, allow unless one matches.
        exclcheck = 0
        for f in exclfilter:
            if f.search(url):
                extrainfo('No-pass filter for url ', url, 'found')
                matchexcl = f
                # NOTE(review): this is recorded even when the inclusion
                # filter later takes precedence -- confirm that is intended.
                self.add_to_filter(url)
                exclcheck = 1
                break

    if inclcheck == 1:
        extrainfo("Inclfilter does not allow this url", url)
    if exclcheck == 0:
        extrainfo("Exclfilter allows this url", url)

    # if exclfilter and inclfilter returns different results
    # (exclfilter denys, inclfilter allows)
    # we check the order of the filters in the global filter. Whichever
    # comes first has precedence.
    if inclcheck == 0 and exclcheck == 1:
        globalfilter = self._configobj.allfilters
        # NOTE(review): assumes allfilters holds the same filter objects
        # as inclfilter/exclfilter -- confirm. If it does not, the lookups
        # fail and the url is allowed, as before.
        try:
            indexincl = globalfilter.index(matchincl)
        except (ValueError, AttributeError):
            indexincl = -1
        try:
            indexexcl = globalfilter.index(matchexcl)
        except (ValueError, AttributeError):
            indexexcl = -1

        if indexincl == -1 or indexexcl == -1:
            # error, return allow (0)
            return 0
        if indexincl < indexexcl:
            # inclusion filter has precedence
            return inclcheck
        # exclusion filter has precedence
        return exclcheck

    # No conflict: the inclusion result wins whenever inclusion
    # filters are enabled, otherwise the exclusion result is used.
    if inclcheck != -1:
        return inclcheck
    if exclcheck != -1:
        return exclcheck
    # none matched, allow it
    return 0


Generated by  Doxygen 1.6.0   Back to index