This file is indexed.

/usr/share/w3af/plugins/grep/pathDisclosure.py is in w3af-console 1.0-rc3svn3489-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
'''
pathDisclosure.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''

import core.controllers.outputManager as om

# options
from core.data.options.option import option
from core.data.options.optionList import optionList

from core.controllers.basePlugin.baseGrepPlugin import baseGrepPlugin

import core.data.kb.knowledgeBase as kb
import core.data.kb.vuln as vuln
import core.data.constants.severity as severity
from core.data.constants.common_directories import get_common_directories

import core.data.parsers.urlParser as urlParser
import re


class pathDisclosure(baseGrepPlugin):
    '''
    Grep every page for traces of path disclosure problems.

    Every text/HTML response body is searched for strings that look like
    absolute file system paths. Each new path is stored in the knowledge base
    as a low severity vulnerability, and the disclosed paths are then combined
    with the list of known URLs to guess the web root and the on-disk location
    of every discovered resource.

    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    def __init__(self):
        baseGrepPlugin.__init__(self)

        # (url, disclosed path) tuples that were already reported to the kb
        self._already_added = []

        # Compile all regular expressions now, they are reused for every
        # response that is analyzed
        self._path_disc_regex_list = []
        self._compile_regex()

    def _compile_regex(self):
        '''
        Compile one regular expression for each path disclosure string.

        @return: None, the result is saved in self._path_disc_regex_list
        '''
        # A path ends at the first character that can not be part of a file
        # name, or at the end of the body. This MUST be a raw string: in a
        # regular string literal the escaped backslash collapses into an
        # escaped slash, the backslash is left out of the character class and
        # Windows paths get truncated at their second backslash.
        path_end = r'(?:[^A-Za-z0-9\._\-\\/\+~]|$)'

        self._path_disc_regex_list = [
            re.compile('(' + path_disclosure_string + '.*?)' + path_end,
                       re.IGNORECASE)
            for path_disclosure_string in self._get_path_disclosure_strings()
        ]

    def grep(self, request, response):
        '''
        Identify the path disclosure vulnerabilities.

        @parameter request: The HTTP request object.
        @parameter response: The HTTP response object
        @return: None, the result is saved in the kb.
        '''
        if response.is_text_or_html():
            # Decode the realurl
            realurl = urlParser.urlDecode( response.getURL() )
            html_string = response.getBody()

            for match in self._find_matches( html_string ):

                # These checks are here to avoid false positives
                if self._wasSent( request, match ):
                    continue
                if self._attr_value( match, html_string ):
                    continue

                #   There is a rare bug also, which is triggered in cases like
                #   this one:
                #
                #   >>> import re
                #   >>> re.findall('/var/www/.*','/var/www/foobar/htdocs/article.php')
                #   ['/var/www/foobar/htdocs/article.php']
                #   >>> re.findall('/htdocs/.*','/var/www/foobar/htdocs/article.php')
                #   ['/htdocs/article.php']
                #   >>>
                #
                #   What I need to do here, is to keep the longest match. The
                #   matches are analyzed from longest to shortest, so the
                #   shorter ones are filtered out here. This also filters
                #   exact duplicates.
                if self._is_known_suffix( match ):
                    continue

                self._report( realurl, response, match )

        self._update_KB_path_list()

    def _find_matches(self, body):
        '''
        @parameter body: The HTTP response body.
        @return: The unique strings matched by any of the path disclosure
        regular expressions, longest first.
        '''
        found = set()
        for path_disc_regex in self._path_disc_regex_list:
            found.update( path_disc_regex.findall( body ) )

        # Longest first, this is required by the suffix filter used in grep().
        # The match itself is the second sort key, just to get a stable order.
        return sorted( found, key=lambda match: (-len(match), match) )

    def _is_known_suffix(self, match):
        '''
        @return: True if match is equal to, or the tail of, a path that was
        already reported.
        '''
        for realurl_added, match_added in self._already_added:
            if match_added.endswith( match ):
                return True
        return False

    def _report(self, realurl, response, match):
        '''
        Save a new path disclosure vulnerability to the kb and remember it.
        '''
        self._already_added.append( (realurl, match) )

        v = vuln.vuln()
        v.setURL( realurl )
        v.setId( response.id )
        msg = 'The URL: "' + v.getURL() + '" has a path disclosure '
        msg += 'vulnerability which discloses: "' + match  + '".'
        v.setDesc( msg )
        v.setSeverity(severity.LOW)
        v.setName( 'Path disclosure vulnerability' )
        v['path'] = match
        v.addToHighlight( match )
        kb.kb.append( self, 'pathDisclosure', v )

    def _longest(self, a, b):
        '''
        Comparison function that orders strings by their length. Kept for
        backward compatibility, grep() sorts using key functions.

        @parameter a: A string.
        @parameter b: Another string.
        @return: A negative number if a is shorter than b, zero if both have
        the same length and a positive number if a is longer than b.
        '''
        len_a = len(a)
        len_b = len(b)
        return (len_a > len_b) - (len_a < len_b)

    def _attr_value(self, path_disclosure_string, response_body ):
        '''
        This method was created to remove some false positives.

        @return: True if path_disclosure_string is the value of an attribute inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while processing /home/image.png</b>...'
            return: False
        '''
        # The quoted path must appear between the "<" and the ">" of the same
        # tag. Comparing the bare path against a captured group that includes
        # the quotes would never be true, so only check whether the pattern is
        # found.
        quoted_path = '["\']' + re.escape(path_disclosure_string) + '["\']'
        tag_regex = '<[^>]+?' + quoted_path + '[^>]*>'
        return re.search(tag_regex, response_body) is not None

    def _update_KB_path_list( self ):
        '''
        If a path disclosure was found, I can create a list of full paths to all URLs ever visited.
        This method updates that list.

        @return: None, the results are saved to the kb as "listFiles" and
        "webroot".
        '''
        path_disc_vulns = kb.kb.getData( 'pathDisclosure', 'pathDisclosure' )
        if not path_disc_vulns:
            # I can't calculate the list !
            return

        # Init the kb variables
        kb.kb.save( self, 'listFiles', [] )

        # Note that this list is recalculated every time a new page is
        # accessed, which is good because new URLs may have been found
        url_list = kb.kb.getData( 'urls', 'urlList' )
        url_paths = [ urlParser.getPath( url ) for url in url_list ]

        # Now I find the longest match between one of the URLs that w3af has
        # discovered, and one of the path disclosure strings that this plugin
        # has found. I use the longest match because with small matches I have
        # more probability of making a mistake.
        longest_match = ''
        longest_path = None
        for path_disc_vuln in path_disc_vulns:
            disclosed_path = path_disc_vuln['path']

            # URL paths always use "/", normalize the separator so that
            # Windows paths can be compared too. The replacement is one
            # character for one character, so lengths are not modified.
            comparable_path = disclosed_path.replace('\\', '/')

            for path_and_file in url_paths:
                if len(longest_match) < len(path_and_file) and \
                comparable_path.endswith( path_and_file ):
                    longest_match = path_and_file
                    longest_path = disclosed_path

        # Now I recalculate the place where all the resources are in disk, all
        # this is done taking the longest_match as a reference, so... if we
        # don't have a longest_match, then nothing is actually done
        if not longest_match:
            return

        # Get the webroot: remove ONLY the trailing URL path, the same text
        # may also appear somewhere else in the disclosed path.
        webroot = longest_path[ :-len(longest_match) ]

        #
        #   This if fixes a strange case reported by Olle
        #           if webroot[0] == '/':
        #           IndexError: string index out of range
        #   That seems to be because the webroot == ''
        #
        if not webroot:
            return

        kb.kb.save( self, 'webroot', webroot )

        # Check what path separator we should use (linux / windows)
        if webroot.startswith('/'):
            path_sep = '/'
        else:
            # windows
            path_sep = '\\'

        # Create the remote locations, without duplicates
        remote_locations = set()
        for url_path in url_paths:
            remote_locations.add( webroot + url_path.replace('/', path_sep) )

        kb.kb.save( self, 'listFiles', list( remote_locations ) )

    def setOptions( self, OptionList ):
        '''
        This plugin has no options, so there is nothing to set.
        '''
        pass

    def getOptions( self ):
        '''
        @return: A list of option objects for this plugin.
        '''
        ol = optionList()
        return ol

    def end(self):
        '''
        This method is called when the plugin wont be used anymore.

        Prints a summary of the disclosed paths, grouped by URL, together with
        the ids of the requests in which each path was found.
        '''
        inform = kb.kb.getData( 'pathDisclosure', 'pathDisclosure' )

        # url -> unique disclosed paths ; path -> unique request ids
        paths_by_url = {}
        ids_by_path = {}
        for v in inform:
            path = v['path']

            url_paths = paths_by_url.setdefault( v.getURL(), [] )
            if path not in url_paths:
                url_paths.append( path )

            # getId() is expected to return a list of ids; a single value is
            # also accepted instead of failing while printing the summary.
            vuln_ids = v.getId()
            if not isinstance( vuln_ids, (list, tuple) ):
                vuln_ids = [ vuln_ids, ]

            path_ids = ids_by_path.setdefault( path, [] )
            for vuln_id in vuln_ids:
                if vuln_id not in path_ids:
                    path_ids.append( vuln_id )

        for url in paths_by_url:
            om.out.information( 'The URL: "' + url + '" has the following path disclosure problems:' )
            for path in paths_by_url[ url ]:
                to_print = '    - ' + path + ' . Found in request with'

                complete_list = ids_by_path[ path ]
                if len(complete_list)==1:
                    to_print += ' id ' + str( complete_list[0] ) + '.'
                else:
                    to_print += ' ids ' + str( complete_list )
                om.out.information( to_print )

    def _get_path_disclosure_strings(self):
        '''
        Return a list of regular expressions to be tested.

        @return: The Windows drive and file:// URL patterns, followed by the
        strings returned by get_common_directories().
        '''
        path_disclosure_strings = [
            r"[A-Z]:\\",
            r"file:///?[A-Z]\|",
        ]
        path_disclosure_strings.extend( get_common_directories() )
        return path_disclosure_strings

    def getPluginDeps( self ):
        '''
        @return: A list with the names of the plugins that should be runned before the
        current one.
        '''
        return []

    def getLongDesc( self ):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        # Raw string: the Windows example contains backslashes that would be
        # interpreted as escape sequences (form feed) in a regular string.
        return r'''
        This plugin greps every page for path disclosure vulnerabilities like:
        
            - C:\www\files\...
            - /var/www/htdocs/...
            
        The results are saved to the KB, and used by all the plugins that need to know the location
        of a file inside the remote web server.
        '''