/usr/share/w3af/plugins/grep/pathDisclosure.py is in w3af-console 1.0-rc3svn3489-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | '''
pathDisclosure.py
Copyright 2006 Andres Riancho
This file is part of w3af, w3af.sourceforge.net .
w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
'''
import core.controllers.outputManager as om
# options
from core.data.options.option import option
from core.data.options.optionList import optionList
from core.controllers.basePlugin.baseGrepPlugin import baseGrepPlugin
import core.data.kb.knowledgeBase as kb
import core.data.kb.vuln as vuln
import core.data.constants.severity as severity
from core.data.constants.common_directories import get_common_directories
import core.data.parsers.urlParser as urlParser
import re
class pathDisclosure(baseGrepPlugin):
'''
Grep every page for traces of path disclosure problems.
@author: Andres Riancho ( andres.riancho@gmail.com )
'''
def __init__(self):
    '''
    Initialize the base grep plugin, create the internal state and compile
    the path disclosure regular expressions.
    '''
    baseGrepPlugin.__init__(self)

    # (url, path) tuples that were already reported by grep()
    self._already_added = list()

    # Compiled expressions, this list is filled by _compile_regex()
    self._path_disc_regex_list = list()
    self._compile_regex()
def _compile_regex(self):
'''
@return: None, the result is saved in self._path_disc_regex_list
'''
for path_disclosure_string in self._get_path_disclosure_strings():
regex_string = '('+path_disclosure_string + '.*?)[^A-Za-z0-9\._\-\\/\+~]'
regex = re.compile( regex_string, re.IGNORECASE)
self._path_disc_regex_list.append(regex)
def grep(self, request, response):
    '''
    Identify the path disclosure vulnerabilities.

    Every new path found in the response body is stored in the kb as a low
    severity vulnerability; after that, the list of remote files is updated.

    @parameter request: The HTTP request object.
    @parameter response: The HTTP response object
    @return: None, the result is saved in the kb.
    '''
    if response.is_text_or_html():
        # Decode the realurl
        realurl = urlParser.urlDecode( response.getURL() )
        html_string = response.getBody()

        # Two different expressions can match overlapping parts of one path:
        #
        # >>> import re
        # >>> re.findall('/var/www/.*','/var/www/foobar/htdocs/article.php')
        # ['/var/www/foobar/htdocs/article.php']
        # >>> re.findall('/htdocs/.*','/var/www/foobar/htdocs/article.php')
        # ['/htdocs/article.php']
        # >>>
        #
        # Only the longest match should be kept. The filter used below drops
        # a match when an already reported one ends with it, and that only
        # works if the longer matches are handled first. Because of that the
        # matches of ALL the expressions are collected and sorted from longest
        # to shortest before any of them is analyzed.
        match_list = []
        for path_disc_regex in self._path_disc_regex_list:
            match_list.extend( path_disc_regex.findall( html_string ) )
        match_list.sort( key=len, reverse=True )

        for match in match_list:
            # Avoid false positives: ignore the match if _wasSent() says so
            # for this request (presumably the path was part of the request,
            # verify in the base class) or if it is an attribute value.
            if self._wasSent( request, match ) or \
            self._attr_value( match, html_string ):
                continue

            # Check for dups
            if (realurl, match) in self._already_added:
                continue

            # Keep the longest match: nothing is reported if this match is
            # the end of a path that was already reported.
            if any( added.endswith( match ) for _, added in self._already_added ):
                continue

            # It's a new one, report!
            self._already_added.append( (realurl, match) )

            v = vuln.vuln()
            v.setURL( realurl )
            v.setId( response.id )
            msg = 'The URL: "' + v.getURL() + '" has a path disclosure '
            msg += 'vulnerability which discloses: "' + match + '".'
            v.setDesc( msg )
            v.setSeverity(severity.LOW)
            v.setName( 'Path disclosure vulnerability' )
            v['path'] = match
            v.addToHighlight( match )
            kb.kb.append( self, 'pathDisclosure', v )

    # NOTE(review): assumed to run for every response and not only for the
    # text ones, confirm.
    self._update_KB_path_list()
def _longest(self, a, b):
'''
@parameter a: A string.
@parameter a: Another string.
@return: The longest string.
'''
return cmp(len(a), len(b))
def _attr_value(self, path_disclosure_string, response_body ):
'''
This method was created to remove some false positives.
@return: True if path_disclosure_string is the value of an attribute inside a tag.
Examples:
path_disclosure_string = '/home/image.png'
response_body = '....<img src="/home/image.png">...'
return: True
path_disclosure_string = '/home/image.png'
response_body = '...<b>Error while processing /home/image.png</b>...'
return: False
'''
regex_res = re.findall('<.+?(["|\']'+ re.escape(path_disclosure_string) +'["|\']).*?>', response_body)
in_attr = path_disclosure_string in regex_res
return in_attr
def _update_KB_path_list( self ):
    '''
    If a path disclosure was found, I can create a list of full paths to all URLs ever visited.
    This method updates that list.

    @return: None, the webroot and the list of remote files are saved in the kb
    ( 'webroot' and 'listFiles' ).
    '''
    path_disc_vulns = kb.kb.getData( 'pathDisclosure', 'pathDisclosure' )
    if len( path_disc_vulns ) == 0:
        # I can't calculate the list !
        return

    # Init the kb variables
    kb.kb.save( self, 'listFiles', [] )

    # Note that this list is recalculated every time this method is called
    url_list = kb.kb.getData( 'urls', 'urlList' )

    # Now I find the longest match between one of the URLs that w3af has
    # discovered, and one of the path disclosure strings that this plugin has
    # found. I use the longest match because with a short match I have more
    # probability of making a mistake.
    longest_match = ''
    longest_path_disc_vuln = None
    for path_disc_vuln in path_disc_vulns:
        for url in url_list:
            path_and_file = urlParser.getPath( url )
            if path_disc_vuln['path'].endswith( path_and_file ):
                if len(longest_match) < len(path_and_file):
                    longest_match = path_and_file
                    longest_path_disc_vuln = path_disc_vuln

    # All the remote locations are calculated taking the longest_match as a
    # reference, so... if we don't have a longest_match, nothing is done
    if not longest_match:
        return

    # Get the webroot: the disclosed path without the URL path it ends with.
    # Only the suffix is removed; str.replace() removed EVERY occurrence of
    # longest_match, so '/var/www/' with a longest_match of '/' ended up being
    # 'varwww'.
    disclosed_path = longest_path_disc_vuln['path']
    webroot = disclosed_path[ :-len(longest_match) ]

    # The webroot is empty when the disclosed path is equal to the URL path,
    # and in that case webroot[0] would raise an IndexError (reported by Olle)
    if not webroot:
        return

    kb.kb.save( self, 'webroot', webroot )

    # Check what path separator we should use (linux / windows)
    if webroot[0] == '/':
        path_sep = '/'
    else:
        # windows
        path_sep = '\\'

    # Create the remote locations
    remote_locations = []
    for url in url_list:
        remote_path = urlParser.getPath( url ).replace('/', path_sep)
        remote_locations.append( webroot + remote_path )

    # Remove the duplicates before saving
    remote_locations = list( set( remote_locations ) )
    kb.kb.save( self, 'listFiles', remote_locations )
def setOptions( self, OptionList ):
    '''
    This method does nothing: the received options are ignored.

    @parameter OptionList: Not used.
    @return: None
    '''
    return None
def getOptions( self ):
    '''
    @return: A newly created optionList to which no option was added.
    '''
    return optionList()
def end(self):
    '''
    This method is called when the plugin won't be used anymore.

    It prints, for each URL, the paths that were disclosed and the ids of the
    requests in which each one of those paths was found.
    '''
    found_vulns = kb.kb.getData( 'pathDisclosure', 'pathDisclosure' )

    # url -> disclosed paths, and path -> ids of the vulns that disclose it
    paths_by_url = {}
    ids_by_path = {}
    for v in found_vulns:
        paths_by_url.setdefault( v.getURL(), [] ).append( v['path'] )
        ids_by_path.setdefault( v['path'], [] ).append( v.getId() )

    for url in paths_by_url:
        om.out.information( 'The URL: "' + url + '" has the following path disclosure problems:' )

        # Avoid duplicates
        for path in list( set( paths_by_url[ url ] ) ):
            to_print = ' - ' + path + ' . Found in request with'

            # extend() requires every stored id to be iterable, presumably
            # getId() returns a list of ids; verify against the vuln class.
            complete_list = []
            for id_list in ids_by_path[ path ]:
                complete_list.extend( id_list )
            complete_list = list( set( complete_list ) )

            if len( complete_list ) == 1:
                to_print += ' id ' + str( complete_list[0] ) + '.'
            else:
                to_print += ' ids ' + str( complete_list )
            om.out.information( to_print )
def _get_path_disclosure_strings(self):
    '''
    @return: A list of regular expressions to be tested: two expressions for
    windows paths, followed by the items returned by get_common_directories().
    '''
    windows_paths = [
        r"[A-Z]:\\",
        r"file:///?[A-Z]\|",
    ]
    return windows_paths + list( get_common_directories() )
def getPluginDeps( self ):
    '''
    @return: A list with the names of the plugins that should be run before the
    current one. This plugin has none, so the list is empty.
    '''
    no_dependencies = []
    return no_dependencies
def getLongDesc( self ):
    '''
    @return: A DETAILED description of the plugin functions and features.
    '''
    # Raw string: in a normal string the "\f" of "C:\www\files" is a form feed
    # character, so the windows example was shown broken to the user.
    return r'''
This plugin greps every page for path disclosure vulnerabilities like:
- C:\www\files\...
- /var/www/htdocs/...
The results are saved to the KB, and used by all the plugins that need to know the location
of a file inside the remote web server.
'''
|