vlambda博客
学习文章列表

[Python] 某网站Web端爬虫攻防大赛题目交流

比赛网站的链接http://match.yuanrenxue.com/list,比赛目前已经结束

题目序号 题目内容 是否完成
第一题 js 混淆 - 源码乱码 已完成
第二题 js 混淆 - 动态cookie 1 已完成
第三题 访问逻辑 - 推心置腹 已完成
第四题 雪碧图、样式干扰 已完成
第五题 js 混淆 - 乱码增强 已完成
第六题 js 混淆 - 回溯 已完成
第七题 动态字体,随风漂移 已完成
第八题 验证码 - 图文点选 已完成
第九题 js 混淆 - 动态cookie 2 已完成
第十题 js 混淆 - 重放攻击对抗 仅答案
第十一题 app抓取 - so文件协议破解 已完成
第十二题 入门级js 已完成
第十三题 入门级cookie_某乐V1.0版 已完成


第一题【接口-查询参数-值加密】
打开题目后按F12,页面会通过setInterval函数不断触发debugger断点,直接删除debugger断点,然后就可以继续执行了,点击下一步

在Network窗口可以查看到需要的数据来源于【http://match.yuanrenxue.com/api/match/1】接口,而请求中有一个m参数是加密的,也就是说需要找到这个m参数的生成方法,那么在全局中搜索【api/match/1】

[Python] 某网站Web端爬虫攻防大赛题目交流


可以搜索到这个内容,复制这一段内的script代码进行格式化

[Python] 某网站Web端爬虫攻防大赛题目交流


其主要内容大概是这些


// Relative endpoint that serves the data for this challenge.
window.url = '/api/match/1';

/**
 * Requests the page selected by window.page from window.url.
 *
 * The query carries two fields: `page`, and `m`, which is the string
 * oo0O0(timestamp) + window.f followed by the separator '丨' and the
 * timestamp expressed in seconds. The call is a synchronous jQuery GET
 * that expects a JSON response; no success handler is registered here.
 */
request = function () {
  // Millisecond timestamp; the Date is stringified before parsing, so
  // sub-second precision is presumably dropped — TODO confirm.
  const timestamp = Date.parse(new Date());

  // Call oo0O0 first and read window.f afterwards, exactly as the original
  // expression does: the write-up reports that oo0O0 updates window.f.
  const prefix = oo0O0(timestamp.toString());
  const m = prefix + window.f;

  const query = {
    page: window.page,
    m: m + '丨' + timestamp / 1000,
  };

  $.ajax({
    url: window.url,
    dataType: 'json',
    async: false,
    data: query,
    type: 'GET',
    beforeSend: function (request) {},
  });
};

这里可以清楚的看到m参数是由oo0O0函数的结果加上window.f得到的,继续查找一下oo0O0函数

[Python] 某网站Web端爬虫攻防大赛题目交流


也是在当前的页面,只是在不同的script标签下,将这个script标签下的代码进行格式化可以得到下面


/**
 * oo0O0(mw) — obfuscated helper quoted verbatim from the challenge page.
 *
 * Steps, in order:
 *  1. Resets window.b to '' and, for every element of window.a, logs the element
 *     and appends String[document.e + document.g](window.a[i][document.f + document.h]() - i - window.c).
 *     Both method names are assembled at runtime from document.e/g/f/h, which are
 *     not defined in this snippet.
 *  2. Declares the encoded string table U and the lookup helper J(o, E). J reads U[o],
 *     decodes it with the function stored in J['luAabU'] using E as the key, and
 *     caches the decoded value in J['qlVPZg'][o].
 *     NOTE(review): the decoder looks like a custom-alphabet base64 step followed by
 *     an RC4-style keystream — confirm before relying on that description.
 *  3. base64-decodes window.b with atob, calls the string method named by
 *     J('0x0', ']dQW') with J('0x1', 'GTu!') and mw wrapped in single quotes ('\x27'),
 *     and passes the resulting text to eval.
 *
 * The function itself never touches window.f; any change to it comes from the
 * eval'd script.
 *
 * WARNING: this eval's text derived from page-supplied globals — only run it in a
 * throw-away environment.
 *
 * @param mw value that is single-quoted and handed to the decoded script text.
 * @returns {string} always the empty string.
 */
function oo0O0(mw) { window.b = ''; for (var i = 0, len = window.a.length; i < len; i++) { console.log(window.a[i]); window.b += String[document.e + document.g](window.a[i][document.f + document.h]() - i - window.c) } var U = ['W5r5W6VdIHZcT8kU', 'WQ8CWRaxWQirAW==']; var J = function(o, E) { o = o - 0x0; var N = U[o]; if (J['bSSGte'] === undefined) { var Y = function(w) { var m = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=', T = String(w)['replace'](/=+$/, ''); var A = ''; for (var C = 0x0, b, W, l = 0x0; W = T['charAt'](l++);~W && (b = C % 0x4 ? b * 0x40 + W : W, C++ % 0x4) ? A += String['fromCharCode'](0xff & b >> (-0x2 * C & 0x6)) : 0x0) { W = m['indexOf'](W) } return A }; var t = function(w, m) { var T = [], A = 0x0, C, b = '', W = ''; w = Y(w); for (var R = 0x0, v = w['length']; R < v; R++) { W += '%' + ('00' + w['charCodeAt'](R)['toString'](0x10))['slice'](-0x2) } w = decodeURIComponent(W); var l; for (l = 0x0; l < 0x100; l++) { T[l] = l } for (l = 0x0; l < 0x100; l++) { A = (A + T[l] + m['charCodeAt'](l % m['length'])) % 0x100, C = T[l], T[l] = T[A], T[A] = C } l = 0x0, A = 0x0; for (var L = 0x0; L < w['length']; L++) { l = (l + 0x1) % 0x100, A = (A + T[l]) % 0x100, C = T[l], T[l] = T[A], T[A] = C, b += String['fromCharCode'](w['charCodeAt'](L) ^ T[(T[l] + T[A]) % 0x100]) } return b }; J['luAabU'] = t, J['qlVPZg'] = {}, J['bSSGte'] = !! [] } var H = J['qlVPZg'][o]; return H === undefined ? (J['TUDBIJ'] === undefined && (J['TUDBIJ'] = !! []), N = J['luAabU'](N, E), J['qlVPZg'][o] = N) : N = H, N }; eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27')); return ''}

这里需要注意的一点是,oo0O0函数的返回值是一个空字符串,也就是说m的值完全由window.f决定。
尝试在控制台输出一下window.f

[Python] 某网站Web端爬虫攻防大赛题目交流

一直有输出很烦, 干掉

[Python] 某网站Web端爬虫攻防大赛题目交流


但是当我们执行一次oo0O0函数后,再输出window.f,这时候window.f被修改了



同时会输出一大堆乱码,那就说明oo0O0函数会修改window.f的值,下面详细看看oo0O0函数,其内部并没有出现window.f这个变量,但是在返回值前有一句比较特殊的代码

// Last statement of oo0O0 before `return ''`: base64-decodes window['b'], calls the
// string method whose name is J('0x0', ']dQW') with J('0x1', 'GTu!') and the
// single-quoted mw ('\x27' is the quote character), then eval's the result.
eval(atob(window['b'])[J('0x0', ']dQW')](J('0x1', 'GTu!'), '\x27' + mw + '\x27'));

这里将window['b']的值进行base64解码后还进行了一些函数的传参运算,那么在控制台输出一下【atob(window['b'])】,看看是什么内容
输出后可以看到是一段script代码,将其进行格式化

// MD5-style helper script obtained by base64-decoding window['b'].
// Quoted as served; the physical line breaks below fall in the middle of statements.
//
// - chrsz = 16: str2binl masks each character with (1 << chrsz) - 1 and advances
//   chrsz bits per character; message lengths are passed as a.length * chrsz bits.
// - hexcase = 0 selects the lower-case digit table in binl2hex; b64pad = "" means
//   binl2b64 appends no padding characters.
// - hex_md5 / b64_md5 / str_md5 and the *_hmac_md5 variants all go through core_md5
//   and differ only in how the resulting word array is encoded.
// - NOTE(review): several additive constants (for example -680976936 and
//   11261161415) do not appear to match the RFC 1321 table, so digests presumably
//   differ from standard MD5; md5_vm_test() is never called in this script.
//   Keep the constants exactly as they are — the result has to match what the
//   site computes, not a reference MD5.
// - The final statement reads mwqqppz, which is not declared anywhere in this
//   script, and stores hex_md5(mwqqppz) in window.f.
var hexcase = 0;var b64pad = "";var chrsz = 16; function hex_md5(a) { return binl2hex(core_md5(str2binl(a), a.length * chrsz))}function b64_md5(a) { return binl2b64(core_md5(str2binl(a), a.length * chrsz))}function str_md5(a) { return binl2str(core_md5(str2binl(a), a.length * chrsz))}function hex_hmac_md5(a, b) { return binl2hex(core_hmac_md5(a, b))}function b64_hmac_md5(a, b) { return binl2b64(core_hmac_md5(a, b))}function str_hmac_md5(a, b) { return binl2str(core_hmac_md5(a, b))}function md5_vm_test() { return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72"}function core_md5(p, k) { p[k >> 5] |= 128 << ((k) % 32); p[(((k + 64) >>> 9) << 4) + 14] = k; var o = 1732584193; var n = -271733879; var m = -1732584194; var l = 271733878; for (var g = 0; g < p.length; g += 16) { var j = o; var h = n; var f = m; var e = l; o = md5_ff(o, n, m, l, p[g + 0], 7, -680976936); l = md5_ff(l, o, n, m, p[g + 1], 12, -389564586); m = md5_ff(m, l, o, n, p[g + 2], 17, 606105819); n = md5_ff(n, m, l, o, p[g + 3], 22, -1044525330); o = md5_ff(o, n, m, l, p[g + 4], 7, -176418897); l = md5_ff(l, o, n, m, p[g + 5], 12, 1200080426); m = md5_ff(m, l, o, n, p[g + 6], 17, -1473231341); n = md5_ff(n, m, l, o, p[g + 7], 22, -45705983); o = md5_ff(o, n, m, l, p[g + 8], 7, 1770035416); l = md5_ff(l, o, n, m, p[g + 9], 12, -1958414417); m = md5_ff(m, l, o, n, p[g + 10], 17, -42063); n = md5_ff(n, m, l, o, p[g + 11], 22, -1990404162); o = md5_ff(o, n, m, l, p[g + 12], 7, 1804660682); l = md5_ff(l, o, n, m, p[g + 13], 12, -40341101); m = md5_ff(m, l, o, n, p[g + 14], 17, -1502002290); n = md5_ff(n, m, l, o, p[g + 15], 22, 1236535329); o = md5_gg(o, n, m, l, p[g + 1], 5, -165796510); l = md5_gg(l, o, n, m, p[g + 6], 9, -1069501632); m = md5_gg(m, l, o, n, p[g + 11], 14, 643717713); n = md5_gg(n, m, l, o, p[g + 0], 20, -373897302); o = md5_gg(o, n, m, l, p[g + 5], 5, -701558691); l = md5_gg(l, o, n, m, p[g + 10], 9, 38016083); m = md5_gg(m, l, o, n, p[g + 15], 14, -660478335); n = md5_gg(n, m, l, 
o, p[g + 4], 20, -405537848); o = md5_gg(o, n, m, l, p[g + 9], 5, 568446438); l = md5_gg(l, o, n, m, p[g + 14], 9, -1019803690); m = md5_gg(m, l, o, n, p[g + 3], 14, -187363961); n = md5_gg(n, m, l, o, p[g + 8], 20, 1163531501); o = md5_gg(o, n, m, l, p[g + 13], 5, -1444681467); l = md5_gg(l, o, n, m, p[g + 2], 9, -51403784); m = md5_gg(m, l, o, n, p[g + 7], 14, 1735328473); n = md5_gg(n, m, l, o, p[g + 12], 20, -1921207734); o = md5_hh(o, n, m, l, p[g + 5], 4, -378558); l = md5_hh(l, o, n, m, p[g + 8], 11, -2022574463); m = md5_hh(m, l, o, n, p[g + 11], 16, 1839030562); n = md5_hh(n, m, l, o, p[g + 14], 23, -35309556); o = md5_hh(o, n, m, l, p[g + 1], 4, -1530992060); l = md5_hh(l, o, n, m, p[g + 4], 11, 1272893353); m = md5_hh(m, l, o, n, p[g + 7], 16, -155497632); n = md5_hh(n, m, l, o, p[g + 10], 23, -1094730640); o = md5_hh(o, n, m, l, p[g + 13], 4, 681279174); l = md5_hh(l, o, n, m, p[g + 0], 11, -358537222); m = md5_hh(m, l, o, n, p[g + 3], 16, -722881979); n = md5_hh(n, m, l, o, p[g + 6], 23, 76029189); o = md5_hh(o, n, m, l, p[g + 9], 4, -640364487); l = md5_hh(l, o, n, m, p[g + 12], 11, -421815835); m = md5_hh(m, l, o, n, p[g + 15], 16, 530742520); n = md5_hh(n, m, l, o, p[g + 2], 23, -995338651); o = md5_ii(o, n, m, l, p[g + 0], 6, -198630844); l = md5_ii(l, o, n, m, p[g + 7], 10, 11261161415); m = md5_ii(m, l, o, n, p[g + 14], 15, -1416354905); n = md5_ii(n, m, l, o, p[g + 5], 21, -57434055); o = md5_ii(o, n, m, l, p[g + 12], 6, 1700485571); l = md5_ii(l, o, n, m, p[g + 3], 10, -1894446606); m = md5_ii(m, l, o, n, p[g + 10], 15, -1051523); n = md5_ii(n, m, l, o, p[g + 1], 21, -2054922799); o = md5_ii(o, n, m, l, p[g + 8], 6, 1873313359); l = md5_ii(l, o, n, m, p[g + 15], 10, -30611744); m = md5_ii(m, l, o, n, p[g + 6], 15, -1560198380); n = md5_ii(n, m, l, o, p[g + 13], 21, 1309151649); o = md5_ii(o, n, m, l, p[g + 4], 6, -145523070); l = md5_ii(l, o, n, m, p[g + 11], 10, -1120210379); m = md5_ii(m, l, o, n, p[g + 2], 15, 718787259); n = md5_ii(n, m, l, 
o, p[g + 9], 21, -343485551); o = safe_add(o, j); n = safe_add(n, h); m = safe_add(m, f); l = safe_add(l, e) } return Array(o, n, m, l)}function md5_cmn(h, e, d, c, g, f) { return safe_add(bit_rol(safe_add(safe_add(e, h), safe_add(c, f)), g), d)}function md5_ff(g, f, k, j, e, i, h) { return md5_cmn((f & k) | ((~f) & j), g, f, e, i, h)}function md5_gg(g, f, k, j, e, i, h) { return md5_cmn((f & j) | (k & (~j)), g, f, e, i, h)}function md5_hh(g, f, k, j, e, i, h) { return md5_cmn(f ^ k ^ j, g, f, e, i, h)}function md5_ii(g, f, k, j, e, i, h) { return md5_cmn(k ^ (f | (~j)), g, f, e, i, h)}function core_hmac_md5(c, f) { var e = str2binl(c); if (e.length > 16) { e = core_md5(e, c.length * chrsz) } var a = Array(16), d = Array(16); for (var b = 0; b < 16; b++) { a[b] = e[b] ^ 909522486; d[b] = e[b] ^ 1549556828 } var g = core_md5(a.concat(str2binl(f)), 512 + f.length * chrsz); return core_md5(d.concat(g), 512 + 128)}function safe_add(a, d) { var c = (a & 65535) + (d & 65535); var b = (a >> 16) + (d >> 16) + (c >> 16); return (b << 16) | (c & 65535)}function bit_rol(a, b) { return (a << b) | (a >>> (32 - b))}function str2binl(d) { var c = Array(); var a = (1 << chrsz) - 1; for (var b = 0; b < d.length * chrsz; b += chrsz) { c[b >> 5] |= (d.charCodeAt(b / chrsz) & a) << (b % 32) } return c}function binl2str(c) { var d = ""; var a = (1 << chrsz) - 1; for (var b = 0; b < c.length * 32; b += chrsz) { d += String.fromCharCode((c[b >> 5] >>> (b % 32)) & a) } return d}function binl2hex(c) { var b = hexcase ? 
"0123456789ABCDEF" : "0123456789abcdef"; var d = ""; for (var a = 0; a < c.length * 4; a++) { d += b.charAt((c[a >> 2] >> ((a % 4) * 8 + 4)) & 15) + b.charAt((c[a >> 2] >> ((a % 4) * 8)) & 15) } return d}function binl2b64(d) { var c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; var f = ""; for (var b = 0; b < d.length * 4; b += 3) { var e = (((d[b >> 2] >> 8 * (b % 4)) & 255) << 16) | (((d[b + 1 >> 2] >> 8 * ((b + 1) % 4)) & 255) << 8) | ((d[b + 2 >> 2] >> 8 * ((b + 2) % 4)) & 255); for (var a = 0; a < 4; a++) { if (b * 8 + a * 6 > d.length * 32) { f += b64pad } else { f += c.charAt((e >> 6 * (3 - a)) & 63) } } } return f};window.f = hex_md5(mwqqppz)

在函数的最后可以看到window.f是通过hex_md5函数运算得到的
但是这里的【mwqqppz】变量在这段代码中并没有被定义,此时就往前看看上一段代码传入的是什么参数
可以看到这里有一些小混淆,此时将oo0O0函数中出现的下面这段代码输入到控制台,然后再依次输入【J('0x0', ']dQW')】和【J('0x1', 'GTu!')】

// String-table decoder extracted from oo0O0 so it can be run on its own in the console.
//
// U holds the encoded strings. J(o, E) converts o to a number (o - 0x0), reads U[o]
// and returns its decoded form, using E as the key.
// On the first call J builds two helpers and stores state on itself:
//   Y(w)    — strips trailing '=' and decodes w using the alphabet in `m`
//             (lower-case letters first, then upper-case, digits, '+', '/', '=').
//   t(w, m) — runs Y, re-reads the bytes through decodeURIComponent, fills a
//             256-entry table T, permutes it with the key m, then XORs each
//             character of w with a value taken from T.
//             NOTE(review): this looks like RC4 — confirm before relying on it.
//   J['luAabU'] = t, J['qlVPZg'] = cache of decoded strings keyed by index,
//   J['bSSGte'] = flag marking that the helpers have been initialised.
// Later calls with the same index return the cached string without decoding again,
// so the key E is only used the first time a given index is requested.
var U = ['W5r5W6VdIHZcT8kU', 'WQ8CWRaxWQirAW=='];var J = function(o, E) { o = o - 0x0; var N = U[o]; if (J['bSSGte'] === undefined) { var Y = function(w) { var m = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=', T = String(w)['replace'](/=+$/, ''); var A = ''; for (var C = 0x0, b, W, l = 0x0; W = T['charAt'](l++);~W && (b = C % 0x4 ? b * 0x40 + W : W, C++ % 0x4) ? A += String['fromCharCode'](0xff & b >> (-0x2 * C & 0x6)) : 0x0) { W = m['indexOf'](W) } return A }; var t = function(w, m) { var T = [], A = 0x0, C, b = '', W = ''; w = Y(w); for (var R = 0x0, v = w['length']; R < v; R++) { W += '%' + ('00' + w['charCodeAt'](R)['toString'](0x10))['slice'](-0x2) } w = decodeURIComponent(W); var l; for (l = 0x0; l < 0x100; l++) { T[l] = l } for (l = 0x0; l < 0x100; l++) { A = (A + T[l] + m['charCodeAt'](l % m['length'])) % 0x100, C = T[l], T[l] = T[A], T[A] = C } l = 0x0, A = 0x0; for (var L = 0x0; L < w['length']; L++) { l = (l + 0x1) % 0x100, A = (A + T[l]) % 0x100, C = T[l], T[l] = T[A], T[A] = C, b += String['fromCharCode'](w['charCodeAt'](L) ^ T[(T[l] + T[A]) % 0x100]) } return b }; J['luAabU'] = t, J['qlVPZg'] = {}, J['bSSGte'] = !! [] } var H = J['qlVPZg'][o]; return H === undefined ? (J['TUDBIJ'] === undefined && (J['TUDBIJ'] = !! []), N = J['luAabU'](N, E), J['qlVPZg'][o] = N) : N = H, N };

可以得到其解混淆后的两个值
还原一下就变成了

// The same statement with the two J(...) lookups written out as literals:
// String.prototype.replace with a string pattern swaps the first occurrence of the
// placeholder text mwqqppz in the decoded script for mw wrapped in single quotes
// ('\x27'), and the patched script is then eval'd.
eval(atob(window['b'])["replace"]("mwqqppz", '\x27' + mw + '\x27'));

这时就清楚了。将mwqqppz替换为我们传入的变量进行计算
此时只要将最后一句的
// Final statement of the decoded script: stores the digest of mwqqppz on the
// browser's window object (there is no `window` global when run under Node.js).
window.f = hex_md5(mwqqppz)
修改为
// Command-line entry point for 01.js: the first user-supplied argument
// (process.argv[2]) is the value to hash; the digest is written to stdout
// so that a calling process can read it.
const [, , mwqqppz] = process.argv;
console.log(hex_md5(mwqqppz));
这样就可以供我们从命令行调用,修改好后另存为01.js
接下来简单地写一下接口调用的代码,其中的m值通过用nodejs调用该js文件进行计算,需要先安装nodejs

import os
import time

import requests

# Data endpoint for challenge 1; `page` and `m` are sent as query parameters.
API_URL = 'http://match.yuanrenxue.com/api/match/1'


def main():
    """Fetch pages 1-5 of challenge 1 and print the total, the count and the mean.

    The ``m`` query parameter is the digest printed by ``node 01 <ms-timestamp>``
    followed by the separator '丨' and the timestamp in seconds. Node.js must be
    installed and ``01.js`` must be in the current working directory.

    Raises:
        RuntimeError: if the node helper prints nothing (node missing, script
            not found, or the script failed).
    """
    money = 0
    number = 0
    ts = str(int(time.time()))

    # The page hashes a millisecond timestamp with zeroed sub-second digits,
    # hence the literal '000' suffix. The context manager closes the pipe even
    # if read() raises.
    with os.popen('node 01 ' + ts + '000') as nodejs:
        digest = nodejs.read().replace('\n', '')
    if not digest:
        raise RuntimeError('node 01 produced no output; check that Node.js and 01.js are available')
    m = digest + '丨' + ts

    for page in range(1, 6):
        # Let requests build and percent-encode the query string.
        response = requests.get(API_URL, params={'page': page, 'm': m}).json()
        for each in response['data']:
            money += each['value']
            number += 1

    print(money)
    print(number)
    # Guard against an empty result set instead of raising ZeroDivisionError.
    if number:
        print(money // number)
    # Values recorded by the author: total 235000, count 50, mean 4700


if __name__ == '__main__':
    main()