skuukzky
文章12
标签0
分类2

文章分类

Tiktok X-Gnarly jsvmp算法还原

Tiktok X-Gnarly jsvmp算法还原

Tiktok X-Gnarly jsvmp算法还原

导言

前几天偶然在群里看到一个github仓库 具体内容大家可以自行看看,总的内容就是对tk的vmpwebmssdk.js进行去混淆和反编译,但当我查看的时候,tk已经进行了更新,并且还多出来了一个X-Gnarly的参数,我功力不够,无法对新版本的进行VM虚拟机的还原,所以本篇文章就只能用千篇一律的插桩和分析进行逆向还原了,如果有高手能够还原VM,期待你的文章。

分析

虽然,无法还原VM,但是去混淆的思路,还是可以借用他的。

附上Deepseek和仓库原作者的还原思路后的AST代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
const fs = require('fs');
const path = require('path');
const parser = require('@babel/parser');
const traverse = require('@babel/traverse').default;
const t = require('@babel/types');
const generator = require('@babel/generator').default;

// Deobfuscation pass over a script that keeps its helper functions in an array
// named `yg`: each anonymous function in that array is given a name
// (`Ab<index>`), and direct calls of the form `yg[<number>](...)` are rewritten
// to call that named function instead.

// Input / output configuration
const inputPath = './ems1.js';
const outputPath = './output.js';

// Read the source code
const code = fs.readFileSync(inputPath, 'utf-8');

// Parse it into an AST
const ast = parser.parse(code, {
    sourceType: 'unambiguous',
    plugins: ['jsx']
});

// Maps an index in the `yg` array to the generated function name for that slot.
const functionMap = new Map();

// ========== Step 1: extract the anonymous functions in the yg array, replace them with named references, and insert named function declarations in place ==========
traverse(ast, {
    // NOTE: this `path` parameter is a Babel NodePath and shadows the
    // module-level `path` import inside the visitor.
    VariableDeclarator(path) {
        // Only handle `yg = [ ... ]` declarators.
        if (
            t.isIdentifier(path.node.id, { name: 'yg' }) &&
            t.isArrayExpression(path.node.init)
        ) {
            const elements = path.node.init.elements;
            const insertions = [];

            elements.forEach((element, index) => {
                if (t.isFunctionExpression(element) || t.isArrowFunctionExpression(element)) {
                    const funcName = `Ab${index}`;

                    // Arrow functions with an expression body are wrapped in
                    // `{ return <expr>; }` so they fit a function declaration.
                    const funcDeclaration = t.functionDeclaration(
                        t.identifier(funcName),
                        element.params,
                        t.isBlockStatement(element.body) ? element.body : t.blockStatement([t.returnStatement(element.body)]),
                        element.generator,
                        element.async
                    );

                    functionMap.set(index, funcName);

                    // Replace the function in the array with the named function's identifier
                    elements[index] = t.identifier(funcName);

                    // Remember the declaration so it can be inserted afterwards
                    insertions.push(funcDeclaration);
                }
            });

            // Insert all function declarations after the enclosing VariableDeclaration.
            // Each one is inserted directly after the declaration; the list is
            // reversed first, presumably so the declarations end up in ascending
            // index order.
            const declPath = path.findParent(p => p.isVariableDeclaration());
            insertions.reverse().forEach(fn => {
                declPath.insertAfter(fn); // insert each function definition in place
            });
        }
    }
});

// ========== Step 2: replace yg[index](args) with AbX(args) ==========
traverse(ast, {
    CallExpression(path) {
        const callee = path.node.callee;

        // Match calls whose callee is `yg[<numeric literal>]`.
        if (
            t.isMemberExpression(callee) &&
            t.isIdentifier(callee.object, { name: 'yg' }) &&
            t.isNumericLiteral(callee.property)
        ) {
            const index = callee.property.value;

            // Only slots that were turned into named functions in step 1 are rewritten.
            if (functionMap.has(index)) {
                path.node.callee = t.identifier(functionMap.get(index));
            }
        }
    }
});

// ========== Generate the final code ==========
const output = generator(ast, {
    retainLines: false,
    comments: true,
    jsescOption: { minimal: true }
}).code;

fs.writeFileSync(outputPath, output);
console.log('✅ 处理完成,函数已就地替换!');

这个时候,我们去代码中搜索X-Gnarly

image-20250426100951117

xn是XB.On是XG,所以我们全局搜索 On看看哪里进行了调用,顺便也看看附近有没有xn ,方便我们定位到,我们在把解混淆后的放入网页中替换。一共两处,然后打上断点触发,这里断住了,我们分析参数,

1
2
3
4
5
a = bn(i, t)),
i:'WebIdLastTime=1742522342&aid=1988&app_language=zh-Hans&app_name=tiktok_web&browser_language=zh-CN&browser_name=..... (太长省略)'
t: "" (Get请求)
t:{"magic":538969122,"version":1,"dataType":8,"strData":"38NyodcW9CLhuMe299J/...
和Xb差不多,此请求是Get,所以i是params,t是"";如果是Post,则i是params,t是data

image-20250426101746723

显然就是bn函数生成了,进入bn函数,然后发现是Ab9函数,上下浏览关键的就是这个do循环

image-20250426113449815

在进入h函数就已经是经典的jsvmp桥段了。

插桩

插桩可以看这两篇文章
JS逆向技巧:日志断点如何正确输出 JSON 而不报错

【逆向案例】巨量算数响应体vmp-decode

正巧的是第二篇文章和tk一样同为字节开发的,插桩流程也是一样。

1
2
3
4
5
6
7
(H = W["apply"](V, Q),v[++l] = H)
"对象V: ", JSON.stringify(V,function(k, v) {if (v === window) {return 'window'} return v}), "函数W: ", W.toString(), "参数Q: ", JSON.stringify(Q,function(k, v) {if (v === window) {return 'window'} return v}), "结果H: ", H
	
 H = new (Function.bind["apply"](B, z))();
"函数B: ", B.toString(), "参数z: ", JSON.stringify(z), "结果H: ", H

JSON.stringify(V,function(k, v) {if (v === window) {return 'window'} return v})

image-20250426113847235

还原

先从上到下观察浏览器日志,

发现这里应该是环境检测,格式化检测和自动化检测,下面还有环境检测很多,继续观察。

image-20250426114612815

这里发现了一些东西,3个32位的值(?md5)和时间戳获取,时间戳肯定是毋庸置疑,我们看一下这3个东西是否为md5,可以看到就在上面,经过一个匿名函数,我们放入CyberChef验证一下,依次验证发现分别对应的是,url后缀params的md5和参数data的md5和user-agent的md5

image-20250426114934148

继续查看

image-20250426115435044

这里看到他在push数组,而这4个数组是什么呢,我们看到了一个关键函数 setUint16(),如果你还不明白可以问Deepseek或者朋友。这里我去请教了下福生大佬,他直接一眼顶真,告诉我这是 Uint8Array

image-20250426115925972

接着问了下gpt 的确如此啊,然后让他转一下方法 就出来了

image-20250426120000274

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Serialises a sequence of unsigned 32-bit integers into a flat byte array,
 * four bytes per input value.
 *
 * Despite the "LE" suffix in the name, the default byte order is BIG-endian
 * (most significant byte first); that is what this function has always
 * produced and it is kept as the default for compatibility. Pass
 * `littleEndian = true` to emit the least significant byte first instead.
 *
 * @param {Iterable<number>} uint32Array - Values to serialise; each one is
 *     written with DataView#setUint32.
 * @param {boolean} [littleEndian=false] - Byte order flag forwarded to
 *     DataView#setUint32.
 * @returns {number[]} The concatenated bytes (each 0-255), in input order.
 */
function uInt32sToBytesLE(uint32Array, littleEndian = false) {
    const resultBytes = [];
    // A single 4-byte scratch buffer is reused for every value.
    const dataView = new DataView(new ArrayBuffer(4));
    for (const value of uint32Array) {
        dataView.setUint32(0, value, littleEndian);
        // Read the bytes back in memory order and append them to the result.
        for (let offset = 0; offset < 4; offset++) {
            resultBytes.push(dataView.getUint8(offset));
        }
    }
    return resultBytes;
}

// Test data
var e = [556645436]

// Convert to a byte array and print it
const array_test_two = uInt32sToBytesLE(e);
console.log(array_test_two);

这里的e也就是图中的 [{"0":556645436,"1":1,"2":14,"3":"c20e522f3995433f755e282ec81bb40a","4":"7226faf67640367ae589dea1020b0ab9","5":"1b04588d93b1afac45573a43b9e15594","6":1745638813,"7":1245783967,"8":56675889,"9":"5.1.0"}]中的0元素。

继续查看

image-20250426120209511

这里也把url的md5值转Uint8Array也push进数组,剩下就是相同操作了,把上文中数据全部按此操作加入到数组中,最后的得到push完的数组了。

image-20250426120348612

继续查看,发现他在把这个数组利用fromCharCode转为字符串,也就是后文中的第一个乱码字符串。

image-20250426120736687

继续查看他这里在取随机数* 固定值 4294967296 在进Math.floor之后 继续转Uint8Array

image-20250426120950823

image-20250426122216362

image-20250426121110507

1
2
3
4
5
/**
 * Builds a list of twelve random integers, each computed as
 * Math.floor(Math.random() * 4294967296).
 *
 * @returns {number[]} An array of length 12.
 */
function generateRandomIntegers() {
    const values = [];
    while (values.length < 12) {
        const scaled = Math.random() * 4294967296;
        values.push(Math.floor(scaled));
    }
    return values;
}

image-20250426122140635

这里说一下经过函数Ab22的几个参数

第一个上文根据随机数相乘后得到的12个数组,第二个是16暂时我们不知道,第三个参数就是我们152数组转字符串的,最后结果得到是一个乱码。

Ab22

现在我们来探寻一下这个16是什么怎么来的,

来到函数h的部分,在这里打上日志

e,c,JSON.stringify(v)

image-20250426122603556

在Ab22函数中打上断点,防止日志过多,

image-20250426123328227

现在我们观察这个14是怎么来的就好了,现在一步步往上追,这里不带着一步步追了,单看一个部分,就能理解了

我们发现在日志上,他都是和我们这12位数组相关的,这里我是倒着看

image-20250426123748766

截取一些日志

1
2
3
4
5
6
7
8
9
10
11
12
13
14
首先通读一遍. 
					看到了15
 46 74 '[3737565922,15,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
					在经过下一步运算且就多了2  还是有个15
 49 75 '[2,15,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
				 且有了个14,也不清楚怎么来的
 21 78 '[2,14,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
				然后有了个16,应该是2+14来的,14应该是上文的14
 5 79 '[16,14,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
				还是有个15 不清楚怎么来的
 46 81 '[16,15,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
				这里多出来了个0 
 71 82 '[0,15,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
 49 85 '[0,15,3737565922,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'

我们先解决最容易解决的

3737565922,15 是怎么变成2的 我的建议是在各种运算符中插上运算,方便你更好的观察。

这里不卖关子,3737565922&15 是进行&变成了2

多进行几次日志的保存可以发现 15是固定的,那么就可以有个猜测,凡是第二位是15的基本都是要进行&操作的

那这个14呢,其实一共有12个数组,这里是一小部分,往上查找一下,14其实是上一个整体运算得到的结果,

image-20250426130308162

那么16 应该就是 14+2 (3737565922&15) +2(前面的 2 &15 )

那么16,15是怎么变成0的呢?

其实还是和上面一个道理 16&15

那么其实整体就明白了

即:数组中的 3737565922 & 15 = 2,再加上上文累计得到的 14,结果是 16,最后 16 & 15 = 0

这里听着可能迷糊

再来一组就好

1
2
3
4
5
6
7
8
9
10
46 74 '[3624428041,15,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
3624428041 & 15 = 9
 49 75 '[9,15,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
		9 & 15 = 9,0 是上文运算得到的 0
 21 78 '[9,0,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
			9+0 = 9
 5 79 '[9,0,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
 46 81 '[9,15,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'
				9& 15 = 9
 71 82 '[9,15,3624428041,4294967296,null,2,32,[7,1245783967],[8,1640124884],[9,"5.1.0"],"5.1.0","$chrome_asyncScriptInfo","__$webdriverAsyncExecutor"]'

上面我已经附上了各种运算结果,最后要注意的是,如果你插桩+=这个,他的结果每次都会再+5,这就是最终的结果。得到这个思路后,把它复述给gpt,让他帮我们完成代码。

1
2
3
4
5
6
7
8
9
10
11
// Reproduces the small integer derived from the twelve random 32-bit values:
// keep a running sum of each value's low 4 bits, masked to 4 bits after every
// step, then add 5 to the final sum.
const array = [3447926452,2069153462,3972284619,4133534163,3678609889,97836732,1914591165,4045946446,1756250425,147771365,3737565922,3624428041];
const FIXED = 4294967296;
// Bitwise operators work on 32-bit integers, so 4294967296 & 15 evaluates to 0.
let n = FIXED & 15;

for (let i = 0; i < array.length; i++) {
    const current = array[i] & 15; // low nibble of the current value
    n = (current + n) & 15;        // running sum, wrapped to 4 bits
}

// Declared explicitly: assigning to an undeclared name throws in strict mode
// and in ES modules. Only the value after the last iteration is ever used.
const res = n + 5;

console.log("最终结果是:", res); // prints 14 for the sample array above

image-20250426131114180

也是和网页上对的上的

继续查看,这次我们从这个生成的乱码字符串入手,会简单很多,直接搜索,然后定位最后一个,这里我们发现他多了个K

可能是拼接的salt,毕竟xb也有拼接,并不奇怪,这里我们搜索开头,把K也加进去,

image-20250426132024557

往上翻翻看到了一个substring函数的切割,还看到他把48位数组,就是我们Uint8array转的那些随机数,也进行了转字符串,

也就是 "K" + 乱码字符串的前91位 + 48位数组转成的字符串 + 乱码字符串第91位之后的部分

代码如下。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Assembles the final X-Gnarly value from the serialised field bytes and the
// random bytes.
// NOTE: `Ab22` and `ModiB64` are not defined in this snippet; they must be in
// scope before it runs.
// Every binding is declared explicitly: assigning to undeclared names throws
// in strict mode and in ES modules.
const array_test = [10,0,0,4,67,143,22,187,1,0,2,0,1,2,0,2,0,14,3,0,32,99,97,53,99,53,57,51,55,97,98,54,49,98,51,57,98,97,51,55,97,56,57,49,102,99,50,99,55,57,54,97,97,4,0,32,100,52,49,100,56,99,100,57,56,102,48,48,98,50,48,52,101,57,56,48,48,57,57,56,101,99,102,56,52,50,55,101,5,0,32,49,98,48,52,53,56,56,100,57,51,98,49,97,102,97,99,52,53,53,55,51,97,52,51,98,57,101,49,53,53,57,52,6,0,4,104,12,96,255,7,0,4,74,65,39,159,8,0,4,97,194,81,212,9,0,5,53,46,49,46,48];
const array_test_two = [180,46,131,205,182,198,84,123,203,64,196,236,211,185,96,246,225,33,67,219,188,222,212,5,189,87,30,114,78,62,40,241,57,65,174,104,229,207,206,8,226,186,198,222,9,98,8,216];

// Turn both byte arrays into strings, one character per byte.
const grable_string = String.fromCharCode(...array_test);
const grable_string_two = String.fromCharCode(...array_test_two);
console.log("grable_string", grable_string.length);
console.log("grable_string_two", grable_string_two.length);
const e = [3447926452,2069153462,3972284619,4133534163,3678609889,97836732,1914591165,4045946446,1756250425,147771365,3737565922,3624428041];

const garbled_characters = Ab22(e, 14, grable_string);
console.log(garbled_characters);
console.log("111", garbled_characters.charCodeAt(0));
// The split index (110 here) is dynamic; it is hard-coded for this sample only.
const sub1 = garbled_characters.substring(0, 110);
const sub2 = garbled_characters.substring(110);
console.log(sub1);

// Layout: 'K' + head of the garbled string + random-byte string + tail.
const final_str = 'K' + sub1 + grable_string_two + sub2;
console.log(final_str);
// ModiB64: a modified base64, the same as the one used for xb, only with a different alphabet.
const x_Gnarly = ModiB64("u09tbS3Uxxxxx(脱敏)k8JORCF5/xKHwacP=", final_str);
console.log(x_Gnarly);

image-20250426132842424

发现结果是完全对的上的,只剩下substring的值 ,是怎么取得了。

substring取值

这里纯看日志,插桩+= 和 %=就可以分析出来

(H = W["apply"](V, Q), 下个条件断点 W.toString() == 'function substring() { [native code] }'

插桩就可以了

截取部分日志

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
数组一 = [102,126,80,64,182,9,42,92,254,191,247,183,160,220,98,203,210,35,70,109,222,49,40,131,199,209,86,193,8,149,142,203,226,179,177,113,167,115,41,237,177,184,53,124,112,13,216,87]
数组二 = [148,63,67,189,6,17,96,154,48,195,123,175,103,47,188,168,187,192,207,60,61,254,143,103,48,95,64,160,120,59,115,205,225,61,2,255,194,46,253,224,177,157,162,130,129,20,13,125,47,137,51,173,247,243,70,5,239,66,29,246,93,219,218,97,1,124,54,244,130,134,255,23,227,165,57,16,188,192,205,40,224,98,138,147,251,50,189,220,10,93,74,41,99,160,36,22,23,171,160,252,63,156,154,105,5,43,191,108,46,205,24,62,202,244,38,161,11,73,7,0,118,252,204,190,177,169,195,165,37,204,67,74,121,172,154,118,32,247,121,136,223,79,242,214,41,179,158,253,129,96,25,82]
现在根据我的下文的提示帮我编写js代码 
//数组一 48位数组
 v[l] 0 += n 102 0是固定值
 v[l] 152 += n 1 153 固定153
 v[l] 102 %= n 153 102 %153=  102 
 
 v[l] 102 += n 126  上个结果中的102 + 数组中的第二位 126 = 228
 v[l] 152 += n 1 153 固定153
 v[l] 228 %= n 153   228 % 153 = 75
 
 v[l] 75 += n 80
 v[l] 152 += n 1
 v[l] 155 %= n 153
 
 v[l] 2 += n 64
 v[l] 152 += n 1
 v[l] 66 %= n 153
 
 v[l] 66 += n 182
 v[l] 152 += n 1
 v[l] 248 %= n 153
 
 //然后数组二也是同理 数组二是152位数组

同样,把需求整理给Gpt,让他给出代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

// Holds the result of the most recent loopStringChars() call. Kept at module
// level (and now properly declared) for compatibility with code that reads it.
let arrays = [];

/**
 * Converts a string into the list of its UTF-16 code units.
 *
 * Each call builds a fresh array, so results from earlier calls are no longer
 * mixed into later ones.
 *
 * @param {string} str - The string to convert.
 * @returns {number[]} One `charCodeAt` value per position in `str`.
 */
function loopStringChars(str) {
    const codes = [];
    for (let i = 0; i < str.length; i++) {
        codes.push(str.charCodeAt(i));
    }
    arrays = codes;
    return codes;
}

// Computes the substring split index: the sum of every character code of the
// garbled string plus every random byte, reduced modulo 153 after each step.
// NOTE: `Ab22` and `loopStringChars` are not defined in this snippet; they
// must be in scope before it runs.
// Every binding is declared explicitly: assigning to undeclared names throws
// in strict mode and in ES modules.
const array_test = [10,0,0,4,67,143,22,187,1,0,2,0,1,2,0,2,0,14,3,0,32,99,97,53,99,53,57,51,55,97,98,54,49,98,51,57,98,97,51,55,97,56,57,49,102,99,50,99,55,57,54,97,97,4,0,32,100,52,49,100,56,99,100,57,56,102,48,48,98,50,48,52,101,57,56,48,48,57,57,56,101,99,102,56,52,50,55,101,5,0,32,49,98,48,52,53,56,56,100,57,51,98,49,97,102,97,99,52,53,53,55,51,97,52,51,98,57,101,49,53,53,57,52,6,0,4,104,12,96,255,7,0,4,74,65,39,159,8,0,4,97,194,81,212,9,0,5,53,46,49,46,48];
const grable_string = String.fromCharCode(...array_test);
const e = [3447926452,2069153462,3972284619,4133534163,3678609889,97836732,1914591165,4045946446,1756250425,147771365,3737565922,3624428041];

const garbled_characters = Ab22(e, 14, grable_string);
console.log(garbled_characters);
// Character codes of the garbled string.
const 数组一 = loopStringChars(garbled_characters);
console.log(数组一);
// The 48 random bytes.
const 数组二 = [180,46,131,205,182,198,84,123,203,64,196,236,211,185,96,246,225,33,67,219,188,222,212,5,189,87,30,114,78,62,40,241,57,65,174,104,229,207,206,8,226,186,198,222,9,98,8,216];

// Running total, reduced modulo 153 after every addition.
// NOTE(review): 153 looks like the 152-element array length + 1 seen in the
// instrumentation log — confirm before using inputs of a different length.
let v = 0;

// Fold in the first array
for (const num of 数组一) {
    v += num;
    v %= 153;
}

// Fold in the second array
for (const num of 数组二) {
    v += num;
    v %= 153;
}

console.log(v);

结尾

代码整合运行

一级评论

image-20250426135411259

二级评论

image-20250426140957944

目前整个X-Gnarly就分析完毕了,整体逆向还是很艰辛,感谢有福生大佬,对我进行教学指导,不然靠自己还是过于吃力。

本文作者:skuukzky
本文链接:https://lpy30m.github.io/skuukzky.github.io/2025/04/26/%E9%80%86%E5%90%91/Tiktok-X-Gnarly-jsvmp%E7%AE%97%E6%B3%95%E8%BF%98%E5%8E%9F/
版权声明:本文采用 CC BY-NC-SA 3.0 CN 协议进行许可