银河天使ps2版破解记录
title: 银河天使ps2版破解记录
date: 2024-11-20 00:00:00
id: 5
tags:
- 汉化
- python
2024.11.10 游戏解包成功
2024.11.11 确认了汉化的可行度
10号的时候终于把GAps2版的封包解开了,六代通解太他妈爽了!
至于为啥拖了这么久,主要是咱太懒了
简单记录一下过程
其实上个月我就开始研究GA的封包了,研究那个索引文件二合一的初代试玩版,发现ps2版和它的封包很像
我是看贴吧炼狱之矛前辈提到才知道试玩版封包的……
之后找到了Aroduc(英化补丁作者)发的解包工具C源码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * One record of the IPF index table. Records are read verbatim from the .pak
 * file with fread(), so the host's `unsigned int` width and byte order must
 * match the archive (presumably 32-bit little-endian -- TODO confirm before
 * building for another kind of host).
 */
typedef struct {
    unsigned int filename_hash; /* numeric file id; also used as the output file name */
    unsigned int offset;        /* bits 27..31: data-file number (.pNN); bits 0..26: offset inside it */
    unsigned int size;          /* number of bytes stored for this entry */
} IPFEntry;

enum {
    IPF_PART_SHIFT  = 27,        /* every .pNN data file covers 1 << 27 bytes (128 MiB) of offset space */
    IPF_OFFSET_MASK = 0x7FFFFFF  /* low 27 bits: position inside the selected data file */
};

/* Returns a freshly allocated "<first base_len bytes of base><suffix>", or NULL when out of memory. */
static char *ipf_join(const char *base, size_t base_len, const char *suffix)
{
    size_t suffix_len = strlen(suffix);
    char *joined = malloc(base_len + suffix_len + 1);

    if (joined != NULL) {
        memcpy(joined, base, base_len);
        memcpy(joined + base_len, suffix, suffix_len + 1); /* copies the terminator too */
    }
    return joined;
}

/* Length of path without its final extension; dots inside directory names are left alone. */
static size_t ipf_stem_length(const char *path)
{
    const char *dot = strrchr(path, '.');
    const char *sep = strrchr(path, '/');
    const char *bsep = strrchr(path, '\\');

    if (bsep != NULL && (sep == NULL || bsep > sep)) {
        sep = bsep;
    }
    if (dot == NULL || dot == path || (sep != NULL && dot <= sep + 1)) {
        return strlen(path); /* no extension, or a leading-dot name such as ".hidden" */
    }
    return (size_t)(dot - path);
}

/* Copies one index entry out of its .pNN data file into "<hash>.z0". Returns 0 on success. */
static int ipf_extract_entry(const char *base, size_t base_len, const IPFEntry *entry)
{
    char suffix[16];
    char outname[50];
    char *inname = NULL;
    char *fdata = NULL;
    FILE *in = NULL;
    FILE *out = NULL;
    int rc = 1;
    unsigned int part = entry->offset >> IPF_PART_SHIFT;
    /* The original computed "offset - part", which is only right for part 0. */
    long within = (long)(entry->offset & IPF_OFFSET_MASK);

    snprintf(suffix, sizeof suffix, ".p%02u", part);
    inname = ipf_join(base, base_len, suffix);
    if (inname == NULL) {
        fputs("out of memory\n", stderr);
        goto done;
    }

    in = fopen(inname, "rb");
    if (in == NULL) {
        fprintf(stderr, "cannot open %s\n", inname);
        goto done;
    }
    if (fseek(in, within, SEEK_SET) != 0) {
        fprintf(stderr, "cannot seek to %ld in %s\n", within, inname);
        goto done;
    }

    fdata = malloc(entry->size != 0 ? entry->size : 1);
    if (fdata == NULL) {
        fputs("out of memory\n", stderr);
        goto done;
    }
    if (fread(fdata, 1, entry->size, in) != entry->size) {
        fprintf(stderr, "short read from %s\n", inname);
        goto done;
    }

    snprintf(outname, sizeof outname, "%u.z0", entry->filename_hash);
    printf("%s\n", outname);
    out = fopen(outname, "wb");
    if (out == NULL) {
        fprintf(stderr, "cannot create %s\n", outname);
        goto done;
    }
    if (fwrite(fdata, 1, entry->size, out) != entry->size) {
        fprintf(stderr, "short write to %s\n", outname);
        goto done;
    }
    rc = 0;

done:
    if (out != NULL && fclose(out) != 0) {
        rc = 1; /* fclose flushes; a failure here means data was lost */
    }
    if (in != NULL) {
        fclose(in);
    }
    free(fdata);
    free(inname);
    return rc;
}

/*
 * Unpacks an IPF archive.
 *
 * filename names the index file (e.g. "data.pak"). It must start with the
 * 4-byte magic "IPF ", followed by an entry count and that many IPFEntry
 * records. The payload of each entry is read from the sibling data file
 * "<stem>.pNN" and written to "<filename_hash>.z0" in the current directory.
 * A debugging listing of the index is written to "<stem>.log".
 *
 * Unlike the first version, filename is no longer modified (it used to be
 * truncated in place by strtok).
 *
 * Returns 0 when every entry was extracted, non-zero otherwise.
 */
int unpack(char* filename)
{
    static const char magic[4] = { 'I', 'P', 'F', ' ' };
    char seen[sizeof magic];
    FILE *pak = NULL;
    FILE *log = NULL;
    IPFEntry *entries = NULL;
    char *logname = NULL;
    unsigned int entrycount = 0;
    unsigned int i;
    size_t base_len;
    int rc = 1;

    if (filename == NULL) {
        return 1;
    }
    pak = fopen(filename, "rb");
    if (pak == NULL) {
        fprintf(stderr, "cannot open %s\n", filename);
        return 1;
    }

    /* The magic is binary data: compare bytes instead of using fgets/strcmp. */
    if (fread(seen, 1, sizeof seen, pak) != sizeof seen
        || memcmp(seen, magic, sizeof magic) != 0) {
        printf("incorrect magic\n");
        goto done;
    }
    printf("correct magic\n");

    if (fread(&entrycount, sizeof entrycount, 1, pak) != 1) {
        fputs("truncated header\n", stderr);
        goto done;
    }
    printf("number of entries: %u\n", entrycount);

    /* Heap allocation: the count comes from the file and may be far too large for the stack. */
    entries = calloc(entrycount != 0 ? entrycount : 1, sizeof *entries);
    if (entries == NULL) {
        fputs("out of memory\n", stderr);
        goto done;
    }
    if (fread(entries, sizeof *entries, entrycount, pak) != entrycount) {
        fputs("truncated index table\n", stderr);
        goto done;
    }
    printf("succesfully read\n");

    base_len = ipf_stem_length(filename);

    /* The log is a debugging aid only, so failing to create it is not fatal. */
    logname = ipf_join(filename, base_len, ".log");
    if (logname != NULL) {
        log = fopen(logname, "w");
    }
    if (log != NULL) {
        fprintf(log, "found %u entries\n", entrycount);
        for (i = 0; i < entrycount; i++) {
            fprintf(log, "hash: %u\toffset: %u\tsize: %u\n",
                    entries[i].filename_hash, entries[i].offset, entries[i].size);
        }
        fclose(log);
    } else {
        fputs("warning: cannot write log file\n", stderr);
    }

    rc = 0;
    for (i = 0; i < entrycount; i++) {
        if (ipf_extract_entry(filename, base_len, &entries[i]) != 0) {
            rc = 1; /* keep going so one bad entry does not hide the rest */
        }
    }

done:
    free(logname);
    free(entries);
    fclose(pak);
    return rc;
}
/*
 * Entry point of the IPF unpacker. Expects the path of the .pak index file
 * as the first argument and exits with a failure status when the argument is
 * missing or unpack() reports an error.
 */
int main(int argc, char* argv[])
{
    if (argc < 2) {
        /* argv[0] may be absent when the program is started with an empty argv */
        const char *self = (argc > 0 && argv[0] != NULL) ? argv[0] : "unpack";

        fprintf(stderr, "Usage: %s filename.pak\n", self);
        return EXIT_FAILURE;
    }
    return unpack(argv[1]) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include "zlib.h"
int uncompressCustom(char *dest, unsigned int *destLen, char *source, unsigned int sourceLen)
{
int ret;
z_stream strm;
strm.avail_in = sourceLen;
strm.next_in = (Bytef *)source;
strm.next_out = (Bytef *)dest;
strm.avail_out = *destLen;
strm.zalloc = 0;
strm.zfree = 0;
ret = inflateInit2 (&strm,-MAX_WBITS); // ignore zlib header(!) - this is basically the difference between standard uncompress and this code
if ( !ret )
{
if ( (ret = inflate(&strm, Z_FINISH)) == Z_STREAM_END )
{
*destLen = strm.total_out;
return inflateEnd(&strm);
}
else
{
inflateEnd(&strm);
if (ret) return ret;
else return Z_BUF_ERROR;
}
}
return ret;
}
/*
 * Reverses the order of the four low bytes of intnum
 * (0x11223344 becomes 0x44332211).
 */
unsigned int byteswap(unsigned int intnum)
{
    const unsigned int lowest = intnum & 0x000000FFu;
    const unsigned int lower = intnum & 0x0000FF00u;
    const unsigned int upper = intnum & 0x00FF0000u;
    const unsigned int highest = intnum & 0xFF000000u;

    return (lowest << 24) | (lower << 8) | (upper >> 8) | (highest >> 24);
}
//int z0_unpack(FILE* source, FILE* dest) {
/*
 * Decompresses one ".z0" file.
 *
 * Layout as read by this function: 2-byte magic "Z0", a 4-byte big-endian
 * uncompressed size, 4 more header bytes, then a raw deflate stream up to the
 * end of the file. The first version decoded those 4 extra bytes as a
 * bit-inverted checksum but never verified it; they are still not verified
 * here because the checksum algorithm is unknown.
 *
 * source   input stream opened in binary mode; it is always closed before
 *          returning (NULL is reported as an error)
 * destName path of the output file; it is only created once decompression
 *          has succeeded, so no empty file is left behind on failure
 *
 * Returns 0 on success and 1 on any failure.
 */
int z0_unpack(FILE* source, char* destName) {
    enum { Z0_HEADER_SIZE = 10 };
    unsigned char header[Z0_HEADER_SIZE];
    unsigned int size;
    unsigned int clen;
    long end;
    char *buf = NULL;
    char *out = NULL;
    FILE *dest = NULL;
    int ret;
    int ex = 1;

    if (source == NULL || destName == NULL) {
        fputs("Missing source stream or destination name\n", stderr);
        goto done;
    }

    if (fread(header, 1, 2, source) != 2 || header[0] != 'Z' || header[1] != '0') {
        fputs("Incorrect magic!\n", stderr);
        goto done;
    }
    fputs("Correct magic!\n", stderr);

    if (fread(header + 2, 1, Z0_HEADER_SIZE - 2, source) != Z0_HEADER_SIZE - 2) {
        fputs("Truncated header\n", stderr);
        goto done;
    }
    /* Assemble the big-endian size byte by byte so the result does not depend on host endianness. */
    size = ((unsigned int)header[2] << 24) | ((unsigned int)header[3] << 16)
         | ((unsigned int)header[4] << 8) | (unsigned int)header[5];

    /* Everything after the header is compressed payload. */
    if (fseek(source, 0, SEEK_END) != 0 || (end = ftell(source)) < Z0_HEADER_SIZE) {
        fputs("Cannot determine input size\n", stderr);
        goto done;
    }
    clen = (unsigned int)(end - Z0_HEADER_SIZE);
    if ((long)clen != end - Z0_HEADER_SIZE) {
        fputs("Input too large\n", stderr);
        goto done;
    }
    if (fseek(source, Z0_HEADER_SIZE, SEEK_SET) != 0) {
        fputs("Cannot seek in input\n", stderr);
        goto done;
    }

    buf = malloc((size_t)clen + 1);
    out = malloc((size_t)size + 1);
    if (buf == NULL || out == NULL) {
        fputs("Not enough memory\n", stderr);
        goto done;
    }
    if (fread(buf, sizeof(char), clen, source) != clen) {
        fputs("Short read from input\n", stderr);
        goto done;
    }

    ret = uncompressCustom(out, &size, buf, clen);
    switch (ret) {
    case Z_OK:
        break;
    case Z_MEM_ERROR:
        fputs("Not enough memory\n", stderr);
        goto done;
    case Z_BUF_ERROR:
        fputs("Not enough room in output buffer\n", stderr);
        goto done;
    case Z_DATA_ERROR:
        fputs("Corrupted input stream\n", stderr);
        goto done;
    default:
        fputs("Decompression failed\n", stderr);
        goto done;
    }

    dest = fopen(destName, "wb");
    if (dest == NULL) {
        fputs("Cannot create destination file\n", stderr);
        goto done;
    }
    if (size != 0 && fwrite(out, 1, size, dest) != size) {
        fputs("Short write to destination file\n", stderr);
        goto done;
    }
    ex = 0;

done:
    if (dest != NULL) {
        if (fclose(dest) != 0) {
            ex = 1; /* fclose flushes; a failure means the output is incomplete */
        }
        if (ex != 0) {
            remove(destName);
        }
    }
    free(out);
    free(buf);
    if (source != NULL) {
        fclose(source);
    }
    return ex;
}
/*
 * Entry point of the .z0 decompressor: z0_unpack source_file destination_file.
 * Exits with status 0 on success and 1 on a usage error, an unreadable
 * source file, or a failure reported by z0_unpack().
 */
int main(int argc, char* argv[])
{
    int ex = 1;

    if (argc == 3) {
        FILE *source = fopen(argv[1], "rb");

        if (source == NULL) {
            /* Report the failure here instead of handing a NULL stream to z0_unpack(). */
            perror(argv[1]);
        } else {
            ex = z0_unpack(source, argv[2]); /* z0_unpack closes source */
        }
    }
    else {
        fputs("Usage: z0_unpack source_file destination_file\n", stderr);
    }
    exit(ex);
}
虽然只是初版,但有核心部分就够了
研究了半天发现我能看懂个毛啊……
哦,好吧还是能看懂一点的,反正正式版的解包操作就是:读取索引表→根据索引表获取单个压缩文件→去除前10个字节以及zlib头解压
但是文件名是怎么获取的?然后我发现试玩版里面用16进制编辑器能看到文件名,正式版看不到,这下蒙圈了,因为源码里面没有提到这个
找了一下规律,索引表中有一项是文件名偏移量,用这个就能获取对应的文件名了
为啥正式版没有?一番研究之后,在IksBase.dll发现了一个MakeFileID的函数
文件名也被哈希成了一个不可逆的id
哈希python代码如下
import sys
def make_file_id(lp_string):
    """Hash a file name into the 32-bit id stored in the archive index.

    Per the surrounding notes this is a port of ``MakeFileID`` from
    IksBase.dll. The name is lower-cased first, so the id does not depend on
    letter case.

    Args:
        lp_string: file name to hash.

    Returns:
        The id as an int in the range 0..0xFFFFFFFF.
    """
    lowered = lp_string.lower()
    packed = 0     # characters folded in base 256, reduced whenever it grows too large
    char_sum = 0   # running sum of the character codes

    for ch in lowered:
        code = ord(ch)
        char_sum += code
        # Shift the previous value up one byte and append this character,
        # keeping the result an unsigned 32-bit integer.
        packed = ((packed << 8) + code) & 0xFFFFFFFF
        # Once any of the top 9 bits is set, reduce modulo 0xFFF9D7.
        if packed & 0xFF800000:
            packed %= 0xFFF9D7

    # Only the low byte of the character sum survives the final 32-bit mask.
    return (packed | (char_sum << 24)) & 0xFFFFFFFF
# Command-line check: hash the name given as the first argument and print it.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <file name>")
    test_string = sys.argv[1]
    file_id = make_file_id(test_string)
    print(f"{test_string}: int: {file_id} hex: {hex(file_id)}")
游戏读取文件的过程:文件名→哈希成id→在索引表中找这个id→解压读取
tbl文件中会索引一些文件名,但不是全部,所以Aroduc的解包工具会解出来一些名称为$接一串16进制数字的文件,就是没有找到对应的文件名……
顺带一提,用初代的工具解包ML EL会出现有很多未知文件的原因也是这个,把解包工具中的list.txt换成对应版本的即可,这个文件里面就是名字
所以为啥到正式版封包就拆开了?变成了pak p00两个文件,文件名也搞这么麻烦,考虑过我们解包的感受吗😭
写了个脚本,OK初代试玩版解包没问题
而且因为文件名没有哈希成id所以免封包没问题(只有包内的文件名才会哈希,正式版因为有一些不知道名字的文件所以必须打包)
xbox版和正式版封包一致用Aroduc的解包工具就能解开,不过因为会改字符集所以文本显示没成功,而且xbox也不能动态调试,所以就暂时放弃了。
然后我就把目标看向ps2版,6作都没有汉化
谷歌搜了一圈只看到pc英化后有人说要接手ps2的后三作,然而没有下文了……
所以得自己研究了
万幸的是索引表结构和pc初代试玩版基本差不多,文件名也没哈希,省了不少事
但是……里面的文件不是用zlib压缩的啊。。。
对压缩算法我是一窍不通
就这样尝试几次无果后,就先搁置了
不过过了一个月我又继续研究了,嘿,我还就不信了,非把你解开不可
谷歌搜了ps2汉化教程,本来就少,关于怎么破解封包的就更少了
用ida静态研究了老长时间也没研究个所以然出来
准备放弃的时候看到pc教程里有一步通过搜索文本断点找解压函数的,就想试试ps2动态能不能行了
pcsx2有个调试台功能,不过挺难用的,汇编代码都不能批量复制
把“人類が星々の”转成Shift-JIS编码HEX,用调试台选字节数组搜索,搜出来4个结果,点进去看了一下有两个是正在播放的文本,另外两个就是脚本文件,虽然pcsx2的调试器不支持改编码页,但看到熟悉的IDS就知道。
在这个地址下写入断点,然后重启游戏,游戏中断在这里
用ghidra看了一下伪代码
// Ghidra decompilation of the game's archive decoder (address 0x0015bca8).
//
// param_1: output buffer.
// param_2: in: capacity of the output buffer; out: number of bytes produced.
// param_3: input data, starting with an 8-byte header:
//            bytes 0-2  "ARZ" or " 3;"
//            byte  3    mode as one hex digit ('0'-'9' or 'a'-'f')
//            bytes 4-7  32-bit little-endian size, checked against *param_2
// param_4: number of input bytes.
//
// Returns 1 on success and 0 on failure (bad header, unsupported mode, or
// output buffer too small). Only modes 0 and 1 are handled:
//   mode 0: every byte after the header is XORed with 0x72 and copied.
//   mode 1: flag-driven decoding through a 4096-byte ring buffer; this looks
//           like classic LZSS with every input byte XORed with 0x72
//           (NOTE(review): naming the scheme is an inference from the code).
// The header is parsed twice (once for the size, once for the mode),
// presumably because a helper was inlined at both call sites.
undefined4 FUN_0015bca8(byte *param_1,int *param_2,char *param_3,int param_4)
{
byte bVar1;
byte bVar2;
byte bVar3;
int iVar4;
undefined *puVar5;
uint uVar6;
uint uVar7;
byte *pbVar8;
int iVar9;
uint uVar10;
int iVar11;
// Ring buffer. Ghidra shows it as two stack objects; the zero-fill loop
// below walks backwards from local_33, so the two are presumably adjacent
// parts of one array -- TODO confirm against the stack layout.
byte abStack_1020 [4077];
undefined local_33 [35];
// A non-positive capacity is normalised to -1.
if (*param_2 < 1) {
*param_2 = -1;
}
// First header pass: uVar7 ends up as the size field, or 0xffffffff when
// the input is shorter than 8 bytes or the header is not recognised.
uVar7 = 0xffffffff;
if (7 < param_4) {
iVar4 = -1;
if ((param_4 < 4) ||
((((*param_3 != 'A' || (param_3[1] != 'R')) || (param_3[2] != 'Z')) &&
(((iVar4 = -1, *param_3 != ' ' || (iVar4 = -1, param_3[1] != '3')) ||
(iVar4 = -1, param_3[2] != ';')))))) {
LAB_0015bd70:
iVar4 = iVar4 << 0x18;
}
else {
// bVar1 + 0xd0 is bVar1 - '0' modulo 256: values below 10 are decimal digits.
bVar1 = param_3[3];
uVar7 = bVar1 + 0xd0;
iVar4 = uVar7 * 0x1000000;
if (9 < (uVar7 & 0xff)) {
// Otherwise accept 'a'..'f' (bVar1 - 0x57 gives 10..15); anything else is -1.
iVar4 = bVar1 - 0x57;
if (5 < (bVar1 + 0x9f & 0xff)) {
iVar4 = -1;
}
goto LAB_0015bd70;
}
}
uVar7 = 0xffffffff;
// A non-negative iVar4 means the header was valid: read bytes 4..7 as a
// little-endian 32-bit value.
if (-1 < iVar4) {
uVar7 = (uint)(byte)param_3[4] | (uint)(byte)param_3[7] << 0x18 |
(uint)(byte)param_3[6] << 0x10 | (uint)(byte)param_3[5] << 8;
}
}
if (uVar7 == 0xffffffff) {
return 0;
}
// The caller's buffer must be able to hold the declared size.
if (*param_2 < (int)uVar7) {
return 0;
}
// Second header pass: iVar4 receives the mode digit in its top byte.
iVar4 = -1;
if ((3 < param_4) &&
((((*param_3 == 'A' && (param_3[1] == 'R')) && (param_3[2] == 'Z')) ||
(((iVar4 = -1, *param_3 == ' ' && (iVar4 = -1, param_3[1] == '3')) &&
(iVar4 = -1, param_3[2] == ';')))))) {
bVar1 = param_3[3];
uVar7 = bVar1 + 0xd0;
iVar4 = uVar7 * 0x1000000;
if ((uVar7 & 0xff) < 10) goto LAB_0015be58;
iVar4 = bVar1 - 0x57;
if (5 < (bVar1 + 0x9f & 0xff)) {
iVar4 = -1;
}
}
iVar4 = iVar4 << 0x18;
LAB_0015be58:
// Mode 0: XOR every byte after the 8-byte header with 0x72 and copy it out.
// The output length is the input length minus the header.
if (iVar4 >> 0x18 == 0) {
iVar4 = 8;
if (8 < param_4) {
do {
pbVar8 = (byte *)(param_3 + iVar4);
iVar4 = iVar4 + 1;
*param_1 = *pbVar8 ^ 0x72;
param_1 = param_1 + 1;
} while (iVar4 < param_4);
}
*param_2 = param_4 + -8;
return 1;
}
// Any mode other than 1 is rejected.
if (iVar4 >> 0x18 != 1) {
return 0;
}
// Mode 1. Zero 0xfee bytes, walking downwards from local_33.
iVar4 = 0xfed;
puVar5 = local_33;
do {
iVar4 = iVar4 + -1;
*puVar5 = 0;
puVar5 = puVar5 + -1;
} while (-1 < iVar4);
// iVar4: read position in the input (just past the header).
// iVar11: number of bytes written to the output so far.
// uVar7: write position in the ring buffer, starting at 0xfee.
// uVar10: flag shift register; bit 8 set means flag bits are still pending.
iVar4 = 8;
iVar11 = 0;
uVar7 = 0xfee;
uVar10 = 0;
while( true ) {
while( true ) {
uVar10 = uVar10 >> 1;
uVar6 = uVar10;
if ((uVar10 & 0x100) == 0) {
// The flag bits are used up: fetch the next flag byte (XORed with 0x72).
// The 0xff00 marker makes bit 8 stay set for the next eight shifts.
// Running out of input here ends the decode successfully.
if (param_4 <= iVar4) {
*param_2 = iVar11;
return 1;
}
pbVar8 = (byte *)(param_3 + iVar4);
iVar4 = iVar4 + 1;
uVar10 = *pbVar8 ^ 0x72 | 0xff00;
uVar6 = *pbVar8 ^ 0x72;
}
// Flag bit 1: literal byte, handled after this inner loop.
if ((uVar6 & 1) != 0) break;
// Flag bit 0: two-byte back-reference into the ring buffer.
if (param_4 <= iVar4) {
*param_2 = iVar11;
return 1;
}
iVar9 = iVar4 + 1;
bVar1 = param_3[iVar4];
if (param_4 <= iVar9) goto LAB_0015c1e4;
iVar4 = iVar4 + 2;
bVar2 = param_3[iVar9];
iVar9 = 0;
// After XORing both bytes with 0x72, the 12-bit source position is the first
// byte plus the high nibble of the second as bits 8..11; the low nibble of
// the second byte plus 3 is the number of bytes copied.
do {
uVar6 = (bVar1 ^ 0x72 | ((bVar2 ^ 0x72) & 0xf0) << 4) + iVar9;
iVar9 = iVar9 + 1;
bVar3 = abStack_1020[uVar6 & 0xfff];
// Fail when the output buffer is full.
if (*param_2 <= iVar11) {
return 0;
}
pbVar8 = param_1 + iVar11;
iVar11 = iVar11 + 1;
// Each copied byte is also stored back into the ring buffer, which wraps at 4096.
abStack_1020[uVar7] = bVar3;
uVar7 = uVar7 + 1 & 0xfff;
*pbVar8 = bVar3;
} while (iVar9 <= (int)(((bVar2 ^ 0x72) & 0xf) + 2));
}
// Literal: one input byte XORed with 0x72 goes to both the output and the
// ring buffer. Running out of input ends the decode successfully.
pbVar8 = (byte *)(param_3 + iVar4);
if (param_4 <= iVar4) break;
iVar4 = iVar4 + 1;
bVar1 = *pbVar8;
if (*param_2 <= iVar11) {
return 0;
}
pbVar8 = param_1 + iVar11;
iVar11 = iVar11 + 1;
abStack_1020[uVar7] = bVar1 ^ 0x72;
uVar7 = uVar7 + 1 & 0xfff;
*pbVar8 = bVar1 ^ 0x72;
}
LAB_0015c1e4:
// Normal exit: report how many bytes were produced.
*param_2 = iVar11;
return 1;
}
得来全不费工夫(……),果然是解压函数
写完解压函数之后加到之前的解索引表脚本上
就能顺利解出所有文件了,而且算法还是六代通用的
前4个字节是文件头,后4个字节是原始大小,之后就是压缩部分了
15bca8这个函数有两种解压方式,一种是“ 3;0”文件头,跳过8个字节的文件头之后只把每个字节异或0x72(并没有真正压缩),不过使用较少,貌似只有特别小的文件才会用到。另一种是使用较多的根据控制字节解压的方式,文件头“ 3;1”,不过封包难住我了,毕竟之前说过本人对算法一窍不通
用第1种压缩方式弄了个压缩脚本,封回去之后001没问题,000会读取失败,估计跟压缩文件在dat文件中的偏移量也有关。
字库也是个麻烦事,没找到文字对应的编码HEX,要真改不了,到时候自制一个码表把中文映射到shift-JIS上。
(本来想看一下cg图,不过是自定义格式的纹理,tex格式,所以就算了吧……pcsx2也有纹理转储功能来着)
不过这些之后再研究吧,还是先把pc版搞完吧,我已经快一个月没有好好P图了,前阵子终于把场景给P了
解包脚本我发github了,感兴趣可以研究一下
https://github.com/icey9527/GalaxyAngelUnpack/blob/main/ps2/gadat.py
评论