Difference between revisions of "YAZ0 (File Format)"
(→Tools) |
m (→Data Groups) |
||
(16 intermediate revisions by 5 users not shown) | |||
Line 1: | Line 1: | ||
− | '''Yaz0''' is a run length encoding (RLE compression) method. In [[Mario Kart Wii]] most of the [[SZS]] files are '''Yaz0 compressed [[U8]] files | + | '''Yaz0''' is a run length encoding (RLE compression) method. In [[Mario Kart Wii]] most of the [[SZS]] files are '''Yaz0 compressed [[U8]] files'''. See »[[File_format#U8_archives|File format: U8 archives]]« for details. |
+ | |||
+ | |||
+ | __TOC__ | ||
+ | |||
== Data structure == | == Data structure == | ||
=== Header === | === Header === | ||
− | + | The header of a Yaz0 file is always 16 bytes long. All numeric values are stored as [[big endian]]. | |
− | The header of a Yaz0 file is always 16 bytes long. All numeric values stored as [[ big endian]] | ||
{| class="wikitable" | {| class="wikitable" | ||
|- | |- | ||
− | ! Offset | + | ! Offset !! Type !! Description |
− | ! Type | ||
− | ! Description | ||
|- | |- | ||
− | | 0x00 || | + | | 0x00 || Char[4] || '''magic'''. Always ''Yaz0'' in ASCII. |
|- | |- | ||
− | | 0x04 || | + | | 0x04 || UInt32 || Size in bytes of the uncompressed data. |
|- | |- | ||
− | | 0x08 || | + | | 0x08 || UInt32[2] || Reserved for special use. Always 0 in [[Mario Kart Wii]]. |
|} | |} | ||
Line 25: | Line 26: | ||
char magic[4]; // always "Yaz0" | char magic[4]; // always "Yaz0" | ||
be32_t uncompressed_size; // total size of uncompressed data | be32_t uncompressed_size; // total size of uncompressed data | ||
− | + | be32_t reserved[2]; // two unsigned integers reserved for special use | |
} | } | ||
__attribute__ ((packed)) yaz0_header_t; | __attribute__ ((packed)) yaz0_header_t; | ||
Line 31: | Line 32: | ||
=== Data Groups === | === Data Groups === | ||
− | + | The complete compressed data is organized in '''data groups'''. Each data group consists of 1 group header byte and 8 '''chunks'''. | |
− | The complete compressed data is organized in '''data groups'''. Each data group consists of 1 group header byte | ||
{| class="wikitable" | {| class="wikitable" | ||
Line 43: | Line 43: | ||
|} | |} | ||
− | Each bit of the group header | + | Each bit of the group header corresponds to one chunk: |
− | * The MSB (most significant bit, 0x80) | + | * The MSB (most significant bit, 0x80) corresponds to chunk 1 |
− | * The LSB (lowest significant bit, 0x01) | + | * The LSB (lowest significant bit, 0x01) corresponds to chunk 8 |
− | A set bit (=1) in the group header means, that the chunk is | + | A set bit (=1) in the group header means, that the chunk is exact 1 byte long. This byte must be copied to the output stream 1:1. A cleared bit (=0) defines, that the chunk is 2 or 3 bytes long interpreted as a backreference to already decompressed data that must be copied. |
{| class="wikitable" | {| class="wikitable" | ||
Line 60: | Line 60: | ||
* '''<tt>RRR</tt>''' is a value between <tt>0x000</tt> and <tt>0xfff</tt>. Go back <tt>RRR+1</tt> bytes in the output stream to find the start of the data to be copied. | * '''<tt>RRR</tt>''' is a value between <tt>0x000</tt> and <tt>0xfff</tt>. Go back <tt>RRR+1</tt> bytes in the output stream to find the start of the data to be copied. | ||
* '''<tt>SIZE</tt>''' is calculated from '''<tt>N</tt>''' (see above) and declares the number of bytes to be copied. | * '''<tt>SIZE</tt>''' is calculated from '''<tt>N</tt>''' (see above) and declares the number of bytes to be copied. | ||
− | * It is important to know | + | * It is important to know, that a chunk may reference itself. For example if <tt>RRR=1</tt> (go back 1+1=2) and <tt>SIZE=10</tt> the previous 2 bytes are copied 10/2=5 times. |
Decoding data groups and chunks is done until the end of the destination data is reached. | Decoding data groups and chunks is done until the end of the destination data is reached. | ||
Line 66: | Line 66: | ||
== Examples == | == Examples == | ||
=== Decompression === | === Decompression === | ||
− | |||
;GNU C example | ;GNU C example | ||
<pre> | <pre> | ||
Line 74: | Line 73: | ||
u8 * dest_end = // pointer to end of destination (last byte +1) | u8 * dest_end = // pointer to end of destination (last byte +1) | ||
− | u8 | + | u8 group_head = 0; // group header byte ... |
− | int | + | int group_head_len = 0; // ... and it's length to manage groups |
+ | |||
+ | while ( src < src_end && dest < dest_end ) | ||
+ | { | ||
+ | if (!group_head_len) | ||
+ | { | ||
+ | //*** start a new data group and read the group header byte. | ||
− | + | group_head = *src++; | |
− | + | group_head_len = 8; | |
− | + | } | |
− | |||
− | |||
− | |||
− | } | ||
− | if ( | + | group_head_len--; |
+ | if ( group_head & 0x80 ) | ||
{ | { | ||
− | // copy 1 byte direct | + | //*** bit in group header byte is set -> copy 1 byte direct |
+ | |||
*dest++ = *src++; | *dest++ = *src++; | ||
} | } | ||
− | else | + | else |
− | { | + | { |
− | // | + | //*** bit in group header byte is not set -> run length encoding |
− | const u8 | + | // read the first 2 bytes of the chunk |
− | const u8 | + | const u8 b1 = *src++; |
+ | const u8 b2 = *src++; | ||
+ | |||
+ | // calculate the source position | ||
const u8 * copy_src = dest - (( b1 & 0x0f ) << 8 | b2 ) - 1; | const u8 * copy_src = dest - (( b1 & 0x0f ) << 8 | b2 ) - 1; | ||
− | int n = b1 >> 4; | + | // calculate the number of bytes to copy. |
− | if (!n) | + | int n = b1 >> 4; |
− | n = *src++ + 0x12; | + | |
+ | if (!n) | ||
+ | n = *src++ + 0x12; // N==0 -> read third byte | ||
else | else | ||
− | n += 2; | + | n += 2; // add 2 to length |
ASSERT( n >= 3 && n <= 0x111 ); | ASSERT( n >= 3 && n <= 0x111 ); | ||
− | if ( copy_src < szs->data | + | // a validity check |
+ | if ( copy_src < szs->data || dest + n > dest_end ) | ||
return ERROR("Corrupted data!\n"); | return ERROR("Corrupted data!\n"); | ||
+ | // copy chunk data. | ||
// don't use memcpy() or memmove() here because | // don't use memcpy() or memmove() here because | ||
// they don't work with self referencing chunks. | // they don't work with self referencing chunks. | ||
Line 113: | Line 123: | ||
*dest++ = *copy_src++; | *dest++ = *copy_src++; | ||
} | } | ||
− | + | ||
+ | // shift group header byte | ||
+ | group_head <<= 1; | ||
} | } | ||
+ | |||
+ | // some assertions to find errors in debugging mode | ||
ASSERT( src <= src_end ); | ASSERT( src <= src_end ); | ||
ASSERT( dest <= dest_end ); | ASSERT( dest <= dest_end ); | ||
Line 121: | Line 135: | ||
== Tools == | == Tools == | ||
− | |||
The following tools can handle compressed U8 files (=SZS files): | The following tools can handle compressed U8 files (=SZS files): | ||
+ | * [[CTools Pack]], by [[MrBean35000vr]] and [[Chadderz]] | ||
* [[SZS Modifier]], by [[MrBean35000vr]] and [[Chadderz]] | * [[SZS Modifier]], by [[MrBean35000vr]] and [[Chadderz]] | ||
− | * [[ | + | * [[Wexos's Toolbox]], by [[Wexos]] |
* [[Wiimms SZS Tools]], by [[Wiimm]] | * [[Wiimms SZS Tools]], by [[Wiimm]] | ||
− | [[Wiimms SZS Tools]] | + | [[Wexos's Toolbox]] and [[Wiimms SZS Tools]] can (de)compress any kind of Yaz0-compressed files. [[CTools]] and [[SZS Modifier]] can only handle [[U8]] files. |
− | [[ | + | [[Category:File Format/Other]] |
Latest revision as of 19:43, 30 May 2021
Yaz0 is a run length encoding (RLE compression) method. In Mario Kart Wii most of the SZS files are Yaz0 compressed U8 files. See »File format: U8 archives« for details.
Data structure
Header
The header of a Yaz0 file is always 16 bytes long. All numeric values are stored as big endian.
Offset | Type | Description |
---|---|---|
0x00 | Char[4] | magic. Always Yaz0 in ASCII. |
0x04 | UInt32 | Size in bytes of the uncompressed data. |
0x08 | UInt32[2] | Reserved for special use. Always 0 in Mario Kart Wii. |
- GNU C example
typedef struct yaz0_header_t { char magic[4]; // always "Yaz0" be32_t uncompressed_size; // total size of uncompressed data be32_t reserved[2]; // two unsigned integers reserved for special use } __attribute__ ((packed)) yaz0_header_t;
Data Groups
The complete compressed data is organized in data groups. Each data group consists of 1 group header byte and 8 chunks.
N | Size | Description |
---|---|---|
1 | 1 byte | the group header byte |
8 | 1-3 bytes | 8 chunks |
Each bit of the group header corresponds to one chunk:
- The MSB (most significant bit, 0x80) corresponds to chunk 1
- The LSB (least significant bit, 0x01) corresponds to chunk 8
A set bit (=1) in the group header means that the chunk is exactly 1 byte long. This byte must be copied to the output stream 1:1. A cleared bit (=0) means that the chunk is 2 or 3 bytes long and is interpreted as a back-reference to already decompressed data that must be copied.
Size | Data Bytes | Size Calculation | |
---|---|---|---|
2 bytes | NR RR | N = 1..f | SIZE = N+2 (=3..0x11) |
3 bytes | 0R RR NN | N = 00..ff | SIZE = N+0x12 (=0x12..0x111) |
- RRR is a value between 0x000 and 0xfff. Go back RRR+1 bytes in the output stream to find the start of the data to be copied.
- SIZE is calculated from N (see above) and declares the number of bytes to be copied.
- It is important to know that a chunk may reference itself. For example, if RRR=1 (go back 1+1=2) and SIZE=10, the previous 2 bytes are copied 10/2=5 times.
Decoding data groups and chunks is done until the end of the destination data is reached.
Examples
Decompression
- GNU C example
const u8 * src = // pointer to start of source const u8 * src_end = // pointer to end of source (last byte +1) u8 * dest = // pointer to start of destination u8 * dest_end = // pointer to end of destination (last byte +1) u8 group_head = 0; // group header byte ... int group_head_len = 0; // ... and it's length to manage groups while ( src < src_end && dest < dest_end ) { if (!group_head_len) { //*** start a new data group and read the group header byte. group_head = *src++; group_head_len = 8; } group_head_len--; if ( group_head & 0x80 ) { //*** bit in group header byte is set -> copy 1 byte direct *dest++ = *src++; } else { //*** bit in group header byte is not set -> run length encoding // read the first 2 bytes of the chunk const u8 b1 = *src++; const u8 b2 = *src++; // calculate the source position const u8 * copy_src = dest - (( b1 & 0x0f ) << 8 | b2 ) - 1; // calculate the number of bytes to copy. int n = b1 >> 4; if (!n) n = *src++ + 0x12; // N==0 -> read third byte else n += 2; // add 2 to length ASSERT( n >= 3 && n <= 0x111 ); // a validity check if ( copy_src < szs->data || dest + n > dest_end ) return ERROR("Corrupted data!\n"); // copy chunk data. // don't use memcpy() or memmove() here because // they don't work with self referencing chunks. while ( n-- > 0 ) *dest++ = *copy_src++; } // shift group header byte group_head <<= 1; } // some assertions to find errors in debugging mode ASSERT( src <= src_end ); ASSERT( dest <= dest_end );
This code example is taken from Wiimms SZS Tools: SVN repository lib-szs.c line 162
Tools
The following tools can handle compressed U8 files (=SZS files):
- CTools Pack, by MrBean35000vr and Chadderz
- SZS Modifier, by MrBean35000vr and Chadderz
- Wexos's Toolbox, by Wexos
- Wiimms SZS Tools, by Wiimm
Wexos's Toolbox and Wiimms SZS Tools can (de)compress any kind of Yaz0-compressed file. CTools and SZS Modifier can only handle U8 files.