/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */

#include <commonlib/bsd/compression.h>
#include <commonlib/bsd/helpers.h>
#include <commonlib/bsd/sysincludes.h>
#include <stdint.h>
#include <string.h>
#include <endian.h>

/*
 * RISC-V and older ARM architectures do not mandate support for misaligned
 * accesses, but le16toh() and friends assume that misaligned accesses are
 * supported. Reading byte-by-byte like this lets the compiler decide whether
 * to emit misaligned-access instructions, so platforms that do support them
 * pay no performance penalty.
 */
static uint16_t LZ4_readLE16(const void *src)
{
	return *((const uint8_t *)src + 1) << 8
	     | *(const uint8_t *)src;
}

static uint32_t LZ4_readLE32(const void *src)
{
	/* Cast the high byte before shifting so the shift into bit 31 happens
	   in unsigned arithmetic instead of overflowing a signed int. */
	return (uint32_t)*((const uint8_t *)src + 3) << 24
	     | *((const uint8_t *)src + 2) << 16
	     | *((const uint8_t *)src + 1) << 8
	     | *(const uint8_t *)src;
}
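
/*
 * Worked example: a valid frame begins with the bytes 04 22 4D 18, so
 * LZ4_readLE32() on them yields 0x184D2204, i.e. LZ4F_MAGICNUMBER as
 * checked in ulz4fn() below.
 */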

static void LZ4_copy8(void *dst, const void *src)
{
/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
 * access into LDRD/STRD (which don't support unaligned accesses). */
#ifdef __arm__	/* ARMv < 6 doesn't support unaligned accesses at all. */
#if defined(__COREBOOT_ARM_ARCH__) && __COREBOOT_ARM_ARCH__ < 6
	int i;
	for (i = 0; i < 8; i++)
		((uint8_t *)dst)[i] = ((uint8_t *)src)[i];
#else
	uint32_t x0, x1;
	__asm__ ("ldr %[x0], [%[src]]"
		: [x0]"=r"(x0)
		: [src]"r"(src), "m"(*(const uint32_t *)src));
	__asm__ ("ldr %[x1], [%[src], #4]"
		: [x1]"=r"(x1)
		: [src]"r"(src), "m"(*(const uint32_t *)(src + 4)));
	__asm__ ("str %[x0], [%[dst]]"
		: "=m"(*(uint32_t *)dst)
		: [x0]"r"(x0), [dst]"r"(dst));
	__asm__ ("str %[x1], [%[dst], #4]"
		: "=m"(*(uint32_t *)(dst + 4))
		: [x1]"r"(x1), [dst]"r"(dst));
#endif
#elif defined(__riscv)
	/* RISC-V implementations may trap on any unaligned access. */
	int i;
	for (i = 0; i < 8; i++)
		((uint8_t *)dst)[i] = ((uint8_t *)src)[i];
#else
	*(uint64_t *)dst = *(const uint64_t *)src;
#endif
}

typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t  S32;
typedef uint64_t U64;

#define FORCE_INLINE static __always_inline
#define likely(expr) __builtin_expect((expr) != 0, 1)
#define unlikely(expr) __builtin_expect((expr) != 0, 0)

/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
#include "lz4.c.inc"	/* #include for inlining, do not link! */

#define LZ4F_MAGICNUMBER 0x184D2204

/* Bit field endianness is implementation-defined. Use masks instead.
 * https://stackoverflow.com/a/6044223 */
#define RESERVED0		0x03
#define HAS_CONTENT_CHECKSUM	0x04
#define HAS_CONTENT_SIZE	0x08
#define HAS_BLOCK_CHECKSUM	0x10
#define INDEPENDENT_BLOCKS	0x20
#define VERSION			0xC0
#define VERSION_SHIFT		6

#define RESERVED1_2		0x8F
#define MAX_BLOCK_SIZE		0x70
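
/*
 * Worked example (hypothetical flags byte): flags = 0x64 = 0b01100100
 *   (flags & VERSION) >> VERSION_SHIFT -> 1   (the only version we accept)
 *   flags & INDEPENDENT_BLOCKS         -> set (required by this decoder)
 *   flags & HAS_CONTENT_CHECKSUM       -> set
 *   flags & RESERVED0                  -> 0   (must be zero)
 */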

struct lz4_frame_header {
	uint32_t magic;
	uint8_t flags;
	uint8_t block_descriptor;
	/* + uint64_t content_size iff the HAS_CONTENT_SIZE flag is set */
	/* + uint8_t header_checksum */
} __packed;

#define BH_SIZE		0x7FFFFFFF
#define NOT_COMPRESSED	0x80000000
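
/*
 * Worked example (hypothetical header words):
 *   raw = 0x80001000 -> uncompressed block of 0x1000 bytes
 *   raw = 0x00001000 -> compressed block occupying 0x1000 input bytes
 *   raw = 0x00000000 -> EndMark, terminates the frame
 */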

struct lz4_block_header {
	uint32_t raw;
	/* + size bytes of data */
	/* + uint32_t block_checksum iff the HAS_BLOCK_CHECKSUM flag is set */
} __packed;

size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
{
	const void *in = src;
	void *out = dst;
	size_t out_size = 0;
	int has_block_checksum;

	{ /* With in-place decompression the header may become invalid later. */
		const struct lz4_frame_header *h = in;

		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
			return 0;	/* input overrun */

		/* We assume there's always only a single, standard frame. */
		if (LZ4_readLE32(&h->magic) != LZ4F_MAGICNUMBER
		    || (h->flags & VERSION) != (1 << VERSION_SHIFT))
			return 0;	/* unknown format */
		if ((h->flags & RESERVED0) || (h->block_descriptor & RESERVED1_2))
			return 0;	/* reserved must be zero */
		if (!(h->flags & INDEPENDENT_BLOCKS))
			return 0;	/* we don't support block dependency */
		has_block_checksum = h->flags & HAS_BLOCK_CHECKSUM;

		in += sizeof(*h);
		if (h->flags & HAS_CONTENT_SIZE)
			in += sizeof(uint64_t);
		in += sizeof(uint8_t);
	}

	while (1) {
		if ((size_t)(in - src) + sizeof(struct lz4_block_header) > srcn)
			break;	/* input overrun */

		struct lz4_block_header b = {
			.raw = LZ4_readLE32(in)
		};
		in += sizeof(struct lz4_block_header);

		if ((size_t)(in - src) + (b.raw & BH_SIZE) > srcn)
			break;	/* input overrun */

		if (!(b.raw & BH_SIZE)) {
			out_size = out - dst;
			break;	/* decompression successful */
		}

		if (b.raw & NOT_COMPRESSED) {
			size_t size = MIN((uintptr_t)(b.raw & BH_SIZE), (uintptr_t)dst
				+ dstn - (uintptr_t)out);
			memcpy(out, in, size);
			if (size < (b.raw & BH_SIZE))
				break;	/* output overrun */
			out += size;
		} else {
			/* constant folding essential, do not touch params! */
			int ret = LZ4_decompress_generic(in, out, (b.raw & BH_SIZE),
					dst + dstn - out, endOnInputSize,
					full, 0, noDict, out, NULL, 0);
			if (ret < 0)
				break;	/* decompression error */
			out += ret;
		}

		in += (b.raw & BH_SIZE);
		if (has_block_checksum)
			in += sizeof(uint32_t);
	}

	return out_size;
}

size_t ulz4f(const void *src, void *dst)
{
	/* LZ4 uses signed size parameters, so can't just use ((u32)-1) here. */
	return ulz4fn(src, 1*GiB, dst, 1*GiB);
}
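
/*
 * Usage sketch (illustrative only, guarded out of the build): decompress an
 * LZ4 frame into a fixed-size output buffer. The function name and buffer
 * size below are hypothetical.
 */
#if 0
static size_t example_decompress(const void *frame, size_t frame_size)
{
	static uint8_t outbuf[64 * KiB];	/* KiB comes from helpers.h */

	/* ulz4fn() returns the number of decompressed bytes, or 0 on any
	   error (truncated input, bad magic, output overrun, ...). */
	return ulz4fn(frame, frame_size, outbuf, sizeof(outbuf));
}
#endif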