1 /*
2 * Copyright (C) 2009 Karel Zak <kzak@redhat.com>
3 *
4 * This file may be redistributed under the terms of the
5 * GNU Lesser General Public License.
6 */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <stdint.h>
12 #include <stdbool.h>
13 #include <assert.h>
14 #include <inttypes.h>
15
16 #ifdef HAVE_LINUX_BLKZONED_H
17 #include <linux/blkzoned.h>
18 #endif
19
20 #include "superblocks.h"
21 #include "crc32c.h"
22 #include "sha256.h"
23 #include "xxhash.h"
24
/*
 * Checksum algorithm identifiers. The value is read from the csum_type
 * field of the superblock and selects how the checksum is verified.
 */
enum btrfs_super_block_csum_type {
	BTRFS_SUPER_BLOCK_CSUM_TYPE_CRC32C	= 0,	/* 32-bit CRC32C */
	BTRFS_SUPER_BLOCK_CSUM_TYPE_XXHASH	= 1,	/* 64-bit xxHash */
	BTRFS_SUPER_BLOCK_CSUM_TYPE_SHA256	= 2,	/* SHA-256 digest */
};
30
31 union btrfs_super_block_csum {
32 uint8_t bytes[32];
33 uint32_t crc32c;
34 XXH64_hash_t xxh64;
35 uint8_t sha256[UL_SHA256LENGTH];
36 };
37
38 struct btrfs_super_block {
39 union btrfs_super_block_csum csum;
40 uint8_t fsid[16];
41 uint64_t bytenr;
42 uint64_t flags;
43 uint8_t magic[8];
44 uint64_t generation;
45 uint64_t root;
46 uint64_t chunk_root;
47 uint64_t log_root;
48 uint64_t log_root_transid;
49 uint64_t total_bytes;
50 uint64_t bytes_used;
51 uint64_t root_dir_objectid;
52 uint64_t num_devices;
53 uint32_t sectorsize;
54 uint32_t nodesize;
55 uint32_t leafsize;
56 uint32_t stripesize;
57 uint32_t sys_chunk_array_size;
58 uint64_t chunk_root_generation;
59 uint64_t compat_flags;
60 uint64_t compat_ro_flags;
61 uint64_t incompat_flags;
62 uint16_t csum_type;
63 uint8_t root_level;
64 uint8_t chunk_root_level;
65 uint8_t log_root_level;
66 struct btrfs_dev_item {
67 uint64_t devid;
68 uint64_t total_bytes;
69 uint64_t bytes_used;
70 uint32_t io_align;
71 uint32_t io_width;
72 uint32_t sector_size;
73 uint64_t type;
74 uint64_t generation;
75 uint64_t start_offset;
76 uint32_t dev_group;
77 uint8_t seek_speed;
78 uint8_t bandwidth;
79 uint8_t uuid[16];
80 uint8_t fsid[16];
81 } __attribute__ ((__packed__)) dev_item;
82 uint8_t label[256];
83 uint8_t padding[3541]; /* pad to BTRFS_SUPER_INFO_SIZE for csum calculation */
84 } __attribute__ ((__packed__));
85
/* Size in bytes of one superblock copy */
#define BTRFS_SUPER_INFO_SIZE 4096

/* Number of zones that make up the superblock log */
#define BTRFS_NR_SB_LOG_ZONES 2

/*
 * Macros and types that mirror the kernel side so this code reads alike.
 * Shifting a sector number left by SECTOR_SHIFT yields a byte offset.
 */
#define SECTOR_SHIFT 9

typedef uint64_t sector_t;
95
96 #ifdef HAVE_LINUX_BLKZONED_H
97 static int sb_write_pointer(blkid_probe pr, struct blk_zone *zones, uint64_t *wp_ret)
98 {
99 bool empty[BTRFS_NR_SB_LOG_ZONES];
100 bool full[BTRFS_NR_SB_LOG_ZONES];
101 sector_t sector;
102
103 assert(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
104 zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
105
106 empty[0] = zones[0].cond == BLK_ZONE_COND_EMPTY;
107 empty[1] = zones[1].cond == BLK_ZONE_COND_EMPTY;
108 full[0] = zones[0].cond == BLK_ZONE_COND_FULL;
109 full[1] = zones[1].cond == BLK_ZONE_COND_FULL;
110
111 /*
112 * Possible states of log buffer zones
113 *
114 * Empty[0] In use[0] Full[0]
115 * Empty[1] * x 0
116 * In use[1] 0 x 0
117 * Full[1] 1 1 C
118 *
119 * Log position:
120 * *: Special case, no superblock is written
121 * 0: Use write pointer of zones[0]
122 * 1: Use write pointer of zones[1]
123 * C: Compare super blocks from zones[0] and zones[1], use the latest
124 * one determined by generation
125 * x: Invalid state
126 */
127
128 if (empty[0] && empty[1]) {
129 /* Special case to distinguish no superblock to read */
130 *wp_ret = zones[0].start << SECTOR_SHIFT;
131 return -ENOENT;
132 } else if (full[0] && full[1]) {
133 /* Compare two super blocks */
134 struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
135 int i;
136
137 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
138 uint64_t bytenr;
139
140 bytenr = ((zones[i].start + zones[i].len)
141 << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
142
143 super[i] = (struct btrfs_super_block *)
144 blkid_probe_get_buffer(pr, bytenr, BTRFS_SUPER_INFO_SIZE);
145 if (!super[i])
146 return -EIO;
147 DBG(LOWPROBE, ul_debug("(btrfs) checking #%d zone "
148 "[start=%" PRIu64", len=%" PRIu64", sb-offset=%" PRIu64"]",
149 i, (uint64_t) zones[i].start,
150 (uint64_t) zones[i].len, bytenr));
151 }
152
153 if (super[0]->generation > super[1]->generation)
154 sector = zones[1].start;
155 else
156 sector = zones[0].start;
157 } else if (!full[0] && (empty[1] || full[1])) {
158 sector = zones[0].wp;
159 } else if (full[0]) {
160 sector = zones[1].wp;
161 } else {
162 return -EUCLEAN;
163 }
164 *wp_ret = sector << SECTOR_SHIFT;
165
166 DBG(LOWPROBE, ul_debug("(btrfs) write pointer: %" PRIu64" sector", sector));
167 return 0;
168 }
169
170 static int sb_log_offset(blkid_probe pr, uint64_t *bytenr_ret)
171 {
172 uint32_t zone_num = 0;
173 uint32_t zone_size_sector;
174 struct blk_zone_report *rep;
175 struct blk_zone *zones;
176 int ret;
177 int i;
178 uint64_t wp;
179
180
181 zone_size_sector = pr->zone_size >> SECTOR_SHIFT;
182 rep = blkdev_get_zonereport(pr->fd, zone_num * zone_size_sector, 2);
183 if (!rep) {
184 ret = -errno;
185 goto out;
186 }
187 zones = (struct blk_zone *)(rep + 1);
188
189 /*
190 * Use the head of the first conventional zone, if the zones
191 * contain one.
192 */
193 for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
194 if (zones[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
195 DBG(LOWPROBE, ul_debug("(btrfs) checking conventional zone"));
196 *bytenr_ret = zones[i].start << SECTOR_SHIFT;
197 ret = 0;
198 goto out;
199 }
200 }
201
202 ret = sb_write_pointer(pr, zones, &wp);
203 if (ret != -ENOENT && ret) {
204 ret = 1;
205 goto out;
206 }
207 if (ret != -ENOENT) {
208 if (wp == zones[0].start << SECTOR_SHIFT)
209 wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
210 wp -= BTRFS_SUPER_INFO_SIZE;
211 }
212 *bytenr_ret = wp;
213
214 ret = 0;
215 out:
216 free(rep);
217
218 return ret;
219 }
220 #endif
221
222 static int btrfs_verify_csum(blkid_probe pr, const struct btrfs_super_block *bfs)
223 {
224 uint16_t csum_type = le16_to_cpu(bfs->csum_type);
225 const void *csum_data = (char *) bfs + sizeof(bfs->csum);
226 size_t csum_data_size = sizeof(*bfs) - sizeof(bfs->csum);
227 switch (csum_type) {
228 case BTRFS_SUPER_BLOCK_CSUM_TYPE_CRC32C: {
229 uint32_t crc = ~crc32c(~0L, csum_data, csum_data_size);
230 return blkid_probe_verify_csum(pr, crc,
231 le32_to_cpu(bfs->csum.crc32c));
232 }
233 case BTRFS_SUPER_BLOCK_CSUM_TYPE_XXHASH: {
234 XXH64_hash_t xxh64 = XXH64(csum_data, csum_data_size, 0);
235 return blkid_probe_verify_csum(pr, xxh64,
236 le64_to_cpu(bfs->csum.xxh64));
237 }
238 case BTRFS_SUPER_BLOCK_CSUM_TYPE_SHA256: {
239 uint8_t sha256[UL_SHA256LENGTH];
240 ul_SHA256(sha256, csum_data, csum_data_size);
241 return blkid_probe_verify_csum_buf(pr, UL_SHA256LENGTH,
242 sha256, bfs->csum.sha256);
243 }
244 default:
245 DBG(LOWPROBE, ul_debug("(btrfs) unknown checksum type %d, skipping validation",
246 csum_type));
247 return 1;
248 }
249 }
250
251 static int probe_btrfs(blkid_probe pr, const struct blkid_idmag *mag)
252 {
253 struct btrfs_super_block *bfs;
254
255 if (pr->zone_size) {
256 #ifdef HAVE_LINUX_BLKZONED_H
257 uint64_t offset = 0;
258 int ret;
259
260 ret = sb_log_offset(pr, &offset);
261 if (ret)
262 return ret;
263 bfs = (struct btrfs_super_block *)
264 blkid_probe_get_buffer(pr, offset,
265 sizeof(struct btrfs_super_block));
266 #else
267 /* Nothing can be done */
268 return 1;
269 #endif
270 } else {
271 bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
272 }
273 if (!bfs)
274 return errno ? -errno : 1;
275
276 if (!btrfs_verify_csum(pr, bfs))
277 return 1;
278
279 /* Invalid sector size; total_bytes would be bogus. */
280 if (!le32_to_cpu(bfs->sectorsize))
281 return 1;
282
283 if (*bfs->label)
284 blkid_probe_set_label(pr,
285 (unsigned char *) bfs->label,
286 sizeof(bfs->label));
287
288 blkid_probe_set_uuid(pr, bfs->fsid);
289 blkid_probe_set_uuid_as(pr, bfs->dev_item.uuid, "UUID_SUB");
290 blkid_probe_set_fsblocksize(pr, le32_to_cpu(bfs->sectorsize));
291 blkid_probe_set_block_size(pr, le32_to_cpu(bfs->sectorsize));
292
293 uint32_t sectorsize_log = 31 -
294 __builtin_clz(le32_to_cpu(bfs->sectorsize));
295 blkid_probe_set_fslastblock(pr,
296 le64_to_cpu(bfs->total_bytes) >> sectorsize_log);
297
298 /* The size is calculated without the RAID factor. It could not be
299 * obtained from the superblock as it is property of device tree.
300 * Without the factor we would show fs size with the redundant data. The
301 * acquisition of the factor will require additional parsing of btrfs
302 * tree.
303 */
304 blkid_probe_set_fssize(pr, le64_to_cpu(bfs->total_bytes));
305
306 return 0;
307 }
308
309 const struct blkid_idinfo btrfs_idinfo =
310 {
311 .name = "btrfs",
312 .usage = BLKID_USAGE_FILESYSTEM,
313 .probefunc = probe_btrfs,
314 .minsz = 1024 * 1024,
315 .magics =
316 {
317 { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40, .kboff = 64 },
318 /* For zoned btrfs */
319 { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
320 .is_zoned = 1, .zonenum = 0, .kboff_inzone = 0 },
321 { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
322 .is_zoned = 1, .zonenum = 1, .kboff_inzone = 0 },
323 { NULL }
324 }
325 };
326