1  /*
       2   * Copyright (C) 2009-2010 by Andreas Dilger <adilger@sun.com>
       3   *
       4   * This file may be redistributed under the terms of the
       5   * GNU Lesser General Public License.
       6   */
       7  
       8  #include <stdio.h>
       9  #include <stdlib.h>
      10  #include <unistd.h>
      11  #include <string.h>
      12  #include <errno.h>
      13  #include <ctype.h>
      14  #include <inttypes.h>
      15  #include <limits.h>
      16  
      17  #include "superblocks.h"
      18  
/*
 * Layout constants, in bytes.  The two VDEV_LABEL_* offsets are relative to
 * the start of a label: the uberblock scan starts at VDEV_LABEL_UBERBLOCK and
 * the name/value list is read at VDEV_LABEL_NVPAIR.
 */
#define VDEV_LABEL_UBERBLOCK	(128 * 1024ULL)
#define VDEV_LABEL_NVPAIR	( 16 * 1024ULL)
/* size of one label; also used as the alignment when rounding an offset
 * down to the start of its label */
#define VDEV_LABEL_SIZE		(256 * 1024ULL)
/* distance between two consecutive uberblock slots */
#define UBERBLOCK_SIZE		1024ULL
/* number of uberblock slots examined in each label */
#define UBERBLOCKS_COUNT   128
      24  
/* #include <sys/uberblock_impl.h> */
#define UBERBLOCK_MAGIC         0x00bab10c              /* oo-ba-bloc!  */
/*
 * Leading fields of an uberblock as read from a label.  This file only looks
 * at ub_magic (compared against UBERBLOCK_MAGIC in both byte orders) and
 * ub_version (byte-swapped when the magic matched in swapped order).
 * The struct is packed, so no padding is inserted between the fields.
 */
struct zfs_uberblock {
	uint64_t	ub_magic;	/* UBERBLOCK_MAGIC		*/
	uint64_t	ub_version;	/* SPA_VERSION			*/
	uint64_t	ub_txg;		/* txg of last sync		*/
	uint64_t	ub_guid_sum;	/* sum of all vdev guids	*/
	uint64_t	ub_timestamp;	/* UTC time of last sync	*/
	char		ub_rootbp;	/* MOS objset_phys_t		*/
} __attribute__((packed));
      35  
/* minimum number of uberblocks that must be found for a positive match */
#define ZFS_WANT	 4

/* value type tags; compared against the (big-endian) type field that starts
 * every nvpair value */
#define DATA_TYPE_UINT64 8
#define DATA_TYPE_STRING 9
#define DATA_TYPE_DIRECTORY 19
      41  
/*
 * Header of one name/value pair.  All fields are big-endian.
 * nvp_size is the size of the whole pair (header, padded name and value);
 * the parser advances by it to reach the next pair, and a zero nvp_size is
 * treated as the end of the current list.  nvp_namelen is the length of
 * nvp_name in bytes; the value follows the name rounded up to a multiple of
 * four bytes.  nvp_unkown is not interpreted by this file.
 */
struct nvpair {
	uint32_t	nvp_size;
	uint32_t	nvp_unkown;
	uint32_t	nvp_namelen;
	char		nvp_name[0]; /* aligned to 4 bytes */
	/* aligned ptr array for string arrays */
	/* aligned array of data for value */
};
      50  
/*
 * String value of an nvpair (fields are big-endian).  nvs_type is expected to
 * be DATA_TYPE_STRING and nvs_strlen gives the number of bytes in nvs_string;
 * the string is always handled together with that explicit length.
 * nvs_elem is not interpreted by this file.
 */
struct nvstring {
	uint32_t	nvs_type;
	uint32_t	nvs_elem;
	uint32_t	nvs_strlen;
	unsigned char	nvs_string[0];
};
      57  
/*
 * 64-bit unsigned value of an nvpair (fields are big-endian).  nvu_type is
 * expected to be DATA_TYPE_UINT64.  The struct is packed, so nvu_value
 * directly follows the two 32-bit fields; readers copy it out with memcpy()
 * before converting it to host byte order.  nvu_elem is not interpreted by
 * this file.
 */
struct nvuint64 {
	uint32_t	nvu_type;
	uint32_t	nvu_elem;
	uint64_t	nvu_value;
} __attribute__((packed));
      63  
/*
 * Value header that introduces a nested list: when nvd_type (big-endian)
 * equals DATA_TYPE_DIRECTORY, the parser skips this header and continues
 * with the pairs of the nested list.  nvd_unknown is not interpreted by this
 * file.
 */
struct nvdirectory {
	uint32_t	nvd_type;
	uint32_t	nvd_unknown[3];
};
      68  
/*
 * Start of the name/value list stored in a label: twelve bytes that this file
 * does not interpret, followed by the first nvpair.
 */
struct nvlist {
	uint32_t	nvl_unknown[3];
	struct nvpair	nvl_nvpair;
};
      73  
      74  static void zfs_process_value(blkid_probe pr, char *name, size_t namelen,
      75  			     void *value, size_t max_value_size, unsigned directory_level)
      76  {
      77  	if (strncmp(name, "name", namelen) == 0 &&
      78  	    sizeof(struct nvstring) <= max_value_size &&
      79  	    !directory_level) {
      80  		struct nvstring *nvs = value;
      81  		uint32_t nvs_type = be32_to_cpu(nvs->nvs_type);
      82  		uint32_t nvs_strlen = be32_to_cpu(nvs->nvs_strlen);
      83  
      84  		if (nvs_type != DATA_TYPE_STRING ||
      85  		    (uint64_t)nvs_strlen + sizeof(*nvs) > max_value_size)
      86  			return;
      87  
      88  		DBG(LOWPROBE, ul_debug("nvstring: type %u string %*s",
      89  				       nvs_type, nvs_strlen, nvs->nvs_string));
      90  
      91  		blkid_probe_set_label(pr, nvs->nvs_string, nvs_strlen);
      92  	} else if (strncmp(name, "guid", namelen) == 0 &&
      93  		   sizeof(struct nvuint64) <= max_value_size &&
      94  		   !directory_level) {
      95  		struct nvuint64 *nvu = value;
      96  		uint32_t nvu_type = be32_to_cpu(nvu->nvu_type);
      97  		uint64_t nvu_value;
      98  
      99  		memcpy(&nvu_value, &nvu->nvu_value, sizeof(nvu_value));
     100  		nvu_value = be64_to_cpu(nvu_value);
     101  
     102  		if (nvu_type != DATA_TYPE_UINT64)
     103  			return;
     104  
     105  		DBG(LOWPROBE, ul_debug("nvuint64: type %u value %"PRIu64,
     106  				       nvu_type, nvu_value));
     107  
     108  		blkid_probe_sprintf_value(pr, "UUID_SUB",
     109  					  "%"PRIu64, nvu_value);
     110  	} else if (strncmp(name, "pool_guid", namelen) == 0 &&
     111  		   sizeof(struct nvuint64) <= max_value_size &&
     112  		   !directory_level) {
     113  		struct nvuint64 *nvu = value;
     114  		uint32_t nvu_type = be32_to_cpu(nvu->nvu_type);
     115  		uint64_t nvu_value;
     116  
     117  		memcpy(&nvu_value, &nvu->nvu_value, sizeof(nvu_value));
     118  		nvu_value = be64_to_cpu(nvu_value);
     119  
     120  		if (nvu_type != DATA_TYPE_UINT64)
     121  			return;
     122  
     123  		DBG(LOWPROBE, ul_debug("nvuint64: type %u value %"PRIu64,
     124  				       nvu_type, nvu_value));
     125  
     126  		blkid_probe_sprintf_uuid(pr, (unsigned char *) &nvu_value,
     127  					 sizeof(nvu_value),
     128  					 "%"PRIu64, nvu_value);
     129  	} else if (strncmp(name, "ashift", namelen) == 0 &&
     130  		   sizeof(struct nvuint64) <= max_value_size) {
     131  		struct nvuint64 *nvu = value;
     132  		uint32_t nvu_type = be32_to_cpu(nvu->nvu_type);
     133  		uint64_t nvu_value;
     134  
     135  		memcpy(&nvu_value, &nvu->nvu_value, sizeof(nvu_value));
     136  		nvu_value = be64_to_cpu(nvu_value);
     137  
     138  		if (nvu_type != DATA_TYPE_UINT64)
     139  			return;
     140  
     141  		if (nvu_value < 32){
     142  			blkid_probe_set_fsblocksize(pr, 1U << nvu_value);
     143  			blkid_probe_set_block_size(pr, 1U << nvu_value);
     144  		}
     145  	}
     146  }
     147  
     148  static void zfs_extract_guid_name(blkid_probe pr, loff_t offset)
     149  {
     150  	unsigned char *p;
     151  	struct nvlist *nvl;
     152  	struct nvpair *nvp;
     153  	size_t left = 4096;
     154  	unsigned directory_level = 0;
     155  
     156  	offset = (offset & ~(VDEV_LABEL_SIZE - 1)) + VDEV_LABEL_NVPAIR;
     157  
     158  	/* Note that we currently assume that the desired fields are within
     159  	 * the first 4k (left) of the nvlist.  This is true for all pools
     160  	 * I've seen, and simplifies this code somewhat, because we don't
     161  	 * have to handle an nvpair crossing a buffer boundary. */
     162  	p = blkid_probe_get_buffer(pr, offset, left);
     163  	if (!p)
     164  		return;
     165  
     166  	DBG(LOWPROBE, ul_debug("zfs_extract: nvlist offset %jd",
     167  			       (intmax_t)offset));
     168  
     169  	nvl = (struct nvlist *) p;
     170  	nvp = &nvl->nvl_nvpair;
     171  	left -= (unsigned char *)nvp - p; /* Already used up 12 bytes */
     172  
     173  	while (left > sizeof(*nvp)) {
     174  		uint32_t nvp_size = be32_to_cpu(nvp->nvp_size);
     175  		uint32_t nvp_namelen = be32_to_cpu(nvp->nvp_namelen);
     176  		uint64_t namesize = ((uint64_t)nvp_namelen + 3) & ~3;
     177  		size_t max_value_size;
     178  		void *value;
     179  
     180  		if (!nvp->nvp_size) {
     181  			if (!directory_level)
     182  				break;
     183  			directory_level--;
     184  			nvp_size = 8;
     185  			goto cont;
     186  		}
     187  
     188  		DBG(LOWPROBE, ul_debug("left %zd nvp_size %u",
     189  				       left, nvp_size));
     190  
     191  		/* nvpair fits in buffer and name fits in nvpair? */
     192  		if (nvp_size > left || namesize + sizeof(*nvp) > nvp_size)
     193  			break;
     194  
     195  		DBG(LOWPROBE,
     196  		    ul_debug("nvlist: size %u, namelen %u, name %*s",
     197  			     nvp_size, nvp_namelen, nvp_namelen,
     198  			     nvp->nvp_name));
     199  
     200  		max_value_size = nvp_size - (namesize + sizeof(*nvp));
     201  		value = nvp->nvp_name + namesize;
     202  
     203  		if (sizeof(struct nvdirectory) <= max_value_size) {
     204  			struct nvdirectory *nvu = value;
     205  			if (be32_to_cpu(nvu->nvd_type) == DATA_TYPE_DIRECTORY) {
     206  				nvp_size = sizeof(*nvp) + namesize + sizeof(*nvu);
     207  				directory_level++;
     208  				goto cont;
     209  			}
     210  		}
     211  
     212  		zfs_process_value(pr, nvp->nvp_name, nvp_namelen,
     213  				  value, max_value_size, directory_level);
     214  
     215  cont:
     216  		if (nvp_size > left)
     217  			break;
     218  		left -= nvp_size;
     219  
     220  		nvp = (struct nvpair *)((char *)nvp + nvp_size);
     221  	}
     222  }
     223  
     224  static int find_uberblocks(const void *label, loff_t *ub_offset, int *swap_endian)
     225  {
     226  	uint64_t swab_magic = swab64((uint64_t)UBERBLOCK_MAGIC);
     227  	const struct zfs_uberblock *ub;
     228  	int i, found = 0;
     229  	loff_t offset = VDEV_LABEL_UBERBLOCK;
     230  
     231  	for (i = 0; i < UBERBLOCKS_COUNT; i++, offset += UBERBLOCK_SIZE) {
     232  		ub = (const struct zfs_uberblock *)((const char *) label + offset);
     233  
     234  		if (ub->ub_magic == UBERBLOCK_MAGIC) {
     235  			*ub_offset = offset;
     236  			*swap_endian = 0;
     237  			found++;
     238  			DBG(LOWPROBE, ul_debug("probe_zfs: found little-endian uberblock at %jd", (intmax_t)offset >> 10));
     239  		}
     240  
     241  		if (ub->ub_magic == swab_magic) {
     242  			*ub_offset = offset;
     243  			*swap_endian = 1;
     244  			found++;
     245  			DBG(LOWPROBE, ul_debug("probe_zfs: found big-endian uberblock at %jd", (intmax_t)offset >> 10));
     246  		}
     247  	}
     248  
     249  	return found;
     250  }
     251  
     252  /* ZFS has 128x1kB host-endian root blocks, stored in 2 areas at the start
     253   * of the disk, and 2 areas at the end of the disk.  Check only some of them...
     254   * #4 (@ 132kB) is the first one written on a new filesystem. */
     255  static int probe_zfs(blkid_probe pr,
     256  	const struct blkid_idmag *mag  __attribute__((__unused__)))
     257  {
     258  	int swab_endian = 0;
     259  	struct zfs_uberblock *ub = NULL;
     260  	loff_t offset = 0, ub_offset = 0;
     261  	int label_no, found = 0, found_in_label;
     262  	void *label;
     263  	loff_t blk_align = (pr->size % (256 * 1024ULL));
     264  
     265  	DBG(PROBE, ul_debug("probe_zfs"));
     266  	/* Look for at least 4 uberblocks to ensure a positive match */
     267  	for (label_no = 0; label_no < 4; label_no++) {
     268  		switch(label_no) {
     269  		case 0: // jump to L0
     270  			offset = 0;
     271  			break;
     272  		case 1: // jump to L1
     273  			offset = VDEV_LABEL_SIZE;
     274  			break;
     275  		case 2: // jump to L2
     276  			offset = pr->size - 2 * VDEV_LABEL_SIZE - blk_align;
     277  			break;
     278  		case 3: // jump to L3
     279  			offset = pr->size - VDEV_LABEL_SIZE - blk_align;
     280  			break;
     281  		}
     282  
     283  		if ((S_ISREG(pr->mode) || blkid_probe_is_wholedisk(pr)) &&
     284  		    blkid_probe_is_covered_by_pt(pr,  offset, VDEV_LABEL_SIZE))
     285  			/* ignore this area, it's within any partition and
     286  			 * we are working with whole-disk now */
     287  			continue;
     288  
     289  		label = blkid_probe_get_buffer(pr, offset, VDEV_LABEL_SIZE);
     290  		if (label == NULL)
     291  			return errno ? -errno : 1;
     292  
     293  		found_in_label = find_uberblocks(label, &ub_offset, &swab_endian);
     294  
     295  		if (found_in_label > 0) {
     296  			found+= found_in_label;
     297  			ub = (struct zfs_uberblock *)((char *) label + ub_offset);
     298  			ub_offset += offset;
     299  
     300  			if (found >= ZFS_WANT)
     301  				break;
     302  		}
     303  	}
     304  
     305  	if (found < ZFS_WANT)
     306  		return 1;
     307  
     308  	/* If we found the 4th uberblock, then we will have exited from the
     309  	 * scanning loop immediately, and ub will be a valid uberblock. */
     310  	blkid_probe_sprintf_version(pr, "%" PRIu64, swab_endian ?
     311  				    swab64(ub->ub_version) : ub->ub_version);
     312  
     313  	zfs_extract_guid_name(pr, offset);
     314  
     315  	if (blkid_probe_set_magic(pr, ub_offset,
     316  				sizeof(ub->ub_magic),
     317  				(unsigned char *) &ub->ub_magic))
     318  		return 1;
     319  
     320  	blkid_probe_set_fsendianness(pr, !swab_endian ?
     321  			BLKID_ENDIANNESS_NATIVE : BLKID_ENDIANNESS_OTHER);
     322  
     323  	return 0;
     324  }
     325  
/*
 * Probe descriptor for ZFS pool members, reported under the name
 * "zfs_member".  No magic strings are registered (BLKID_NONE_MAGIC), so the
 * whole detection is done by probe_zfs().  minsz is set to 64 MiB --
 * presumably the smallest device this prober is run on; confirm against the
 * definition of struct blkid_idinfo.
 */
const struct blkid_idinfo zfs_idinfo =
{
	.name		= "zfs_member",
	.usage		= BLKID_USAGE_FILESYSTEM,
	.probefunc	= probe_zfs,
	.minsz		= 64 * 1024 * 1024,
	.magics		= BLKID_NONE_MAGIC
};