This is a nuuu style: gigadelic at prgmr with FreeBSD!
2012-04-05 14:04
- Build Your Own Distribution
- Partitioning
- UFS via NetBSD
- Bootloader Configuration
- First Boot Maintenance
- Configuration
- Notes
I spent a year with CentOS on gigadelic, my web server, and found myself itching for FreeBSD. This installment of my unintentional “How to install FreeBSD on things” series examines the steps necessary to get FreeBSD running comfortably in my Xen environment, a paravirtualized instance at prgmr.
Note: This information also lives on the Prgmr wiki and may be more up-to-date there.
FreeBSD has fairly complete Xen domU support, though it’s not as well-tested as NetBSD’s. Nor does the FreeBSD project distribute an easy domU distribution, like NetBSD’s netbsd-INSTALL_XEN3_DOMU.gz
that’s runnable right from pvgrub, so the installation procedure is a tad involved.
You may either build your own copy of FreeBSD/Xen from an existing FreeBSD installation or download my prebuilt copy.
Some caveats:
- Xen paravirtualization (PV), as used on prgmr, is supported only on FreeBSD i386, not amd64. FreeBSD amd64 supports only Xen HVM and is thus unusable in the prgmr environment.
- Someone else’s binaries. Do you trust me?
Build Your Own Distribution
The operating system is built using the standard procedure, cross-compiling for i386 when necessary.
Patching
FreeBSD 9.0-RELEASE has a few outstanding issues on Xen, such as the inability to function with more than 768 MB of provisioned memory and a panic when initializing SMP, even on vcpus=1
instances like mine. These two issues are fixed in 9-STABLE and will be rolled into 9.1-RELEASE later this year. I prefer to stick to releases when I can, though, so apply these two patches to your RELENG_9_0
source tree.
First, fix the memory limitation. This patch is taken from r228746.
- --- sys/i386/xen/pmap.c.orig 2011-11-10 23:20:22.000000000 -0500
- +++ sys/i386/xen/pmap.c 2012-03-27 12:36:06.553212765 -0400
- @@ -184,9 +184,6 @@
- #define PV_STAT(x) do { } while (0)
- #endif
-
- -#define pa_index(pa) ((pa) >> PDRSHIFT)
- -#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
- -
- /*
- * Get PDEs and PTEs for user/kernel address space
- */
- @@ -230,7 +227,6 @@
- * Data for the pv entry allocation mechanism
- */
- static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
- -static struct md_page *pv_table;
- static int shpgperproc = PMAP_SHPGPERPROC;
-
- struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
- @@ -278,9 +274,6 @@
- static struct mtx PMAP2mutex;
-
- SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
- -static int pg_ps_enabled;
- -SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
- - "Are large page mappings enabled?");
-
- SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
- "Max number of PV entries");
- @@ -636,24 +629,8 @@
- void
- pmap_init(void)
- {
- - vm_page_t mpte;
- - vm_size_t s;
- - int i, pv_npg;
-
- /*
- - * Initialize the vm page array entries for the kernel pmap's
- - * page table pages.
- - */
- - for (i = 0; i < nkpt; i++) {
- - mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
- - KASSERT(mpte >= vm_page_array &&
- - mpte < &vm_page_array[vm_page_array_size],
- - ("pmap_init: page table page is out of range"));
- - mpte->pindex = i + KPTDI;
- - mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
- - }
- -
- - /*
- * Initialize the address space (zone) for the pv entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- @@ -664,26 +641,6 @@
- pv_entry_max = roundup(pv_entry_max, _NPCPV);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
-
- - /*
- - * Are large page mappings enabled?
- - */
- - TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
- -
- - /*
- - * Calculate the size of the pv head table for superpages.
- - */
- - for (i = 0; phys_avail[i + 1]; i += 2);
- - pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
- -
- - /*
- - * Allocate memory for the pv head table for superpages.
- - */
- - s = (vm_size_t)(pv_npg * sizeof(struct md_page));
- - s = round_page(s);
- - pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
- - for (i = 0; i < pv_npg; i++)
- - TAILQ_INIT(&pv_table[i].pv_list);
- -
- pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
- pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
- PAGE_SIZE * pv_maxchunks);
- @@ -3452,21 +3409,15 @@
- }
-
- /*
- - * Returns TRUE if the given page is mapped individually or as part of
- - * a 4mpage. Otherwise, returns FALSE.
- + * Returns TRUE if the given page is mapped. Otherwise, returns FALSE.
- */
- boolean_t
- pmap_page_is_mapped(vm_page_t m)
- {
- - boolean_t rv;
-
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (FALSE);
- - vm_page_lock_queues();
- - rv = !TAILQ_EMPTY(&m->md.pv_list) ||
- - !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
- - vm_page_unlock_queues();
- - return (rv);
- + return (!TAILQ_EMPTY(&m->md.pv_list));
- }
-
- /*
Second, bypass the SMP panic by limiting MAXCPU
to 1. It may be worth building 9-STABLE
instead if you require SMP support.
- --- sys/i386/include/param.h.orig 2012-03-27 16:15:06.767507025 -0400
- +++ sys/i386/include/param.h 2012-03-27 15:46:33.525462762 -0400
- @@ -69,7 +69,7 @@
-
- #if defined(SMP) || defined(KLD_MODULE)
- #ifndef MAXCPU
- -#define MAXCPU 32
- +#define MAXCPU 1
- #endif
- #else
- #define MAXCPU 1
Optionally, patch xen/clock.c
to silence annoying repeated console-spamming clock nudge messages unless the system is booted verbosely. Seen in kern/155353.
- --- sys/i386/xen/clock.c.orig 2012-03-15 12:33:13.887459073 -0400
- +++ sys/i386/xen/clock.c 2012-03-15 12:35:03.189146788 -0400
- @@ -349,7 +349,8 @@
-
- if (shadow_tv_version != HYPERVISOR_shared_info->wc_version &&
- !independent_wallclock) {
- - printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n");
- + if(bootverbose)
- + printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n");
- update_wallclock();
- add_uptime_to_wallclock();
- tc_setclock(&shadow_tv);
Kernel Configuration
The stock XEN
kernel config file should boot with no trouble. You’ll probably want to customize it a bit, though, to enable features like PF/ALTQ and to disable the system-slowing kernel debugging features, especially witness. Disable debugging with caution. You may want to build a XEN
kernel first to ensure it boots and to diagnose why if it doesn’t.
- --- sys/i386/conf/XEN 2011-11-10 23:20:22.000000000 -0500
- +++ sys/i386/conf/XEN-COOLTRAINER 2012-04-03 16:04:41.674814536 -0400
- @@ -44,16 +44,6 @@
- options KBD_INSTALL_CDEV # install a CDEV entry in /dev
- options AUDIT # Security event auditing
-
- -# Debugging for use in -current
- -options KDB # Enable kernel debugger support.
- -options DDB # Support DDB.
- -options GDB # Support remote GDB.
- -options DEADLKRES # Enable the deadlock resolver
- -options INVARIANTS # Enable calls of extra sanity checking
- -options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
- -options WITNESS # Enable checks to detect deadlocks and cycles
- -options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
- -
- options PAE
- nooption NATIVE
- option XEN
- @@ -88,3 +78,16 @@
- # Note that 'bpf' is required for DHCP.
- device bpf # Berkeley packet filter
-
- +# Enable PF
- +device pf
- +device pflog
- +device pfsync
- +
- +# Enable ALTQ (QoS)
- +options ALTQ
- +options ALTQ_CBQ # Class Bases Queuing (CBQ)
- +options ALTQ_RED # Random Early Detection (RED)
- +options ALTQ_RIO # RED In/Out
- +options ALTQ_HFSC # Hierarchical Packet Scheduler (HFSC)
- +options ALTQ_PRIQ # Priority Queuing (PRIQ)
- +options ALTQ_NOPCC # Required for SMP build
Build
You’re ready to build your i386 world and kernel. Use the TARGET
and TARGET_ARCH
environment variables to cross-compile for i386 if necessary, such as on my amd64 build host. Specify either the stock XEN
kernel configuration or the name of any custom configuration you create.
- make buildworld TARGET=i386
- make buildkernel TARGET_ARCH=i386 TARGET=i386 KERNCONF=XEN-COOLTRAINER
Install
Create a directory to serve as the temporary home of your OS distribution, set DESTDIR
to its path, and install.
- mkdir /root/xenworld
- export DESTDIR=/root/xenworld
- make installworld TARGET=i386
- make installkernel KERNCONF=XEN-COOLTRAINER TARGET_ARCH=i386 TARGET=i386
- make distribution TARGET=i386
Configure
Set the mount options in fstab for what will be our root filesystem.
- echo "/dev/xbd0s2 / ufs rw 1 1" > $DESTDIR/etc/fstab
Remove the default virtual terminals from /etc/ttys
. They don’t exist in the Xen environment and will throw errors into your console at boot if included. Then, add a line for the Xen console device so we can access the system via our out-of-band console.
- sed -i '' '/^ttyv/d' $DESTDIR/etc/ttys
- echo 'xc0 "/usr/libexec/getty Pc" vt100 on secure' >> $DESTDIR/etc/ttys
Make any other changes you wish to include, create a tarball of the Xen world, and copy the kernel ELF out of DESTDIR
for later use.
- cp $DESTDIR/boot/kernel/kernel /root/kernel
- cd $DESTDIR && tar zcvf /root/xenworld.tar.gz .
Partitioning
If you’re converting an existing Prgmr instance to FreeBSD, double-check your backups and make sure it holds nothing you’d be mad at yourself for losing.
Reboot your instance to the CentOS rescue environment via the out-of-band console, then blow away your disk with fdisk
. Create a 512MiB Linux partition, then dedicate the rest of the disk to FreeBSD, hexadecimal partition type a5
.
My properly-partitioned 25GB disk on gigadelic looks like this:
- [root@gigadelic ~]# fdisk /dev/xvda
- Disk /dev/xvda: 25.7 GB, 25769803776 bytes
- 255 heads, 63 sectors/track, 3133 cylinders
- Units = cylinders of 16065 * 512 = 8225280 bytes
-
- Device Boot Start End Blocks Id System
- /dev/xvda1 1 63 506016 83 Linux
- /dev/xvda2 64 3133 24659775 a5 FreeBSD
Format your Linux partition as ext2, a nice universally-mountable default, then mount it and create some needed directories.
- mkfs.ext2 /dev/xvda1
- mount /dev/xvda1 /mnt && cd /mnt
- mkdir -p boot/grub
Transfer your kernel ELF and world tarball to the root of the ext2 partition. These two files will consume approximately 5MiB and 150MiB, respectively. I hosted the two files using www/woof from my build machine.
- [root@emi#src] woof -i 172.16.0.40 /root/xenworld/boot/kernel/kernel
- Now serving on http://172.16.0.40:8080/
- gigadelic.cooltrainer.org - - [05/Apr/2012 10:45:06] "GET /kernel HTTP/1.0" 200 -
- wget http://emi.aloe.cooltrainer.org:8080/kernel
- wget http://emi.aloe.cooltrainer.org:8080/xenworld.tar.gz
Alternatively, download my pre-built world and kernel.
- wget http://prgmr.com/~nb/images/beta/kernel-XEN-COOLTRAINER
- wget http://prgmr.com/~nb/images/beta/xenworld.tar.gz
Move your kernel of choice to the /mnt/boot
directory with filename kernel
.
- mv kernel-XEN-COOLTRAINER boot/kernel
UFS via NetBSD
Reboot your instance once more, this time to the included NetBSD installer. If your dom0 doesn’t include NetBSD, check the directions for fetching the installer in the NetBSD how-to wiki article.
Drop to a shell when given the option. Double-check your partition layout with NetBSD’s fdisk
. Your disk device will be /dev/xbd0
.
- fdisk xbd0
- Disk: /dev/rxbd0d
- NetBSD disklabel disk geometry:
- cylinders: 24576, heads: 1, sectors/track: 2048 (2048 sectors/cylinder)
- total sectors: 50331648
-
- BIOS disk geometry:
- cylinders: 1024, heads: 255, sectors/track: 63 (16065 sectors/cylinder)
- total sectors: 50331648
-
- Partition table:
- 0: Linux native (sysid 131)
- start 63, size 1012032 (494 MB, Cyls 0-62)
- PBR is not bootable: All bytes are identical (0x00)
- 1: FreeBSD or 386BSD or old NetBSD (sysid 165)
- start 1012095, size 49319550 (24082 MB, Cyls 63-3132)
- PBR is not bootable: Bad magic number (0x5da0)
- 2: <UNUSED>
- 3: <UNUSED>
- No active partition.
- Drive serial number: 331975 (0x000510c7)
The FreeBSD partition you created in Linux’s fdisk
will show up as slice f
and the ext partition as slice e
of xbd0
, as visualized here in the partitioning step of NetBSD’s guided installer.
- Start MB End MB Size MB FS type Newfs Mount Mount point
- --------- --------- --------- ---------- ----- ----- -----------
- a: 0 0 0 unused
- b: 0 0 0 unused
- c: 0 24575 24576 NetBSD partition
- d: 0 24575 24576 Whole disk
- e: 0 493 494 Linux Ext2
- >f: 494 24574 24081 FFSv1 Yes Yes /
- g: 0 0 0 unused
- h: Show all unused partitions
- i: Change input units (sectors/cylinders/MB)
- x: Partition sizes ok
Create a new FFSv2 filesystem on slice f
.
- newfs -O 2 /dev/rxbd0f
Create mountpoints for both partitions, mount them, and extract your FreeBSD world tarball.
- mkdir -p /mnt/world
- mkdir -p /mnt/boot
- mount -t ext2fs /dev/xbd0e /mnt/boot
- mount /dev/xbd0f /mnt/world
- tar zxvf /mnt/boot/xenworld.tar.gz -C /mnt/world
Bootloader Configuration
While still in NetBSD, create a text file at /mnt/boot/boot/grub/menu.lst
. pvgrub will expect the user bootloader configuration in (hd0,0)/boot/grub/menu.lst
. (hd0,0)
is your ext2 partition in this case.
- timeout 5
-
- title FreeBSD
- root (hd0,0)
- kernel /boot/kernel vfs.root.mountfrom=ufs:xbd0s2,machdep.idle_mwait=0,kern.hz=100
-
- title FreeBSD Verbose
- root (hd0,0)
- kernel /boot/kernel vfs.root.mountfrom=ufs:xbd0s2,machdep.idle_mwait=0,kern.hz=100,bootverbose=1
The kernel lines’ arguments instruct the kernel to mount its root filesystem from /dev/xbd0s2
, as seen earlier in fstab, to disable machdep.idle_mwait in order to avoid a panic seen in kern/152228, and to tick the system clock at 100 Hz. kern.hz
should be detected automatically, but specifying it here won’t hurt. The second pvgrub entry is identical but boots verbosely.
Once that’s in place, shut down the instance and make sure, through the administration console, that you’re using the i386 pvgrub. amd64 pvgrub cannot load our i386 kernel ELF. Option #6 in the console allows you to swap bootloaders.
- 6. swap i386/amd64 bootloaders (pvgrub) currently i386
Finally, start your instance and watch through the out-of-band console as pvgrub loads your user configuration and eventually the FreeBSD kernel!
First Boot Maintenance
Booted successfully? Awesome. There are just a few things to take care of before you can settle in to FreeBSD.
First, use mtree to repair directory and file permissions and flags mangled by NetBSD’s tar
.
- mtree -U -p / -f /etc/mtree/BSD.root.dist
- mtree -U -p /usr -f /etc/mtree/BSD.usr.dist
- mtree -U -p /usr/local -f /etc/mtree/BSD.local.dist
- mtree -U -p /usr/include -f /etc/mtree/BSD.include.dist
- mtree -U -p /var -f /etc/mtree/BSD.var.dist
Enable the Blowfish hash function in auth.conf
instead of the default aging MD5 algorithm.
- echo "crypt_default=blf" >> /etc/auth.conf
Enable blf
hashing in login.conf as well, along with the UTF-8 character set.
- --- login.conf.default 2012-01-02 17:08:05.804291477 -0500
- +++ login.conf 2012-01-02 17:08:16.996213774 -0500
- @@ -23,7 +23,7 @@
- # AND SEMANTICS'' section of getcap(3) for more escape sequences).
-
- default:\
- - :passwd_format=md5:\
- + :passwd_format=blf:\
- :copyright=/etc/COPYRIGHT:\
- :welcome=/etc/motd:\
- :setenv=MAIL=/var/mail/$,BLOCKSIZE=K,FTP_PASSIVE_MODE=YES:\
- @@ -44,7 +44,9 @@
- :pseudoterminals=unlimited:\
- :priority=0:\
- :ignoretime@:\
- - :umask=022:
- + :umask=022:\
- + :charset=UTF-8:\
- + :lang=en_US.UTF-8:
-
-
- #
Rebuild the login database with cap_mkdb /etc/login.conf
, then set your new blf-hashed root password with passwd
.
Set your time zone information with tzsetup
, then create the mail alias database for Sendmail.
- cd /etc/mail && make aliases
Configuration
Set your hostname and configure networking in rc.conf. IPv4 can be configured with DHCP, but IPv6 must be configured manually. See Setting up IPv6 on the prgmr wiki. Gigadelic resides in the Fremont datacenter, so this example uses the Fremont IPv6 gateway address.
- hostname="gigadelic.cooltrainer.org"
- ipv6_activate_all_interfaces="YES"
- ifconfig_xn0_ipv6="2001:470:1:41:a800:ff:fd3e:bc0c"
- ipv6_defaultrouter="2001:470:1:41::1"
- ifconfig_xn0="DHCP"
You can test your IPv6 setup by ping6
-ing a known-v6 host.
- PING6(56=40+8+8 bytes) 2001:470:1:41:a800:ff:fd3e:bc0c --> 2001:4860:4001:800::1014
- 16 bytes from 2001:4860:4001:800::1014, icmp_seq=0 hlim=58 time=2.219 ms
- 16 bytes from 2001:4860:4001:800::1014, icmp_seq=1 hlim=58 time=2.064 ms
- 16 bytes from 2001:4860:4001:800::1014, icmp_seq=2 hlim=58 time=2.141 ms
- 16 bytes from 2001:4860:4001:800::1014, icmp_seq=3 hlim=58 time=2.005 ms
-
- --- ipv6.l.google.com ping6 statistics ---
- 4 packets transmitted, 4 packets received, 0.0% packet loss
- round-trip min/avg/max/std-dev = 2.005/2.107/2.219/0.081 ms
Turn on OpenSSH if you desire remote access. Please realize your SSH daemon will be subject to near-constant automated break-in attempts. Use good passwords, leave PermitRootLogin
disabled in /etc/ssh/sshd_config
, and consider blocking bad hosts with PF or another firewall.
- echo 'sshd_enable="YES"' >> /etc/rc.conf
- service sshd start
With that, you’re all done! Fetch a Ports tree with portsnap fetch extract
, and install some software.
Notes
Spurious LORs under KERNCONF=XEN
The stock KERNCONF XEN
has WITNESS
enabled, a debugging feature for lock validation. If you’re running with options WITNESS
you may see some spurious lock order reversals (LORs) related to the filesystem. They’re false positives, as noted in i386/153260, and your system will safely come right out the other end.
- Looking up portsnap.FreeBSD.org mirrors... 5 mirrors found.
- Fetching public key from portsnap2.FreeBSD.org... done.
- Fetching snapshot tag from portsnap2.FreeBSD.org... done.
- Fetching snapshot metadata... done.
- Fetching snapshot generated at Tue Mar 13 00:07:53 UTC 2012:
- cbf91c2407f03283d90f5ded759a3af9833e067158f236100% of 66 MB 3469 kBps 00m00s
- Extracting snapshot... lock order reversal:
- 1st 0xd8459228 bufwait (bufwait) @ /usr/src/sys/kern/vfs_bio.c:2658
- 2nd 0xc38db400 dirhash (dirhash) @ /usr/src/sys/ufs/ufs/ufs_dirhash.c:284
- KDB: stack backtrace:
- X_db_sym_numargs(c03dd42c,c04320d8,c3a19b80,c3a510ac,b395eb11,...) at X_db_sym_numargs+0x146
- kdb_backtrace(c018627b,c03e0d7b,c360dae8,c3610a08,e58f2800,...) at kdb_backtrace+0x2a
- witness_display_spinlock(c03e0d7b,c38db400,c04047aa,c3610a08,c040442f,...) at witness_display_spinlock+0x75
- witness_checkorder(c38db400,9,c040442f,11c,0,...) at witness_checkorder+0x839
- _sx_xlock(c38db400,0,c040442f,11c,c3a14e80,...) at _sx_xlock+0x85
- ufsdirhash_enduseful(d84591c8,c3a14e80,e58f2930,dd12884c,e58f28d0,...) at ufsdirhash_enduseful+0x2f5
- ufsdirhash_add(c3a14e80,e58f2930,84c,e58f28bc,e58f28c0,...) at ufsdirhash_add+0x13
- ufs_direnter(c3a51000,c3b07aa0,e58f2930,e58f2bd0,0,...) at ufs_direnter+0x739
- ufs_itimes(e58f2bd0,0,e58f2b14,e58f2a78,c03c1bc5,...) at ufs_itimes+0x14bc
- ufs_itimes(e58f2b14,e58f2b2c,0,0,e58f2b90,...) at ufs_itimes+0x17d0
- VOP_CREATE_APV(c0449ec0,e58f2b14,2,c04059c0,0,...) at VOP_CREATE_APV+0xa5
- vn_open_cred(e58f2b90,e58f2c58,1a4,0,c38e5280,...) at vn_open_cred+0x1d3
- vn_open(e58f2b90,e58f2c58,1a4,c38f0690,c0a64dd0,...) at vn_open+0x3b
- kern_openat(c3a19b80,ffffff9c,2846f1f0,0,a02,...) at kern_openat+0x1ec
- kern_open(c3a19b80,2846f1f0,0,a01,1a4,...) at kern_open+0x35
- sys_open(c3a19b80,e58f2cfc,c0414576,c03e19be,206,...) at sys_open+0x30
- syscall(e58f2d38) at syscall+0x284
- Xint0x80_syscall() at Xint0x80_syscall+0x22
- --- syscall (5, FreeBSD ELF32, sys_open), eip = 0x2837dd03, esp = 0xbf7fe83c, ebp = 0xbf7fe868 ---
- done.
- Verifying snapshot integrity...
Updating the kernel
If you want to update your FreeBSD kernel ELF from within FreeBSD itself, install sysutils/e2fsprogs, create a mount point, and mount your ext2 boot partition.
- mkdir /root/boot
- mount -t ext2fs /dev/xbd0s1 /root/boot
Stability?
FreeBSD has powered gigadelic and this blog for a month now with no panics or other show-stoppers.