diff -Naur spl-0.6.5.7/cmd/Makefile.am spl-0.6.5.7.new/cmd/Makefile.am
--- spl-0.6.5.7/cmd/Makefile.am	2015-12-24 01:18:07.000000000 +0100
+++ spl-0.6.5.7.new/cmd/Makefile.am	2016-08-01 16:43:23.435766048 +0200
@@ -1,11 +1 @@
-include $(top_srcdir)/config/Rules.am
-
-DEFAULT_INCLUDES += \
-	-I$(top_srcdir)/lib
-
-sbin_PROGRAMS = splat
-
-splat_SOURCES = splat.c
-splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
-
-EXTRA_DIST = splat.h
+SUBDIRS = splat splslab
diff -Naur spl-0.6.5.7/cmd/splat/Makefile.am spl-0.6.5.7.new/cmd/splat/Makefile.am
--- spl-0.6.5.7/cmd/splat/Makefile.am	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/cmd/splat/Makefile.am	2016-08-01 16:43:23.435766048 +0200
@@ -0,0 +1,11 @@
+include $(top_srcdir)/config/Rules.am
+
+DEFAULT_INCLUDES += \
+	-I$(top_srcdir)/lib
+
+sbin_PROGRAMS = splat
+
+splat_SOURCES = splat.c
+splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
+
+EXTRA_DIST = splat.h
diff -Naur spl-0.6.5.7/cmd/splat/splat.c spl-0.6.5.7.new/cmd/splat/splat.c
--- spl-0.6.5.7/cmd/splat/splat.c	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/cmd/splat/splat.c	2016-08-01 16:43:23.436766051 +0200
@@ -0,0 +1,836 @@
+/*****************************************************************************\
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *  For details, see <http://zfsonlinux.org/>.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+ *****************************************************************************
+ *  Solaris Porting LAyer Tests (SPLAT) User Space Interface.
+\*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "splat.h" + +#undef ioctl + +static const char shortOpts[] = "hvlat:xc"; +static const struct option longOpts[] = { + { "help", no_argument, 0, 'h' }, + { "verbose", no_argument, 0, 'v' }, + { "list", no_argument, 0, 'l' }, + { "all", no_argument, 0, 'a' }, + { "test", required_argument, 0, 't' }, + { "exit", no_argument, 0, 'x' }, + { "nocolor", no_argument, 0, 'c' }, + { 0, 0, 0, 0 } +}; + +#define VERSION_SIZE 64 + +static List subsystems; /* Subsystem/tests */ +static int splatctl_fd; /* Control file descriptor */ +static char splat_version[VERSION_SIZE]; /* Kernel version string */ +static char *splat_buffer = NULL; /* Scratch space area */ +static int splat_buffer_size = 0; /* Scratch space size */ + + +static void test_list(List, int); +static int dev_clear(void); +static void subsystem_fini(subsystem_t *); +static void test_fini(test_t *); + + +static int usage(void) { + fprintf(stderr, "usage: splat [hvla] [-t >]\n"); + fprintf(stderr, + " --help -h This help\n" + " --verbose -v Increase verbosity\n" + " --list -l List all tests in all subsystems\n" + " --all -a Run all tests in all subsystems\n" + " --test -t Run 'test' in subsystem 'sub'\n" + " --exit -x Exit on first test error\n" + " --nocolor -c Do not colorize output\n"); + fprintf(stderr, "\n" + "Examples:\n" + " splat -t kmem:all # Runs all kmem tests\n" + " splat -t taskq:0x201 # Run taskq test 0x201\n"); + + return 0; +} + +static subsystem_t *subsystem_init(splat_user_t *desc) +{ + subsystem_t *sub; + + sub = (subsystem_t *)malloc(sizeof(*sub)); + if (sub == NULL) + return NULL; + + memcpy(&sub->sub_desc, desc, sizeof(*desc)); + + sub->sub_tests = list_create((ListDelF)test_fini); + if (sub->sub_tests == NULL) { + free(sub); + return NULL; + } + + return sub; +} + +static void subsystem_fini(subsystem_t *sub) +{ + assert(sub != NULL); + free(sub); +} + +static int subsystem_setup(void) +{ + splat_cfg_t *cfg; + int i, rc, size, cfg_size; + subsystem_t *sub; + splat_user_t *desc; + + /* Aquire the number of registered subsystems */ + cfg_size = sizeof(*cfg); + cfg = (splat_cfg_t *)malloc(cfg_size); + if (cfg == NULL) + return -ENOMEM; + + memset(cfg, 0, cfg_size); + cfg->cfg_magic = SPLAT_CFG_MAGIC; + cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_COUNT; + + rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); + if (rc) { + fprintf(stderr, "Ioctl() error 0x%lx / %d: %d\n", + (unsigned long)SPLAT_CFG, cfg->cfg_cmd, errno); + free(cfg); + return rc; + } + + size = cfg->cfg_rc1; + free(cfg); + + /* Based on the newly acquired number of subsystems allocate + * memory to get the descriptive information for them all. 
*/ + cfg_size = sizeof(*cfg) + size * sizeof(splat_user_t); + cfg = (splat_cfg_t *)malloc(cfg_size); + if (cfg == NULL) + return -ENOMEM; + + memset(cfg, 0, cfg_size); + cfg->cfg_magic = SPLAT_CFG_MAGIC; + cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_LIST; + cfg->cfg_data.splat_subsystems.size = size; + + rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); + free(cfg); + return rc; + } + + /* Add the new subsystems in to the global list */ + size = cfg->cfg_rc1; + for (i = 0; i < size; i++) { + desc = &(cfg->cfg_data.splat_subsystems.descs[i]); + + sub = subsystem_init(desc); + if (sub == NULL) { + fprintf(stderr, "Error initializing subsystem: %s\n", + desc->name); + free(cfg); + return -ENOMEM; + } + + list_append(subsystems, sub); + } + + free(cfg); + return 0; +} + +static void subsystem_list(List l, int indent) +{ + ListIterator i; + subsystem_t *sub; + + fprintf(stdout, + "------------------------------ " + "Available SPLAT Tests " + "------------------------------\n"); + + i = list_iterator_create(l); + + while ((sub = list_next(i))) { + fprintf(stdout, "%*s0x%0*x %-*s ---- %s ----\n", + indent, "", + 4, sub->sub_desc.id, + SPLAT_NAME_SIZE + 7, sub->sub_desc.name, + sub->sub_desc.desc); + test_list(sub->sub_tests, indent + 7); + } + + list_iterator_destroy(i); +} + +static test_t *test_init(subsystem_t *sub, splat_user_t *desc) +{ + test_t *test; + + test = (test_t *)malloc(sizeof(*test)); + if (test == NULL) + return NULL; + + test->test_sub = sub; + memcpy(&test->test_desc, desc, sizeof(*desc)); + + return test; +} + +static void test_fini(test_t *test) +{ + assert(test != NULL); + free(test); +} + +static int test_setup(subsystem_t *sub) +{ + splat_cfg_t *cfg; + int i, rc, size; + test_t *test; + splat_user_t *desc; + + /* Aquire the number of registered tests for the give subsystem */ + cfg = (splat_cfg_t *)malloc(sizeof(*cfg)); + if (cfg == NULL) + return -ENOMEM; + + memset(cfg, 0, sizeof(*cfg)); + cfg->cfg_magic = SPLAT_CFG_MAGIC; + cfg->cfg_cmd = SPLAT_CFG_TEST_COUNT; + cfg->cfg_arg1 = sub->sub_desc.id; /* Subsystem of interest */ + + rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); + free(cfg); + return rc; + } + + size = cfg->cfg_rc1; + free(cfg); + + /* Based on the newly aquired number of tests allocate enough + * memory to get the descriptive information for them all. 
*/ + cfg = (splat_cfg_t *)malloc(sizeof(*cfg) + size*sizeof(splat_user_t)); + if (cfg == NULL) + return -ENOMEM; + + memset(cfg, 0, sizeof(*cfg) + size * sizeof(splat_user_t)); + cfg->cfg_magic = SPLAT_CFG_MAGIC; + cfg->cfg_cmd = SPLAT_CFG_TEST_LIST; + cfg->cfg_arg1 = sub->sub_desc.id; /* Subsystem of interest */ + cfg->cfg_data.splat_tests.size = size; + + rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); + free(cfg); + return rc; + } + + /* Add the new tests in to the relevant subsystems */ + size = cfg->cfg_rc1; + for (i = 0; i < size; i++) { + desc = &(cfg->cfg_data.splat_tests.descs[i]); + + test = test_init(sub, desc); + if (test == NULL) { + fprintf(stderr, "Error initializing test: %s\n", + desc->name); + free(cfg); + return -ENOMEM; + } + + list_append(sub->sub_tests, test); + } + + free(cfg); + return 0; +} + +static test_t *test_copy(test_t *test) +{ + return test_init(test->test_sub, &test->test_desc); +} + +static void test_list(List l, int indent) +{ + ListIterator i; + test_t *test; + + i = list_iterator_create(l); + + while ((test = list_next(i))) + fprintf(stdout, "%*s0x%0*x %-*s %s\n", + indent, "", 04, test->test_desc.id, + SPLAT_NAME_SIZE, test->test_desc.name, + test->test_desc.desc); + + list_iterator_destroy(i); +} + +static test_t *test_find(char *sub_str, char *test_str) +{ + ListIterator si, ti; + subsystem_t *sub; + test_t *test; + __u32 sub_num, test_num; + + /* + * No error checking here because it may not be a number, it's + * perfectly OK for it to be a string. Since we're just using + * it for comparison purposes this is all very safe. + */ + sub_num = strtoul(sub_str, NULL, 0); + test_num = strtoul(test_str, NULL, 0); + + si = list_iterator_create(subsystems); + + while ((sub = list_next(si))) { + + if (strncmp(sub->sub_desc.name, sub_str, SPLAT_NAME_SIZE) && + sub->sub_desc.id != sub_num) + continue; + + ti = list_iterator_create(sub->sub_tests); + + while ((test = list_next(ti))) { + + if (!strncmp(test->test_desc.name, test_str, + SPLAT_NAME_SIZE) || test->test_desc.id==test_num) { + list_iterator_destroy(ti); + list_iterator_destroy(si); + return test; + } + } + + list_iterator_destroy(ti); + } + + list_iterator_destroy(si); + + return NULL; +} + +static int test_add(cmd_args_t *args, test_t *test) +{ + test_t *tmp; + + tmp = test_copy(test); + if (tmp == NULL) + return -ENOMEM; + + list_append(args->args_tests, tmp); + return 0; +} + +static int test_add_all(cmd_args_t *args) +{ + ListIterator si, ti; + subsystem_t *sub; + test_t *test; + int rc; + + si = list_iterator_create(subsystems); + + while ((sub = list_next(si))) { + ti = list_iterator_create(sub->sub_tests); + + while ((test = list_next(ti))) { + if ((rc = test_add(args, test))) { + list_iterator_destroy(ti); + list_iterator_destroy(si); + return rc; + } + } + + list_iterator_destroy(ti); + } + + list_iterator_destroy(si); + + return 0; +} + +static int test_run(cmd_args_t *args, test_t *test) +{ + subsystem_t *sub = test->test_sub; + splat_cmd_t *cmd; + int rc, cmd_size; + + dev_clear(); + + cmd_size = sizeof(*cmd); + cmd = (splat_cmd_t *)malloc(cmd_size); + if (cmd == NULL) + return -ENOMEM; + + memset(cmd, 0, cmd_size); + cmd->cmd_magic = SPLAT_CMD_MAGIC; + cmd->cmd_subsystem = sub->sub_desc.id; + cmd->cmd_test = test->test_desc.id; + cmd->cmd_data_size = 0; /* Unused feature */ + + fprintf(stdout, "%*s:%-*s ", + SPLAT_NAME_SIZE, sub->sub_desc.name, + SPLAT_NAME_SIZE, 
test->test_desc.name); + fflush(stdout); + rc = ioctl(splatctl_fd, SPLAT_CMD, cmd); + if (args->args_do_color) { + fprintf(stdout, "%s %s\n", rc ? + COLOR_RED "Fail" COLOR_RESET : + COLOR_GREEN "Pass" COLOR_RESET, + rc ? strerror(errno) : ""); + } else { + fprintf(stdout, "%s %s\n", rc ? + "Fail" : "Pass", + rc ? strerror(errno) : ""); + } + fflush(stdout); + free(cmd); + + if ((args->args_verbose == 1 && rc) || + (args->args_verbose >= 2)) { + if ((rc = read(splatctl_fd, splat_buffer, + splat_buffer_size - 1)) < 0) { + fprintf(stdout, "Error reading results: %d\n", rc); + } else { + fprintf(stdout, "\n%s\n", splat_buffer); + fflush(stdout); + } + } + + return rc; +} + +static int tests_run(cmd_args_t *args) +{ + ListIterator i; + test_t *test; + int rc; + + fprintf(stdout, + "------------------------------ " + "Running SPLAT Tests " + "------------------------------\n"); + + i = list_iterator_create(args->args_tests); + + while ((test = list_next(i))) { + rc = test_run(args, test); + if (rc && args->args_exit_on_error) { + list_iterator_destroy(i); + return rc; + } + } + + list_iterator_destroy(i); + return 0; +} + +static int args_parse_test(cmd_args_t *args, char *str) +{ + ListIterator si, ti; + subsystem_t *s; + test_t *t; + char *sub_str, *test_str; + int sub_num, test_num; + int sub_all = 0, test_all = 0; + int rc, flag = 0; + + test_str = strchr(str, ':'); + if (test_str == NULL) { + fprintf(stderr, "Test must be of the " + "form \n"); + return -EINVAL; + } + + sub_str = str; + test_str[0] = '\0'; + test_str = test_str + 1; + + sub_num = strtol(sub_str, NULL, 0); + test_num = strtol(test_str, NULL, 0); + + if (!strncasecmp(sub_str, "all", strlen(sub_str)) || (sub_num == -1)) + sub_all = 1; + + if (!strncasecmp(test_str,"all",strlen(test_str)) || (test_num == -1)) + test_all = 1; + + si = list_iterator_create(subsystems); + + if (sub_all) { + if (test_all) { + /* Add all tests from all subsystems */ + while ((s = list_next(si))) { + ti = list_iterator_create(s->sub_tests); + while ((t = list_next(ti))) { + if ((rc = test_add(args, t))) { + list_iterator_destroy(ti); + goto error_run; + } + } + list_iterator_destroy(ti); + } + } else { + /* Add a specific test from all subsystems */ + while ((s = list_next(si))) { + if ((t=test_find(s->sub_desc.name,test_str))) { + if ((rc = test_add(args, t))) + goto error_run; + + flag = 1; + } + } + + if (!flag) + fprintf(stderr, "No tests '%s:%s' could be " + "found\n", sub_str, test_str); + } + } else { + if (test_all) { + /* Add all tests from a specific subsystem */ + while ((s = list_next(si))) { + if (strncasecmp(sub_str, s->sub_desc.name, + strlen(sub_str))) + continue; + + ti = list_iterator_create(s->sub_tests); + while ((t = list_next(ti))) { + if ((rc = test_add(args, t))) { + list_iterator_destroy(ti); + goto error_run; + } + } + list_iterator_destroy(ti); + } + } else { + /* Add a specific test from a specific subsystem */ + if ((t = test_find(sub_str, test_str))) { + if ((rc = test_add(args, t))) + goto error_run; + } else { + fprintf(stderr, "Test '%s:%s' could not be " + "found\n", sub_str, test_str); + return -EINVAL; + } + } + } + + list_iterator_destroy(si); + + return 0; + +error_run: + list_iterator_destroy(si); + + fprintf(stderr, "Test '%s:%s' not added to run list: %d\n", + sub_str, test_str, rc); + + return rc; +} + +static void args_fini(cmd_args_t *args) +{ + assert(args != NULL); + + if (args->args_tests != NULL) + list_destroy(args->args_tests); + + free(args); +} + +static cmd_args_t * +args_init(int argc, char 
**argv) +{ + cmd_args_t *args; + int c, rc; + + if (argc == 1) { + usage(); + return (cmd_args_t *) NULL; + } + + /* Configure and populate the args structures */ + args = malloc(sizeof(*args)); + if (args == NULL) + return NULL; + + memset(args, 0, sizeof(*args)); + args->args_verbose = 0; + args->args_do_list = 0; + args->args_do_all = 0; + args->args_do_color = 1; + args->args_exit_on_error = 0; + args->args_tests = list_create((ListDelF)test_fini); + if (args->args_tests == NULL) { + args_fini(args); + return NULL; + } + + while ((c = getopt_long(argc, argv, shortOpts, longOpts, NULL)) != -1){ + switch (c) { + case 'v': args->args_verbose++; break; + case 'l': args->args_do_list = 1; break; + case 'a': args->args_do_all = 1; break; + case 'c': args->args_do_color = 0; break; + case 'x': args->args_exit_on_error = 1; break; + case 't': + if (args->args_do_all) { + fprintf(stderr, "Option -t is " + "useless when used with -a\n"); + args_fini(args); + return NULL; + } + + rc = args_parse_test(args, argv[optind - 1]); + if (rc) { + args_fini(args); + return NULL; + } + break; + case 'h': + case '?': + usage(); + args_fini(args); + return NULL; + default: + fprintf(stderr, "Unknown option '%s'\n", + argv[optind - 1]); + break; + } + } + + return args; +} + +static int +dev_clear(void) +{ + splat_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = SPLAT_CFG_MAGIC; + cfg.cfg_cmd = SPLAT_CFG_BUFFER_CLEAR; + cfg.cfg_arg1 = 0; + + rc = ioctl(splatctl_fd, SPLAT_CFG, &cfg); + if (rc) + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) SPLAT_CFG, cfg.cfg_cmd, errno); + + lseek(splatctl_fd, 0, SEEK_SET); + + return rc; +} + +static int +dev_size(int size) +{ + splat_cfg_t cfg; + int rc; + + memset(&cfg, 0, sizeof(cfg)); + cfg.cfg_magic = SPLAT_CFG_MAGIC; + cfg.cfg_cmd = SPLAT_CFG_BUFFER_SIZE; + cfg.cfg_arg1 = size; + + rc = ioctl(splatctl_fd, SPLAT_CFG, &cfg); + if (rc) { + fprintf(stderr, "Ioctl() error %lu / %d: %d\n", + (unsigned long) SPLAT_CFG, cfg.cfg_cmd, errno); + return rc; + } + + return cfg.cfg_rc1; +} + +static void +dev_fini(void) +{ + if (splat_buffer) + free(splat_buffer); + + if (splatctl_fd != -1) { + if (close(splatctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + SPLAT_DEV, errno); + } + } +} + +static int +dev_init(void) +{ + ListIterator i; + subsystem_t *sub; + int rc; + + splatctl_fd = open(SPLAT_DEV, O_RDONLY); + if (splatctl_fd == -1) { + fprintf(stderr, "Unable to open %s: %d\n" + "Is the splat module loaded?\n", SPLAT_DEV, errno); + rc = errno; + goto error; + } + + /* Determine kernel module version string */ + memset(splat_version, 0, VERSION_SIZE); + if ((rc = read(splatctl_fd, splat_version, VERSION_SIZE - 1)) == -1) + goto error; + + if ((rc = dev_clear())) + goto error; + + if ((rc = dev_size(0)) < 0) + goto error; + + splat_buffer_size = rc; + splat_buffer = (char *)malloc(splat_buffer_size); + if (splat_buffer == NULL) { + rc = -ENOMEM; + goto error; + } + + memset(splat_buffer, 0, splat_buffer_size); + + /* Determine available subsystems */ + if ((rc = subsystem_setup()) != 0) + goto error; + + /* Determine available tests for all subsystems */ + i = list_iterator_create(subsystems); + + while ((sub = list_next(i))) { + if ((rc = test_setup(sub)) != 0) { + list_iterator_destroy(i); + goto error; + } + } + + list_iterator_destroy(i); + return 0; + +error: + if (splatctl_fd != -1) { + if (close(splatctl_fd) == -1) { + fprintf(stderr, "Unable to close %s: %d\n", + SPLAT_DEV, errno); + } + } + + return rc; +} + +int 
+init(void) +{ + int rc = 0; + + /* Allocate the subsystem list */ + subsystems = list_create((ListDelF)subsystem_fini); + if (subsystems == NULL) + rc = ENOMEM; + + return rc; +} + +void +fini(void) +{ + list_destroy(subsystems); +} + + +int +main(int argc, char **argv) +{ + cmd_args_t *args = NULL; + int rc = 0; + + /* General init */ + if ((rc = init())) + return rc; + + /* Device specific init */ + if ((rc = dev_init())) + goto out; + + /* Argument init and parsing */ + if ((args = args_init(argc, argv)) == NULL) { + rc = -1; + goto out; + } + + /* Generic kernel version string */ + if (args->args_verbose) + fprintf(stdout, "%s", splat_version); + + /* Print the available test list and exit */ + if (args->args_do_list) { + subsystem_list(subsystems, 0); + goto out; + } + + /* Add all available test to the list of tests to run */ + if (args->args_do_all) { + if ((rc = test_add_all(args))) + goto out; + } + + /* Run all the requested tests */ + if ((rc = tests_run(args))) + goto out; + +out: + if (args != NULL) + args_fini(args); + + dev_fini(); + fini(); + return rc; +} diff -Naur spl-0.6.5.7/cmd/splat/splat.h spl-0.6.5.7.new/cmd/splat/splat.h --- spl-0.6.5.7/cmd/splat/splat.h 1970-01-01 01:00:00.000000000 +0100 +++ spl-0.6.5.7.new/cmd/splat/splat.h 2016-08-01 16:43:23.436766051 +0200 @@ -0,0 +1,70 @@ +/*****************************************************************************\ + * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. + * Copyright (C) 2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * For details, see . + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . 
+\*****************************************************************************/ + +#ifndef _SPLAT_H +#define _SPLAT_H + +#include "list.h" +#include "../include/splat-ctl.h" + +#define DEV_NAME "/dev/splatctl" +#define COLOR_BLACK "\033[0;30m" +#define COLOR_DK_GRAY "\033[1;30m" +#define COLOR_BLUE "\033[0;34m" +#define COLOR_LT_BLUE "\033[1;34m" +#define COLOR_GREEN "\033[0;32m" +#define COLOR_LT_GREEN "\033[1;32m" +#define COLOR_CYAN "\033[0;36m" +#define COLOR_LT_CYAN "\033[1;36m" +#define COLOR_RED "\033[0;31m" +#define COLOR_LT_RED "\033[1;31m" +#define COLOR_PURPLE "\033[0;35m" +#define COLOR_LT_PURPLE "\033[1;35m" +#define COLOR_BROWN "\033[0;33m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_LT_GRAY "\033[0;37m" +#define COLOR_WHITE "\033[1;37m" +#define COLOR_RESET "\033[0m" + +typedef struct subsystem { + splat_user_t sub_desc; /* Subsystem description */ + List sub_tests; /* Assocated subsystem tests list */ +} subsystem_t; + +typedef struct test { + splat_user_t test_desc; /* Test description */ + subsystem_t *test_sub; /* Parent subsystem */ +} test_t; + +typedef struct cmd_args { + int args_verbose; /* Verbose flag */ + int args_do_list; /* Display all tests flag */ + int args_do_all; /* Run all tests flag */ + int args_do_color; /* Colorize output */ + int args_exit_on_error; /* Exit on first error flag */ + List args_tests; /* Requested subsystems/tests */ +} cmd_args_t; + +#endif /* _SPLAT_H */ + diff -Naur spl-0.6.5.7/cmd/splat.c spl-0.6.5.7.new/cmd/splat.c --- spl-0.6.5.7/cmd/splat.c 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/cmd/splat.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,836 +0,0 @@ -/*****************************************************************************\ - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . - ***************************************************************************** - * Solaris Porting LAyer Tests (SPLAT) User Space Interface. 
-\*****************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "splat.h" - -#undef ioctl - -static const char shortOpts[] = "hvlat:xc"; -static const struct option longOpts[] = { - { "help", no_argument, 0, 'h' }, - { "verbose", no_argument, 0, 'v' }, - { "list", no_argument, 0, 'l' }, - { "all", no_argument, 0, 'a' }, - { "test", required_argument, 0, 't' }, - { "exit", no_argument, 0, 'x' }, - { "nocolor", no_argument, 0, 'c' }, - { 0, 0, 0, 0 } -}; - -#define VERSION_SIZE 64 - -static List subsystems; /* Subsystem/tests */ -static int splatctl_fd; /* Control file descriptor */ -static char splat_version[VERSION_SIZE]; /* Kernel version string */ -static char *splat_buffer = NULL; /* Scratch space area */ -static int splat_buffer_size = 0; /* Scratch space size */ - - -static void test_list(List, int); -static int dev_clear(void); -static void subsystem_fini(subsystem_t *); -static void test_fini(test_t *); - - -static int usage(void) { - fprintf(stderr, "usage: splat [hvla] [-t >]\n"); - fprintf(stderr, - " --help -h This help\n" - " --verbose -v Increase verbosity\n" - " --list -l List all tests in all subsystems\n" - " --all -a Run all tests in all subsystems\n" - " --test -t Run 'test' in subsystem 'sub'\n" - " --exit -x Exit on first test error\n" - " --nocolor -c Do not colorize output\n"); - fprintf(stderr, "\n" - "Examples:\n" - " splat -t kmem:all # Runs all kmem tests\n" - " splat -t taskq:0x201 # Run taskq test 0x201\n"); - - return 0; -} - -static subsystem_t *subsystem_init(splat_user_t *desc) -{ - subsystem_t *sub; - - sub = (subsystem_t *)malloc(sizeof(*sub)); - if (sub == NULL) - return NULL; - - memcpy(&sub->sub_desc, desc, sizeof(*desc)); - - sub->sub_tests = list_create((ListDelF)test_fini); - if (sub->sub_tests == NULL) { - free(sub); - return NULL; - } - - return sub; -} - -static void subsystem_fini(subsystem_t *sub) -{ - assert(sub != NULL); - free(sub); -} - -static int subsystem_setup(void) -{ - splat_cfg_t *cfg; - int i, rc, size, cfg_size; - subsystem_t *sub; - splat_user_t *desc; - - /* Aquire the number of registered subsystems */ - cfg_size = sizeof(*cfg); - cfg = (splat_cfg_t *)malloc(cfg_size); - if (cfg == NULL) - return -ENOMEM; - - memset(cfg, 0, cfg_size); - cfg->cfg_magic = SPLAT_CFG_MAGIC; - cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_COUNT; - - rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); - if (rc) { - fprintf(stderr, "Ioctl() error 0x%lx / %d: %d\n", - (unsigned long)SPLAT_CFG, cfg->cfg_cmd, errno); - free(cfg); - return rc; - } - - size = cfg->cfg_rc1; - free(cfg); - - /* Based on the newly acquired number of subsystems allocate - * memory to get the descriptive information for them all. 
*/ - cfg_size = sizeof(*cfg) + size * sizeof(splat_user_t); - cfg = (splat_cfg_t *)malloc(cfg_size); - if (cfg == NULL) - return -ENOMEM; - - memset(cfg, 0, cfg_size); - cfg->cfg_magic = SPLAT_CFG_MAGIC; - cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_LIST; - cfg->cfg_data.splat_subsystems.size = size; - - rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); - if (rc) { - fprintf(stderr, "Ioctl() error %lu / %d: %d\n", - (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); - free(cfg); - return rc; - } - - /* Add the new subsystems in to the global list */ - size = cfg->cfg_rc1; - for (i = 0; i < size; i++) { - desc = &(cfg->cfg_data.splat_subsystems.descs[i]); - - sub = subsystem_init(desc); - if (sub == NULL) { - fprintf(stderr, "Error initializing subsystem: %s\n", - desc->name); - free(cfg); - return -ENOMEM; - } - - list_append(subsystems, sub); - } - - free(cfg); - return 0; -} - -static void subsystem_list(List l, int indent) -{ - ListIterator i; - subsystem_t *sub; - - fprintf(stdout, - "------------------------------ " - "Available SPLAT Tests " - "------------------------------\n"); - - i = list_iterator_create(l); - - while ((sub = list_next(i))) { - fprintf(stdout, "%*s0x%0*x %-*s ---- %s ----\n", - indent, "", - 4, sub->sub_desc.id, - SPLAT_NAME_SIZE + 7, sub->sub_desc.name, - sub->sub_desc.desc); - test_list(sub->sub_tests, indent + 7); - } - - list_iterator_destroy(i); -} - -static test_t *test_init(subsystem_t *sub, splat_user_t *desc) -{ - test_t *test; - - test = (test_t *)malloc(sizeof(*test)); - if (test == NULL) - return NULL; - - test->test_sub = sub; - memcpy(&test->test_desc, desc, sizeof(*desc)); - - return test; -} - -static void test_fini(test_t *test) -{ - assert(test != NULL); - free(test); -} - -static int test_setup(subsystem_t *sub) -{ - splat_cfg_t *cfg; - int i, rc, size; - test_t *test; - splat_user_t *desc; - - /* Aquire the number of registered tests for the give subsystem */ - cfg = (splat_cfg_t *)malloc(sizeof(*cfg)); - if (cfg == NULL) - return -ENOMEM; - - memset(cfg, 0, sizeof(*cfg)); - cfg->cfg_magic = SPLAT_CFG_MAGIC; - cfg->cfg_cmd = SPLAT_CFG_TEST_COUNT; - cfg->cfg_arg1 = sub->sub_desc.id; /* Subsystem of interest */ - - rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); - if (rc) { - fprintf(stderr, "Ioctl() error %lu / %d: %d\n", - (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); - free(cfg); - return rc; - } - - size = cfg->cfg_rc1; - free(cfg); - - /* Based on the newly aquired number of tests allocate enough - * memory to get the descriptive information for them all. 
*/ - cfg = (splat_cfg_t *)malloc(sizeof(*cfg) + size*sizeof(splat_user_t)); - if (cfg == NULL) - return -ENOMEM; - - memset(cfg, 0, sizeof(*cfg) + size * sizeof(splat_user_t)); - cfg->cfg_magic = SPLAT_CFG_MAGIC; - cfg->cfg_cmd = SPLAT_CFG_TEST_LIST; - cfg->cfg_arg1 = sub->sub_desc.id; /* Subsystem of interest */ - cfg->cfg_data.splat_tests.size = size; - - rc = ioctl(splatctl_fd, SPLAT_CFG, cfg); - if (rc) { - fprintf(stderr, "Ioctl() error %lu / %d: %d\n", - (unsigned long) SPLAT_CFG, cfg->cfg_cmd, errno); - free(cfg); - return rc; - } - - /* Add the new tests in to the relevant subsystems */ - size = cfg->cfg_rc1; - for (i = 0; i < size; i++) { - desc = &(cfg->cfg_data.splat_tests.descs[i]); - - test = test_init(sub, desc); - if (test == NULL) { - fprintf(stderr, "Error initializing test: %s\n", - desc->name); - free(cfg); - return -ENOMEM; - } - - list_append(sub->sub_tests, test); - } - - free(cfg); - return 0; -} - -static test_t *test_copy(test_t *test) -{ - return test_init(test->test_sub, &test->test_desc); -} - -static void test_list(List l, int indent) -{ - ListIterator i; - test_t *test; - - i = list_iterator_create(l); - - while ((test = list_next(i))) - fprintf(stdout, "%*s0x%0*x %-*s %s\n", - indent, "", 04, test->test_desc.id, - SPLAT_NAME_SIZE, test->test_desc.name, - test->test_desc.desc); - - list_iterator_destroy(i); -} - -static test_t *test_find(char *sub_str, char *test_str) -{ - ListIterator si, ti; - subsystem_t *sub; - test_t *test; - __u32 sub_num, test_num; - - /* - * No error checking here because it may not be a number, it's - * perfectly OK for it to be a string. Since we're just using - * it for comparison purposes this is all very safe. - */ - sub_num = strtoul(sub_str, NULL, 0); - test_num = strtoul(test_str, NULL, 0); - - si = list_iterator_create(subsystems); - - while ((sub = list_next(si))) { - - if (strncmp(sub->sub_desc.name, sub_str, SPLAT_NAME_SIZE) && - sub->sub_desc.id != sub_num) - continue; - - ti = list_iterator_create(sub->sub_tests); - - while ((test = list_next(ti))) { - - if (!strncmp(test->test_desc.name, test_str, - SPLAT_NAME_SIZE) || test->test_desc.id==test_num) { - list_iterator_destroy(ti); - list_iterator_destroy(si); - return test; - } - } - - list_iterator_destroy(ti); - } - - list_iterator_destroy(si); - - return NULL; -} - -static int test_add(cmd_args_t *args, test_t *test) -{ - test_t *tmp; - - tmp = test_copy(test); - if (tmp == NULL) - return -ENOMEM; - - list_append(args->args_tests, tmp); - return 0; -} - -static int test_add_all(cmd_args_t *args) -{ - ListIterator si, ti; - subsystem_t *sub; - test_t *test; - int rc; - - si = list_iterator_create(subsystems); - - while ((sub = list_next(si))) { - ti = list_iterator_create(sub->sub_tests); - - while ((test = list_next(ti))) { - if ((rc = test_add(args, test))) { - list_iterator_destroy(ti); - list_iterator_destroy(si); - return rc; - } - } - - list_iterator_destroy(ti); - } - - list_iterator_destroy(si); - - return 0; -} - -static int test_run(cmd_args_t *args, test_t *test) -{ - subsystem_t *sub = test->test_sub; - splat_cmd_t *cmd; - int rc, cmd_size; - - dev_clear(); - - cmd_size = sizeof(*cmd); - cmd = (splat_cmd_t *)malloc(cmd_size); - if (cmd == NULL) - return -ENOMEM; - - memset(cmd, 0, cmd_size); - cmd->cmd_magic = SPLAT_CMD_MAGIC; - cmd->cmd_subsystem = sub->sub_desc.id; - cmd->cmd_test = test->test_desc.id; - cmd->cmd_data_size = 0; /* Unused feature */ - - fprintf(stdout, "%*s:%-*s ", - SPLAT_NAME_SIZE, sub->sub_desc.name, - SPLAT_NAME_SIZE, 
test->test_desc.name); - fflush(stdout); - rc = ioctl(splatctl_fd, SPLAT_CMD, cmd); - if (args->args_do_color) { - fprintf(stdout, "%s %s\n", rc ? - COLOR_RED "Fail" COLOR_RESET : - COLOR_GREEN "Pass" COLOR_RESET, - rc ? strerror(errno) : ""); - } else { - fprintf(stdout, "%s %s\n", rc ? - "Fail" : "Pass", - rc ? strerror(errno) : ""); - } - fflush(stdout); - free(cmd); - - if ((args->args_verbose == 1 && rc) || - (args->args_verbose >= 2)) { - if ((rc = read(splatctl_fd, splat_buffer, - splat_buffer_size - 1)) < 0) { - fprintf(stdout, "Error reading results: %d\n", rc); - } else { - fprintf(stdout, "\n%s\n", splat_buffer); - fflush(stdout); - } - } - - return rc; -} - -static int tests_run(cmd_args_t *args) -{ - ListIterator i; - test_t *test; - int rc; - - fprintf(stdout, - "------------------------------ " - "Running SPLAT Tests " - "------------------------------\n"); - - i = list_iterator_create(args->args_tests); - - while ((test = list_next(i))) { - rc = test_run(args, test); - if (rc && args->args_exit_on_error) { - list_iterator_destroy(i); - return rc; - } - } - - list_iterator_destroy(i); - return 0; -} - -static int args_parse_test(cmd_args_t *args, char *str) -{ - ListIterator si, ti; - subsystem_t *s; - test_t *t; - char *sub_str, *test_str; - int sub_num, test_num; - int sub_all = 0, test_all = 0; - int rc, flag = 0; - - test_str = strchr(str, ':'); - if (test_str == NULL) { - fprintf(stderr, "Test must be of the " - "form \n"); - return -EINVAL; - } - - sub_str = str; - test_str[0] = '\0'; - test_str = test_str + 1; - - sub_num = strtol(sub_str, NULL, 0); - test_num = strtol(test_str, NULL, 0); - - if (!strncasecmp(sub_str, "all", strlen(sub_str)) || (sub_num == -1)) - sub_all = 1; - - if (!strncasecmp(test_str,"all",strlen(test_str)) || (test_num == -1)) - test_all = 1; - - si = list_iterator_create(subsystems); - - if (sub_all) { - if (test_all) { - /* Add all tests from all subsystems */ - while ((s = list_next(si))) { - ti = list_iterator_create(s->sub_tests); - while ((t = list_next(ti))) { - if ((rc = test_add(args, t))) { - list_iterator_destroy(ti); - goto error_run; - } - } - list_iterator_destroy(ti); - } - } else { - /* Add a specific test from all subsystems */ - while ((s = list_next(si))) { - if ((t=test_find(s->sub_desc.name,test_str))) { - if ((rc = test_add(args, t))) - goto error_run; - - flag = 1; - } - } - - if (!flag) - fprintf(stderr, "No tests '%s:%s' could be " - "found\n", sub_str, test_str); - } - } else { - if (test_all) { - /* Add all tests from a specific subsystem */ - while ((s = list_next(si))) { - if (strncasecmp(sub_str, s->sub_desc.name, - strlen(sub_str))) - continue; - - ti = list_iterator_create(s->sub_tests); - while ((t = list_next(ti))) { - if ((rc = test_add(args, t))) { - list_iterator_destroy(ti); - goto error_run; - } - } - list_iterator_destroy(ti); - } - } else { - /* Add a specific test from a specific subsystem */ - if ((t = test_find(sub_str, test_str))) { - if ((rc = test_add(args, t))) - goto error_run; - } else { - fprintf(stderr, "Test '%s:%s' could not be " - "found\n", sub_str, test_str); - return -EINVAL; - } - } - } - - list_iterator_destroy(si); - - return 0; - -error_run: - list_iterator_destroy(si); - - fprintf(stderr, "Test '%s:%s' not added to run list: %d\n", - sub_str, test_str, rc); - - return rc; -} - -static void args_fini(cmd_args_t *args) -{ - assert(args != NULL); - - if (args->args_tests != NULL) - list_destroy(args->args_tests); - - free(args); -} - -static cmd_args_t * -args_init(int argc, char 
**argv) -{ - cmd_args_t *args; - int c, rc; - - if (argc == 1) { - usage(); - return (cmd_args_t *) NULL; - } - - /* Configure and populate the args structures */ - args = malloc(sizeof(*args)); - if (args == NULL) - return NULL; - - memset(args, 0, sizeof(*args)); - args->args_verbose = 0; - args->args_do_list = 0; - args->args_do_all = 0; - args->args_do_color = 1; - args->args_exit_on_error = 0; - args->args_tests = list_create((ListDelF)test_fini); - if (args->args_tests == NULL) { - args_fini(args); - return NULL; - } - - while ((c = getopt_long(argc, argv, shortOpts, longOpts, NULL)) != -1){ - switch (c) { - case 'v': args->args_verbose++; break; - case 'l': args->args_do_list = 1; break; - case 'a': args->args_do_all = 1; break; - case 'c': args->args_do_color = 0; break; - case 'x': args->args_exit_on_error = 1; break; - case 't': - if (args->args_do_all) { - fprintf(stderr, "Option -t is " - "useless when used with -a\n"); - args_fini(args); - return NULL; - } - - rc = args_parse_test(args, argv[optind - 1]); - if (rc) { - args_fini(args); - return NULL; - } - break; - case 'h': - case '?': - usage(); - args_fini(args); - return NULL; - default: - fprintf(stderr, "Unknown option '%s'\n", - argv[optind - 1]); - break; - } - } - - return args; -} - -static int -dev_clear(void) -{ - splat_cfg_t cfg; - int rc; - - memset(&cfg, 0, sizeof(cfg)); - cfg.cfg_magic = SPLAT_CFG_MAGIC; - cfg.cfg_cmd = SPLAT_CFG_BUFFER_CLEAR; - cfg.cfg_arg1 = 0; - - rc = ioctl(splatctl_fd, SPLAT_CFG, &cfg); - if (rc) - fprintf(stderr, "Ioctl() error %lu / %d: %d\n", - (unsigned long) SPLAT_CFG, cfg.cfg_cmd, errno); - - lseek(splatctl_fd, 0, SEEK_SET); - - return rc; -} - -static int -dev_size(int size) -{ - splat_cfg_t cfg; - int rc; - - memset(&cfg, 0, sizeof(cfg)); - cfg.cfg_magic = SPLAT_CFG_MAGIC; - cfg.cfg_cmd = SPLAT_CFG_BUFFER_SIZE; - cfg.cfg_arg1 = size; - - rc = ioctl(splatctl_fd, SPLAT_CFG, &cfg); - if (rc) { - fprintf(stderr, "Ioctl() error %lu / %d: %d\n", - (unsigned long) SPLAT_CFG, cfg.cfg_cmd, errno); - return rc; - } - - return cfg.cfg_rc1; -} - -static void -dev_fini(void) -{ - if (splat_buffer) - free(splat_buffer); - - if (splatctl_fd != -1) { - if (close(splatctl_fd) == -1) { - fprintf(stderr, "Unable to close %s: %d\n", - SPLAT_DEV, errno); - } - } -} - -static int -dev_init(void) -{ - ListIterator i; - subsystem_t *sub; - int rc; - - splatctl_fd = open(SPLAT_DEV, O_RDONLY); - if (splatctl_fd == -1) { - fprintf(stderr, "Unable to open %s: %d\n" - "Is the splat module loaded?\n", SPLAT_DEV, errno); - rc = errno; - goto error; - } - - /* Determine kernel module version string */ - memset(splat_version, 0, VERSION_SIZE); - if ((rc = read(splatctl_fd, splat_version, VERSION_SIZE - 1)) == -1) - goto error; - - if ((rc = dev_clear())) - goto error; - - if ((rc = dev_size(0)) < 0) - goto error; - - splat_buffer_size = rc; - splat_buffer = (char *)malloc(splat_buffer_size); - if (splat_buffer == NULL) { - rc = -ENOMEM; - goto error; - } - - memset(splat_buffer, 0, splat_buffer_size); - - /* Determine available subsystems */ - if ((rc = subsystem_setup()) != 0) - goto error; - - /* Determine available tests for all subsystems */ - i = list_iterator_create(subsystems); - - while ((sub = list_next(i))) { - if ((rc = test_setup(sub)) != 0) { - list_iterator_destroy(i); - goto error; - } - } - - list_iterator_destroy(i); - return 0; - -error: - if (splatctl_fd != -1) { - if (close(splatctl_fd) == -1) { - fprintf(stderr, "Unable to close %s: %d\n", - SPLAT_DEV, errno); - } - } - - return rc; -} - -int 
-init(void) -{ - int rc = 0; - - /* Allocate the subsystem list */ - subsystems = list_create((ListDelF)subsystem_fini); - if (subsystems == NULL) - rc = ENOMEM; - - return rc; -} - -void -fini(void) -{ - list_destroy(subsystems); -} - - -int -main(int argc, char **argv) -{ - cmd_args_t *args = NULL; - int rc = 0; - - /* General init */ - if ((rc = init())) - return rc; - - /* Device specific init */ - if ((rc = dev_init())) - goto out; - - /* Argument init and parsing */ - if ((args = args_init(argc, argv)) == NULL) { - rc = -1; - goto out; - } - - /* Generic kernel version string */ - if (args->args_verbose) - fprintf(stdout, "%s", splat_version); - - /* Print the available test list and exit */ - if (args->args_do_list) { - subsystem_list(subsystems, 0); - goto out; - } - - /* Add all available test to the list of tests to run */ - if (args->args_do_all) { - if ((rc = test_add_all(args))) - goto out; - } - - /* Run all the requested tests */ - if ((rc = tests_run(args))) - goto out; - -out: - if (args != NULL) - args_fini(args); - - dev_fini(); - fini(); - return rc; -} diff -Naur spl-0.6.5.7/cmd/splat.h spl-0.6.5.7.new/cmd/splat.h --- spl-0.6.5.7/cmd/splat.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/cmd/splat.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,70 +0,0 @@ -/*****************************************************************************\ - * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. - * Copyright (C) 2007 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Brian Behlendorf . - * UCRL-CODE-235197 - * - * This file is part of the SPL, Solaris Porting Layer. - * For details, see . - * - * The SPL is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * The SPL is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License along - * with the SPL. If not, see . 
-\*****************************************************************************/ - -#ifndef _SPLAT_H -#define _SPLAT_H - -#include "list.h" -#include "../include/splat-ctl.h" - -#define DEV_NAME "/dev/splatctl" -#define COLOR_BLACK "\033[0;30m" -#define COLOR_DK_GRAY "\033[1;30m" -#define COLOR_BLUE "\033[0;34m" -#define COLOR_LT_BLUE "\033[1;34m" -#define COLOR_GREEN "\033[0;32m" -#define COLOR_LT_GREEN "\033[1;32m" -#define COLOR_CYAN "\033[0;36m" -#define COLOR_LT_CYAN "\033[1;36m" -#define COLOR_RED "\033[0;31m" -#define COLOR_LT_RED "\033[1;31m" -#define COLOR_PURPLE "\033[0;35m" -#define COLOR_LT_PURPLE "\033[1;35m" -#define COLOR_BROWN "\033[0;33m" -#define COLOR_YELLOW "\033[1;33m" -#define COLOR_LT_GRAY "\033[0;37m" -#define COLOR_WHITE "\033[1;37m" -#define COLOR_RESET "\033[0m" - -typedef struct subsystem { - splat_user_t sub_desc; /* Subsystem description */ - List sub_tests; /* Assocated subsystem tests list */ -} subsystem_t; - -typedef struct test { - splat_user_t test_desc; /* Test description */ - subsystem_t *test_sub; /* Parent subsystem */ -} test_t; - -typedef struct cmd_args { - int args_verbose; /* Verbose flag */ - int args_do_list; /* Display all tests flag */ - int args_do_all; /* Run all tests flag */ - int args_do_color; /* Colorize output */ - int args_exit_on_error; /* Exit on first error flag */ - List args_tests; /* Requested subsystems/tests */ -} cmd_args_t; - -#endif /* _SPLAT_H */ - diff -Naur spl-0.6.5.7/cmd/splslab/Makefile.am spl-0.6.5.7.new/cmd/splslab/Makefile.am --- spl-0.6.5.7/cmd/splslab/Makefile.am 1970-01-01 01:00:00.000000000 +0100 +++ spl-0.6.5.7.new/cmd/splslab/Makefile.am 2016-08-01 16:43:23.436766051 +0200 @@ -0,0 +1,2 @@ +bin_SCRIPTS = splslab.py +EXTRA_DIST = $(bin_SCRIPTS) diff -Naur spl-0.6.5.7/cmd/splslab/splslab.py spl-0.6.5.7.new/cmd/splslab/splslab.py --- spl-0.6.5.7/cmd/splslab/splslab.py 1970-01-01 01:00:00.000000000 +0100 +++ spl-0.6.5.7.new/cmd/splslab/splslab.py 2016-08-01 16:43:23.436766051 +0200 @@ -0,0 +1,202 @@ +#!/usr/bin/python + +import sys +import time +import getopt +import re +import signal +from collections import defaultdict + +class Stat: + # flag definitions based on the kmem.h + NOTOUCH = 1 + NODEBUG = 2 + KMEM = 32 + VMEM = 64 + SLAB = 128 + OFFSLAB = 256 + NOEMERGENCY = 512 + DEADLOCKED = 16384 + GROWING = 32768 + REAPING = 65536 + DESTROY = 131072 + + fdefs = { + NOTOUCH : "NTCH", + NODEBUG : "NDBG", + KMEM : "KMEM", + VMEM : "VMEM", + SLAB : "SLAB", + OFFSLAB : "OFSL", + NOEMERGENCY : "NEMG", + DEADLOCKED : "DDLK", + GROWING : "GROW", + REAPING : "REAP", + DESTROY : "DSTR" + } + + def __init__(self, name, flags, size, alloc, slabsize, objsize): + self._name = name + self._flags = self.f2str(flags) + self._size = size + self._alloc = alloc + self._slabsize = slabsize + self._objsize = objsize + + def f2str(self, flags): + fstring = '' + for k in Stat.fdefs.keys(): + if flags & k: + fstring = fstring + Stat.fdefs[k] + '|' + + fstring = fstring[:-1] + return fstring + +class CumulativeStat: + def __init__(self, skey="a"): + self._size = 0 + self._alloc = 0 + self._pct = 0 + self._skey = skey + self._regexp = \ + re.compile('(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+'); + self._stats = defaultdict(list) + + # Add another stat to the dictionary and re-calculate the totals + def add(self, s): + key = 0 + if self._skey == "a": + key = s._alloc + else: + key = s._size + self._stats[key].append(s) + self._size = self._size + s._size + self._alloc = self._alloc + s._alloc + if self._size: + self._pct = 
self._alloc * 100 / self._size
+        else:
+            self._pct = 0
+
+    # Parse the slab info in procfs and calculate cumulative stats
+    def slab_update(self):
+        k = [line.strip() for line in open('/proc/spl/kmem/slab')]
+
+        if not k:
+            sys.stderr.write("No SPL slab stats found\n")
+            sys.exit(1)
+
+        del k[0:2]
+
+        for s in k:
+            if not s:
+                continue
+            m = self._regexp.match(s)
+            if m:
+                self.add(Stat(m.group(1), int(m.group(2),16), int(m.group(3)),
+                    int(m.group(4)), int(m.group(5)), int(m.group(6))))
+            else:
+                sys.stderr.write("Error: unexpected input format: %s\n" % s)
+                exit(-1)
+
+    def show_header(self):
+        sys.stdout.write("\n%25s %20s %15s %15s %15s %15s\n\n" % \
+            ("cache name", "flags", "size", "alloc", "slabsize", "objsize"))
+
+    # Show up to the number of 'rows' of output sorted in descending order
+    # by the key specified earlier; if rows == 0, all rows are shown
+    def show(self, rows):
+        self.show_header()
+        i = 1
+        done = False
+        for k in reversed(sorted(self._stats.keys())):
+            for s in self._stats[k]:
+                sys.stdout.write("%25s %20s %15d %15d %15d %15d\n" % \
+                    (s._name, s._flags, s._size, s._alloc, \
+                    s._slabsize, s._objsize))
+                i = i + 1
+                if rows != 0 and i > rows:
+                    done = True
+                    break
+            if done:
+                break
+        sys.stdout.write("%25s %36d %15d (%d%%)\n\n" % \
+            ("Totals:", self._size, self._alloc, self._pct))
+
+def usage():
+    cmd = "Usage: splslab.py [-n|--num-rows] number [-s|--sort-by] " + \
+        "[interval] [count]"
+    sys.stderr.write("%s\n" % cmd)
+    sys.stderr.write("\t-h : print help\n")
+    sys.stderr.write("\t-n : --num-rows N : limit output to N top " +
+        "largest slabs (default: all)\n")
+    sys.stderr.write("\t-s : --sort-by key : sort output in descending " +
+        "order by total size (s)\n\t\tor allocated size (a) " +
+        "(default: a)\n")
+    sys.stderr.write("\tinterval : repeat every interval seconds\n")
+    sys.stderr.write("\tcount : output statistics count times and exit\n")
+
+
+def main():
+
+    rows = 0
+    count = 0
+    skey = "a"
+    interval = 1
+
+    signal.signal(signal.SIGINT, signal.SIG_DFL)
+
+    try:
+        opts, args = getopt.getopt(
+            sys.argv[1:],
+            "n:s:h",
+            [
+                "num-rows",
+                "sort-by",
+                "help"
+            ]
+        )
+    except getopt.error as e:
+        sys.stderr.write("Error: %s\n" % e.msg)
+        usage()
+        exit(-1)
+
+    i = 1
+    for opt, arg in opts:
+        if opt in ('-n', '--num-rows'):
+            rows = int(arg)
+            i = i + 2
+        elif opt in ('-s', '--sort-by'):
+            if arg != "s" and arg != "a":
+                sys.stderr.write("Error: invalid sorting key \"%s\"\n" % arg)
+                usage()
+                exit(-1)
+            skey = arg
+            i = i + 2
+        elif opt in ('-h', '--help'):
+            usage()
+            exit(0)
+        else:
+            break
+
+    args = sys.argv[i:]
+
+    interval = int(args[0]) if len(args) else interval
+    count = int(args[1]) if len(args) > 1 else count
+
+    i = 0
+    while True:
+        cs = CumulativeStat(skey)
+        cs.slab_update()
+        cs.show(rows)
+
+        i = i + 1
+        if count and i >= count:
+            break
+
+        time.sleep(interval)
+
+    return 0
+
+if __name__ == '__main__':
+    main()
diff -Naur spl-0.6.5.7/config/spl-build.m4 spl-0.6.5.7.new/config/spl-build.m4
--- spl-0.6.5.7/config/spl-build.m4	2015-12-24 01:31:01.000000000 +0100
+++ spl-0.6.5.7.new/config/spl-build.m4	2016-08-01 16:43:34.280796341 +0200
@@ -39,11 +39,15 @@
 	SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
 	SPL_AC_SHRINK_CONTROL_STRUCT
 	SPL_AC_RWSEM_SPINLOCK_IS_RAW
+	SPL_AC_RWSEM_ACTIVITY
+	SPL_AC_RWSEM_ATOMIC_LONG_COUNT
 	SPL_AC_SCHED_RT_HEADER
 	SPL_AC_2ARGS_VFS_GETATTR
 	SPL_AC_USLEEP_RANGE
 	SPL_AC_KMEM_CACHE_ALLOCFLAGS
 	SPL_AC_WAIT_ON_BIT
+	SPL_AC_MUTEX_OWNER
+	SPL_AC_INODE_LOCK
 ])
 
 AC_DEFUN([SPL_AC_MODULE_SYMVERS], [
@@ -1315,6 +1319,55 @@
 ])
 
 dnl #
+dnl # 3.16 API Change
+dnl #
+dnl # rwsem-spinlock "->activity" changed to "->count"
+dnl #
+AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
+	AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
+	tmp_flags="$EXTRA_KCFLAGS"
+	EXTRA_KCFLAGS="-Werror"
+	SPL_LINUX_TRY_COMPILE([
+		#include <linux/rwsem.h>
+	],[
+		struct rw_semaphore dummy_semaphore __attribute__ ((unused));
+		dummy_semaphore.activity = 0;
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
+		    [struct rw_semaphore has member activity])
+	],[
+		AC_MSG_RESULT(no)
+	])
+	EXTRA_KCFLAGS="$tmp_flags"
+])
+
+dnl #
+dnl # 4.8 API Change
+dnl #
+dnl # rwsem "->count" changed to atomic_long_t type
+dnl #
+AC_DEFUN([SPL_AC_RWSEM_ATOMIC_LONG_COUNT], [
+	AC_MSG_CHECKING(
+	    [whether struct rw_semaphore has atomic_long_t member count])
+	tmp_flags="$EXTRA_KCFLAGS"
+	EXTRA_KCFLAGS="-Werror"
+	SPL_LINUX_TRY_COMPILE([
+		#include <linux/rwsem.h>
+	],[
+		DECLARE_RWSEM(dummy_semaphore);
+		(void) atomic_long_read(&dummy_semaphore.count);
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1,
+		    [struct rw_semaphore has atomic_long_t member count])
+	],[
+		AC_MSG_RESULT(no)
+	])
+	EXTRA_KCFLAGS="$tmp_flags"
+])
+
+dnl #
 dnl # 3.9 API change,
 dnl # Moved things from linux/sched.h to linux/sched/rt.h
 dnl #
@@ -1447,3 +1500,55 @@
 	AC_MSG_RESULT(no)
 	])
 ])
+
+dnl #
+dnl # Check whether mutex has owner with task_struct type.
+dnl #
+dnl # Note that before Linux 3.0, mutex owner is of type thread_info.
+dnl #
+dnl # Note that in Linux 3.18, the condition for owner is changed from
+dnl # defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) to
+dnl # defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
+dnl #
+AC_DEFUN([SPL_AC_MUTEX_OWNER], [
+	AC_MSG_CHECKING([whether mutex has owner])
+	tmp_flags="$EXTRA_KCFLAGS"
+	EXTRA_KCFLAGS="-Werror"
+	SPL_LINUX_TRY_COMPILE([
+		#include <linux/mutex.h>
+	],[
+		DEFINE_MUTEX(m);
+		struct task_struct *t __attribute__ ((unused));
+		t = m.owner;
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_MUTEX_OWNER, 1, [yes])
+	],[
+		AC_MSG_RESULT(no)
+	])
+	EXTRA_KCFLAGS="$tmp_flags"
+])
+
+dnl #
+dnl # 4.7 API change
+dnl # i_mutex is changed to i_rwsem. Instead of directly using
+dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared()
+dnl # We test inode_lock_shared because inode_lock is introduced earlier.
+dnl #
+AC_DEFUN([SPL_AC_INODE_LOCK], [
+	AC_MSG_CHECKING([whether inode_lock_shared() exists])
+	tmp_flags="$EXTRA_KCFLAGS"
+	EXTRA_KCFLAGS="-Werror"
+	SPL_LINUX_TRY_COMPILE([
+		#include <linux/fs.h>
+	],[
+		struct inode *inode = NULL;
+		inode_lock_shared(inode);
+	],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes])
+	],[
+		AC_MSG_RESULT(no)
+	])
+	EXTRA_KCFLAGS="$tmp_flags"
+])
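The four checks added above all follow the same SPL_LINUX_TRY_COMPILE pattern: compile a minimal kernel snippet with -Werror and, on success, AC_DEFINE a HAVE_* macro that SPL source can key compatibility wrappers off. As a minimal sketch of how the inode-locking result would typically be consumed, consider the following; the wrapper name spl_inode_lock_shared() is hypothetical and is not introduced by this patch:

#include <linux/fs.h>
#include <linux/mutex.h>

/*
 * Hypothetical compatibility wrapper (not part of this patch) showing
 * how the HAVE_INODE_LOCK_SHARED result from SPL_AC_INODE_LOCK above
 * would typically be used.
 */
static inline void
spl_inode_lock_shared(struct inode *ip)
{
#if defined(HAVE_INODE_LOCK_SHARED)
	/* Linux >= 4.7: i_rwsem, taken for read */
	inode_lock_shared(ip);
#else
	/* Older kernels: exclusive i_mutex, no shared variant exists */
	mutex_lock(&ip->i_mutex);
#endif
}

On the user-space side, splat.c above implements a small two-phase discovery protocol over the SPLAT_CFG ioctl: a *_COUNT command first returns the number of registered descriptors in cfg_rc1, then the same query is repeated with a *_LIST command against a buffer sized from that count. A condensed sketch of subsystem_setup() under those assumptions (error reporting trimmed; calloc stands in for the malloc/memset pair in the original):

#include <stdlib.h>
#include <sys/ioctl.h>
#include "../include/splat-ctl.h"

static splat_cfg_t *
splat_query_subsystems(int fd)
{
	splat_cfg_t *cfg;
	int n;

	/* Pass 1: how many subsystems are registered? */
	cfg = calloc(1, sizeof (*cfg));
	if (cfg == NULL)
		return (NULL);
	cfg->cfg_magic = SPLAT_CFG_MAGIC;
	cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_COUNT;
	if (ioctl(fd, SPLAT_CFG, cfg)) {
		free(cfg);
		return (NULL);
	}
	n = cfg->cfg_rc1;
	free(cfg);

	/* Pass 2: fetch the n descriptors into a correctly sized buffer. */
	cfg = calloc(1, sizeof (*cfg) + n * sizeof (splat_user_t));
	if (cfg == NULL)
		return (NULL);
	cfg->cfg_magic = SPLAT_CFG_MAGIC;
	cfg->cfg_cmd = SPLAT_CFG_SUBSYSTEM_LIST;
	cfg->cfg_data.splat_subsystems.size = n;
	if (ioctl(fd, SPLAT_CFG, cfg)) {
		free(cfg);
		return (NULL);
	}

	/* cfg->cfg_rc1 descriptors are now valid in
	 * cfg->cfg_data.splat_subsystems.descs[]. */
	return (cfg);
}

test_setup() follows the identical two-phase pattern with SPLAT_CFG_TEST_COUNT and SPLAT_CFG_TEST_LIST, adding only cfg_arg1 to select the subsystem of interest.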
diff -Naur spl-0.6.5.7/config/spl-build.m4.orig spl-0.6.5.7.new/config/spl-build.m4.orig
--- spl-0.6.5.7/config/spl-build.m4.orig	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/config/spl-build.m4.orig	2016-08-01 16:43:25.376771470 +0200
@@ -0,0 +1,1449 @@
+###############################################################################
+# Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+# Copyright (C) 2007 The Regents of the University of California.
+# Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+###############################################################################
+# SPL_AC_CONFIG_KERNEL: Default SPL kernel configuration.
+###############################################################################
+
+AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
+	SPL_AC_KERNEL
+
+	if test "${LINUX_OBJ}" != "${LINUX}"; then
+		KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
+	fi
+	AC_SUBST(KERNELMAKE_PARAMS)
+
+	KERNELCPPFLAGS="$KERNELCPPFLAGS -Wstrict-prototypes"
+	AC_SUBST(KERNELCPPFLAGS)
+
+	SPL_AC_DEBUG
+	SPL_AC_DEBUG_KMEM
+	SPL_AC_DEBUG_KMEM_TRACKING
+	SPL_AC_TEST_MODULE
+	SPL_AC_ATOMIC_SPINLOCK
+	SPL_AC_SHRINKER_CALLBACK
+	SPL_AC_CTL_NAME
+	SPL_AC_PDE_DATA
+	SPL_AC_SET_FS_PWD_WITH_CONST
+	SPL_AC_2ARGS_VFS_UNLINK
+	SPL_AC_4ARGS_VFS_RENAME
+	SPL_AC_2ARGS_VFS_FSYNC
+	SPL_AC_INODE_TRUNCATE_RANGE
+	SPL_AC_FS_STRUCT_SPINLOCK
+	SPL_AC_KUIDGID_T
+	SPL_AC_PUT_TASK_STRUCT
+	SPL_AC_KERNEL_FALLOCATE
+	SPL_AC_CONFIG_ZLIB_INFLATE
+	SPL_AC_CONFIG_ZLIB_DEFLATE
+	SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
+	SPL_AC_SHRINK_CONTROL_STRUCT
+	SPL_AC_RWSEM_SPINLOCK_IS_RAW
+	SPL_AC_SCHED_RT_HEADER
+	SPL_AC_2ARGS_VFS_GETATTR
+	SPL_AC_USLEEP_RANGE
+	SPL_AC_KMEM_CACHE_ALLOCFLAGS
+	SPL_AC_WAIT_ON_BIT
+])
+
+AC_DEFUN([SPL_AC_MODULE_SYMVERS], [
+	modpost=$LINUX/scripts/Makefile.modpost
+	AC_MSG_CHECKING([kernel file name for module symbols])
+	if test "x$enable_linux_builtin" != xyes -a -f "$modpost"; then
+		if grep -q Modules.symvers $modpost; then
+			LINUX_SYMBOLS=Modules.symvers
+		else
+			LINUX_SYMBOLS=Module.symvers
+		fi
+
+		if ! test -f "$LINUX_OBJ/$LINUX_SYMBOLS"; then
+			AC_MSG_ERROR([
+	*** Please make sure the kernel devel package for your distribution
+	*** is installed. If you are building with a custom kernel, make sure the
+	*** kernel is configured, built, and the '--with-linux=PATH' configure
+	*** option refers to the location of the kernel source.])
+		fi
+	else
+		LINUX_SYMBOLS=NONE
+	fi
+	AC_MSG_RESULT($LINUX_SYMBOLS)
+	AC_SUBST(LINUX_SYMBOLS)
+])
+
+AC_DEFUN([SPL_AC_KERNEL], [
+	AC_ARG_WITH([linux],
+		AS_HELP_STRING([--with-linux=PATH],
+		[Path to kernel source]),
+		[kernelsrc="$withval"])
+
+	AC_ARG_WITH([linux-obj],
+		AS_HELP_STRING([--with-linux-obj=PATH],
+		[Path to kernel build objects]),
+		[kernelbuild="$withval"])
+
+	AC_MSG_CHECKING([kernel source directory])
+	if test -z "$kernelsrc"; then
+		if test -e "/lib/modules/$(uname -r)/source"; then
+			headersdir="/lib/modules/$(uname -r)/source"
+			sourcelink=$(readlink -f "$headersdir")
+		elif test -e "/lib/modules/$(uname -r)/build"; then
+			headersdir="/lib/modules/$(uname -r)/build"
+			sourcelink=$(readlink -f "$headersdir")
+		else
+			sourcelink=$(ls -1d /usr/src/kernels/* \
+			             /usr/src/linux-* \
+			             2>/dev/null | grep -v obj | tail -1)
+		fi
+
+		if test -n "$sourcelink" && test -e ${sourcelink}; then
+			kernelsrc=`readlink -f ${sourcelink}`
+		else
+			kernelsrc="[Not found]"
+		fi
+	else
+		if test "$kernelsrc" = "NONE"; then
+			kernsrcver=NONE
+		fi
+	fi
+
+	AC_MSG_RESULT([$kernelsrc])
+	if test ! -d "$kernelsrc"; then
+		AC_MSG_ERROR([
+	*** Please make sure the kernel devel package for your distribution
+	*** is installed and then try again.
If that fails, you can specify the + *** location of the kernel source with the '--with-linux=PATH' option.]) + fi + + AC_MSG_CHECKING([kernel build directory]) + if test -z "$kernelbuild"; then + if test -e "/lib/modules/$(uname -r)/build"; then + kernelbuild=`readlink -f /lib/modules/$(uname -r)/build` + elif test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}; then + kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu} + elif test -d ${kernelsrc}-obj/${target_cpu}/default; then + kernelbuild=${kernelsrc}-obj/${target_cpu}/default + elif test -d `dirname ${kernelsrc}`/build-${target_cpu}; then + kernelbuild=`dirname ${kernelsrc}`/build-${target_cpu} + else + kernelbuild=${kernelsrc} + fi + fi + AC_MSG_RESULT([$kernelbuild]) + + AC_MSG_CHECKING([kernel source version]) + utsrelease1=$kernelbuild/include/linux/version.h + utsrelease2=$kernelbuild/include/linux/utsrelease.h + utsrelease3=$kernelbuild/include/generated/utsrelease.h + if test -r $utsrelease1 && fgrep -q UTS_RELEASE $utsrelease1; then + utsrelease=linux/version.h + elif test -r $utsrelease2 && fgrep -q UTS_RELEASE $utsrelease2; then + utsrelease=linux/utsrelease.h + elif test -r $utsrelease3 && fgrep -q UTS_RELEASE $utsrelease3; then + utsrelease=generated/utsrelease.h + fi + + if test "$utsrelease"; then + kernsrcver=`(echo "#include <$utsrelease>"; + echo "kernsrcver=UTS_RELEASE") | + cpp -I $kernelbuild/include | + grep "^kernsrcver=" | cut -d \" -f 2` + + if test -z "$kernsrcver"; then + AC_MSG_RESULT([Not found]) + AC_MSG_ERROR([*** Cannot determine kernel version.]) + fi + else + AC_MSG_RESULT([Not found]) + if test "x$enable_linux_builtin" != xyes; then + AC_MSG_ERROR([*** Cannot find UTS_RELEASE definition.]) + else + AC_MSG_ERROR([ + *** Cannot find UTS_RELEASE definition. + *** Please run 'make prepare' inside the kernel source tree.]) + fi + fi + + AC_MSG_RESULT([$kernsrcver]) + + LINUX=${kernelsrc} + LINUX_OBJ=${kernelbuild} + LINUX_VERSION=${kernsrcver} + + AC_SUBST(LINUX) + AC_SUBST(LINUX_OBJ) + AC_SUBST(LINUX_VERSION) + + SPL_AC_MODULE_SYMVERS +]) + +dnl # +dnl # Default SPL user configuration +dnl # +AC_DEFUN([SPL_AC_CONFIG_USER], []) + +dnl # +dnl # Check for rpm+rpmbuild to build RPM packages. If these tools +dnl # are missing, it is non-fatal, but you will not be able to build +dnl # RPM packages and will be warned if you try too. +dnl # +dnl # By default, the generic spec file will be used because it requires +dnl # minimal dependencies. Distribution specific spec files can be +dnl # placed under the 'rpm/' directory and enabled using +dnl # the --with-spec= configure option. 

dnl #
dnl # Check for rpm+rpmbuild to build RPM packages. If these tools
dnl # are missing, it is non-fatal, but you will not be able to build
dnl # RPM packages and will be warned if you try to.
dnl #
dnl # By default, the generic spec file will be used because it requires
dnl # minimal dependencies. Distribution-specific spec files can be
dnl # placed under the 'rpm/<distribution>' directory and enabled using
dnl # the --with-spec=<distribution> configure option.
dnl #
AC_DEFUN([SPL_AC_RPM], [
	RPM=rpm
	RPMBUILD=rpmbuild

	AC_MSG_CHECKING([whether $RPM is available])
	AS_IF([tmp=$($RPM --version 2>/dev/null)], [
		RPM_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }')
		HAVE_RPM=yes
		AC_MSG_RESULT([$HAVE_RPM ($RPM_VERSION)])
	],[
		HAVE_RPM=no
		AC_MSG_RESULT([$HAVE_RPM])
	])

	AC_MSG_CHECKING([whether $RPMBUILD is available])
	AS_IF([tmp=$($RPMBUILD --version 2>/dev/null)], [
		RPMBUILD_VERSION=$(echo $tmp | $AWK '/RPM/ { print $[3] }')
		HAVE_RPMBUILD=yes
		AC_MSG_RESULT([$HAVE_RPMBUILD ($RPMBUILD_VERSION)])
	],[
		HAVE_RPMBUILD=no
		AC_MSG_RESULT([$HAVE_RPMBUILD])
	])

	RPM_DEFINE_COMMON='--define "$(DEBUG_SPL) 1" --define "$(DEBUG_KMEM) 1" --define "$(DEBUG_KMEM_TRACKING) 1"'
	RPM_DEFINE_UTIL=
	RPM_DEFINE_KMOD='--define "kernels $(LINUX_VERSION)"'
	RPM_DEFINE_DKMS=

	SRPM_DEFINE_COMMON='--define "build_src_rpm 1"'
	SRPM_DEFINE_UTIL=
	SRPM_DEFINE_KMOD=
	SRPM_DEFINE_DKMS=

	RPM_SPEC_DIR="rpm/generic"
	AC_ARG_WITH([spec],
		AS_HELP_STRING([--with-spec=SPEC],
		[Spec files 'generic|redhat']),
		[RPM_SPEC_DIR="rpm/$withval"])

	AC_MSG_CHECKING([whether spec files are available])
	AC_MSG_RESULT([yes ($RPM_SPEC_DIR/*.spec.in)])

	AC_SUBST(HAVE_RPM)
	AC_SUBST(RPM)
	AC_SUBST(RPM_VERSION)

	AC_SUBST(HAVE_RPMBUILD)
	AC_SUBST(RPMBUILD)
	AC_SUBST(RPMBUILD_VERSION)

	AC_SUBST(RPM_SPEC_DIR)
	AC_SUBST(RPM_DEFINE_UTIL)
	AC_SUBST(RPM_DEFINE_KMOD)
	AC_SUBST(RPM_DEFINE_DKMS)
	AC_SUBST(RPM_DEFINE_COMMON)
	AC_SUBST(SRPM_DEFINE_UTIL)
	AC_SUBST(SRPM_DEFINE_KMOD)
	AC_SUBST(SRPM_DEFINE_DKMS)
	AC_SUBST(SRPM_DEFINE_COMMON)
])

dnl #
dnl # Check for dpkg+dpkg-buildpackage to build DEB packages. If these
dnl # tools are missing, it is non-fatal, but you will not be able to build
dnl # DEB packages and will be warned if you try to.
dnl #
AC_DEFUN([SPL_AC_DPKG], [
	DPKG=dpkg
	DPKGBUILD=dpkg-buildpackage

	AC_MSG_CHECKING([whether $DPKG is available])
	AS_IF([tmp=$($DPKG --version 2>/dev/null)], [
		DPKG_VERSION=$(echo $tmp | $AWK '/Debian/ { print $[7] }')
		HAVE_DPKG=yes
		AC_MSG_RESULT([$HAVE_DPKG ($DPKG_VERSION)])
	],[
		HAVE_DPKG=no
		AC_MSG_RESULT([$HAVE_DPKG])
	])

	AC_MSG_CHECKING([whether $DPKGBUILD is available])
	AS_IF([tmp=$($DPKGBUILD --version 2>/dev/null)], [
		DPKGBUILD_VERSION=$(echo $tmp | \
		    $AWK '/Debian/ { print $[4] }' | cut -f-4 -d'.')
		HAVE_DPKGBUILD=yes
		AC_MSG_RESULT([$HAVE_DPKGBUILD ($DPKGBUILD_VERSION)])
	],[
		HAVE_DPKGBUILD=no
		AC_MSG_RESULT([$HAVE_DPKGBUILD])
	])

	AC_SUBST(HAVE_DPKG)
	AC_SUBST(DPKG)
	AC_SUBST(DPKG_VERSION)

	AC_SUBST(HAVE_DPKGBUILD)
	AC_SUBST(DPKGBUILD)
	AC_SUBST(DPKGBUILD_VERSION)
])

dnl #
dnl # Until native packaging for various different packaging systems
dnl # can be added, the least we can do is attempt to use alien to
dnl # convert the RPM packages to the needed package type. This is
dnl # a hack, but so far it has worked reasonably well.
dnl #
AC_DEFUN([SPL_AC_ALIEN], [
	ALIEN=alien

	AC_MSG_CHECKING([whether $ALIEN is available])
	AS_IF([tmp=$($ALIEN --version 2>/dev/null)], [
		ALIEN_VERSION=$(echo $tmp | $AWK '{ print $[3] }')
		HAVE_ALIEN=yes
		AC_MSG_RESULT([$HAVE_ALIEN ($ALIEN_VERSION)])
	],[
		HAVE_ALIEN=no
		AC_MSG_RESULT([$HAVE_ALIEN])
	])

	AC_SUBST(HAVE_ALIEN)
	AC_SUBST(ALIEN)
	AC_SUBST(ALIEN_VERSION)
])

dnl #
dnl # Using the VENDOR tag from config.guess set the default
dnl # package type for 'make pkg': (rpm | deb | tgz)
dnl #
AC_DEFUN([SPL_AC_DEFAULT_PACKAGE], [
	AC_MSG_CHECKING([linux distribution])
	if test -f /etc/toss-release ; then
		VENDOR=toss ;
	elif test -f /etc/fedora-release ; then
		VENDOR=fedora ;
	elif test -f /etc/redhat-release ; then
		VENDOR=redhat ;
	elif test -f /etc/gentoo-release ; then
		VENDOR=gentoo ;
	elif test -f /etc/arch-release ; then
		VENDOR=arch ;
	elif test -f /etc/SuSE-release ; then
		VENDOR=sles ;
	elif test -f /etc/slackware-version ; then
		VENDOR=slackware ;
	elif test -f /etc/lunar.release ; then
		VENDOR=lunar ;
	elif test -f /etc/lsb-release ; then
		VENDOR=ubuntu ;
	elif test -f /etc/debian_version ; then
		VENDOR=debian ;
	else
		VENDOR= ;
	fi
	AC_MSG_RESULT([$VENDOR])
	AC_SUBST(VENDOR)

	AC_MSG_CHECKING([default package type])
	case "$VENDOR" in
		toss)       DEFAULT_PACKAGE=rpm  ;;
		redhat)     DEFAULT_PACKAGE=rpm  ;;
		fedora)     DEFAULT_PACKAGE=rpm  ;;
		gentoo)     DEFAULT_PACKAGE=tgz  ;;
		arch)       DEFAULT_PACKAGE=tgz  ;;
		sles)       DEFAULT_PACKAGE=rpm  ;;
		slackware)  DEFAULT_PACKAGE=tgz  ;;
		lunar)      DEFAULT_PACKAGE=tgz  ;;
		ubuntu)     DEFAULT_PACKAGE=deb  ;;
		debian)     DEFAULT_PACKAGE=deb  ;;
		*)          DEFAULT_PACKAGE=rpm  ;;
	esac

	AC_MSG_RESULT([$DEFAULT_PACKAGE])
	AC_SUBST(DEFAULT_PACKAGE)
])

dnl #
dnl # Default SPL package configuration
dnl #
AC_DEFUN([SPL_AC_PACKAGE], [
	SPL_AC_DEFAULT_PACKAGE
	SPL_AC_RPM
	SPL_AC_DPKG
	SPL_AC_ALIEN
])

AC_DEFUN([SPL_AC_LICENSE], [
	AC_MSG_CHECKING([spl author])
	AC_MSG_RESULT([$SPL_META_AUTHOR])

	AC_MSG_CHECKING([spl license])
	AC_MSG_RESULT([$SPL_META_LICENSE])
])

AC_DEFUN([SPL_AC_CONFIG], [
	SPL_CONFIG=all
	AC_ARG_WITH([config],
		AS_HELP_STRING([--with-config=CONFIG],
		[Config file 'kernel|user|all|srpm']),
		[SPL_CONFIG="$withval"])
	AC_ARG_ENABLE([linux-builtin],
		[AC_HELP_STRING([--enable-linux-builtin],
		[Configure for builtin in-tree kernel modules @<:@default=no@:>@])],
		[],
		[enable_linux_builtin=no])

	AC_MSG_CHECKING([spl config])
	AC_MSG_RESULT([$SPL_CONFIG]);
	AC_SUBST(SPL_CONFIG)

	case "$SPL_CONFIG" in
		kernel) SPL_AC_CONFIG_KERNEL ;;
		user)   SPL_AC_CONFIG_USER   ;;
		all)    SPL_AC_CONFIG_KERNEL
			SPL_AC_CONFIG_USER   ;;
		srpm)                        ;;
		*)
		AC_MSG_RESULT([Error!])
		AC_MSG_ERROR([Bad value "$SPL_CONFIG" for --with-config,
			use kernel|user|all|srpm]) ;;
	esac

	AM_CONDITIONAL([CONFIG_USER],
	    [test "$SPL_CONFIG" = user -o "$SPL_CONFIG" = all])
	AM_CONDITIONAL([CONFIG_KERNEL],
	    [test "$SPL_CONFIG" = kernel -o "$SPL_CONFIG" = all] &&
	    [test "x$enable_linux_builtin" != xyes ])
])
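In effect, the debug options that follow inject -DDEBUG or -DNDEBUG into KERNELCPPFLAGS, gating assertion-style checks out of production builds. A minimal userspace sketch of the pattern (the ASSERT shown is a stand-in convention, not the SPL's actual macro):

	/* sketch.c -- build with:  cc -DDEBUG sketch.c   (or -DNDEBUG) */
	#include <stdio.h>

	#ifdef DEBUG
	#define ASSERT(x)	do { if (!(x)) \
		printf("ASSERT(%s) failed\n", #x); } while (0)
	#else
	#define ASSERT(x)	((void)0)	/* compiled away in -DNDEBUG builds */
	#endif

	int
	main(void)
	{
		ASSERT(2 + 2 == 5);	/* reports only when built with -DDEBUG */
		return (0);
	}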

dnl #
dnl # Enable this if the SPL should be compiled with internal debugging
dnl # support. By default this support is disabled.
dnl #
AC_DEFUN([SPL_AC_DEBUG], [
	AC_MSG_CHECKING([whether debugging is enabled])
	AC_ARG_ENABLE([debug],
		[AS_HELP_STRING([--enable-debug],
		[Enable generic debug support @<:@default=no@:>@])],
		[],
		[enable_debug=no])

	AS_IF([test "x$enable_debug" = xyes],
	[
		KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG -Werror"
		DEBUG_CFLAGS="-DDEBUG -Werror"
		DEBUG_SPL="_with_debug"
	], [
		KERNELCPPFLAGS="${KERNELCPPFLAGS} -DNDEBUG"
		DEBUG_CFLAGS="-DNDEBUG"
		DEBUG_SPL="_without_debug"
	])

	AC_SUBST(DEBUG_CFLAGS)
	AC_SUBST(DEBUG_SPL)
	AC_MSG_RESULT([$enable_debug])
])

dnl #
dnl # When enabled, this provides a minimal level of memory tracking.
dnl # A total count of bytes allocated is kept for each alloc and free,
dnl # and at module unload time a report is printed to the console if
dnl # memory was leaked.
dnl #
AC_DEFUN([SPL_AC_DEBUG_KMEM], [
	AC_ARG_ENABLE([debug-kmem],
		[AS_HELP_STRING([--enable-debug-kmem],
		[Enable basic kmem accounting @<:@default=no@:>@])],
		[],
		[enable_debug_kmem=no])

	AS_IF([test "x$enable_debug_kmem" = xyes],
	[
		KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_KMEM"
		DEBUG_KMEM="_with_debug_kmem"
		AC_DEFINE([DEBUG_KMEM], [1],
		    [Define to 1 to enable basic kmem accounting])
	], [
		DEBUG_KMEM="_without_debug_kmem"
	])

	AC_SUBST(DEBUG_KMEM)
	AC_MSG_CHECKING([whether basic kmem accounting is enabled])
	AC_MSG_RESULT([$enable_debug_kmem])
])

dnl #
dnl # Disabled by default, this provides detailed memory tracking. This
dnl # feature also requires --enable-debug-kmem to be set. When enabled,
dnl # not only will total bytes be tracked but also the location of every
dnl # alloc and free. When the SPL module is unloaded a list of all leaked
dnl # addresses and where they were allocated will be dumped to the console.
dnl # Enabling this feature has a significant impact on performance, but it
dnl # makes finding memory leaks fairly straightforward.
dnl #
AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [
	AC_ARG_ENABLE([debug-kmem-tracking],
		[AS_HELP_STRING([--enable-debug-kmem-tracking],
		[Enable detailed kmem tracking @<:@default=no@:>@])],
		[],
		[enable_debug_kmem_tracking=no])

	AS_IF([test "x$enable_debug_kmem_tracking" = xyes],
	[
		KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_KMEM_TRACKING"
		DEBUG_KMEM_TRACKING="_with_debug_kmem_tracking"
		AC_DEFINE([DEBUG_KMEM_TRACKING], [1],
		    [Define to 1 to enable detailed kmem tracking])
	], [
		DEBUG_KMEM_TRACKING="_without_debug_kmem_tracking"
	])

	AC_SUBST(DEBUG_KMEM_TRACKING)
	AC_MSG_CHECKING([whether detailed kmem tracking is enabled])
	AC_MSG_RESULT([$enable_debug_kmem_tracking])
])

dnl #
dnl # SPL_LINUX_CONFTEST
dnl #
AC_DEFUN([SPL_LINUX_CONFTEST], [
cat confdefs.h - <<_ACEOF >conftest.c
$1
_ACEOF
])

dnl #
dnl # SPL_LANG_PROGRAM(C)([PROLOGUE], [BODY])
dnl #
m4_define([SPL_LANG_PROGRAM], [
$1
int
main (void)
{
dnl Do *not* indent the following line: there may be CPP directives.
dnl Don't move the `;' right after for the same reason.
+$2 + ; + return 0; +} +]) + +dnl # +dnl # SPL_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE +dnl # +AC_DEFUN([SPL_LINUX_COMPILE_IFELSE], [ + m4_ifvaln([$1], [SPL_LINUX_CONFTEST([$1])]) + rm -Rf build && mkdir -p build && touch build/conftest.mod.c + echo "obj-m := conftest.o" >build/Makefile + modpost_flag='' + test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage + AS_IF( + [AC_TRY_COMMAND(cp conftest.c build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])], + [$4], + [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])] + ) + rm -Rf build +]) + +dnl # +dnl # SPL_LINUX_TRY_COMPILE like AC_TRY_COMPILE +dnl # +AC_DEFUN([SPL_LINUX_TRY_COMPILE], + [SPL_LINUX_COMPILE_IFELSE( + [AC_LANG_SOURCE([SPL_LANG_PROGRAM([[$1]], [[$2]])])], + [modules], + [test -s build/conftest.o], + [$3], [$4]) +]) + +dnl # +dnl # SPL_CHECK_SYMBOL_EXPORT +dnl # check symbol exported or not +dnl # +AC_DEFUN([SPL_CHECK_SYMBOL_EXPORT], [ + grep -q -E '[[[:space:]]]$1[[[:space:]]]' \ + $LINUX_OBJ/Module*.symvers 2>/dev/null + rc=$? + if test $rc -ne 0; then + export=0 + for file in $2; do + grep -q -E "EXPORT_SYMBOL.*($1)" \ + "$LINUX_OBJ/$file" 2>/dev/null + rc=$? + if test $rc -eq 0; then + export=1 + break; + fi + done + if test $export -eq 0; then : + $4 + else : + $3 + fi + else : + $3 + fi +]) + +dnl # +dnl # SPL_LINUX_TRY_COMPILE_SYMBOL +dnl # like SPL_LINUX_TRY_COMPILE, except SPL_CHECK_SYMBOL_EXPORT +dnl # is called if not compiling for builtin +dnl # +AC_DEFUN([SPL_LINUX_TRY_COMPILE_SYMBOL], [ + SPL_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) + if test $rc -ne 0; then : + $6 + else + if test "x$enable_linux_builtin" != xyes; then + SPL_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) + fi + if test $rc -ne 0; then : + $6 + else : + $5 + fi + fi +]) + +dnl # +dnl # SPL_CHECK_SYMBOL_HEADER +dnl # check if a symbol prototype is defined in listed headers. +dnl # +AC_DEFUN([SPL_CHECK_SYMBOL_HEADER], [ + AC_MSG_CHECKING([whether symbol $1 exists in header]) + header=0 + for file in $3; do + grep -q "$2" "$LINUX/$file" 2>/dev/null + rc=$? + if test $rc -eq 0; then + header=1 + break; + fi + done + if test $header -eq 0; then + AC_MSG_RESULT([no]) + $5 + else + AC_MSG_RESULT([yes]) + $4 + fi +]) + +dnl # +dnl # SPL_CHECK_HEADER +dnl # check whether header exists and define HAVE_$2_HEADER +dnl # +AC_DEFUN([SPL_CHECK_HEADER], + [AC_MSG_CHECKING([whether header $1 exists]) + SPL_LINUX_TRY_COMPILE([ + #include <$1> + ],[ + return 0; + ],[ + AC_DEFINE(HAVE_$2_HEADER, 1, [$1 exists]) + AC_MSG_RESULT(yes) + $3 + ],[ + AC_MSG_RESULT(no) + $4 + ]) +]) + +dnl # +dnl # Basic toolchain sanity check. Verify that kernel modules can +dnl # be built and which symbols can be used. +dnl # +AC_DEFUN([SPL_AC_TEST_MODULE], + [AC_MSG_CHECKING([whether modules can be built]) + SPL_LINUX_TRY_COMPILE([],[],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + if test "x$enable_linux_builtin" != xyes; then + AC_MSG_ERROR([*** Unable to build an empty module.]) + else + AC_MSG_ERROR([ + *** Unable to build an empty module. 
	*** Please run 'make scripts' inside the kernel source tree.])
		fi
	])

	AS_IF([test "x$cross_compiling" != xyes], [
		AC_RUN_IFELSE([
			AC_LANG_PROGRAM([
				#include "$LINUX/include/linux/license.h"
			], [
				return !license_is_gpl_compatible(
				    "$SPL_META_LICENSE");
			])
		], [
			AC_DEFINE([SPL_IS_GPL_COMPATIBLE], [1],
			    [Define to 1 if GPL-only symbols can be used])
		], [
		])
	])
])

dnl #
dnl # Use the atomic implementation based on global spinlocks. This
dnl # should only be needed by 32-bit kernels which do not provide
dnl # the atomic64_* API. It may be optionally enabled as a fallback
dnl # if problems are observed with the direct mapping to the native
dnl # Linux atomic operations. You may not disable atomic spinlocks
dnl # if your kernel does not provide the atomic64_* API.
dnl #
AC_DEFUN([SPL_AC_ATOMIC_SPINLOCK], [
	AC_ARG_ENABLE([atomic-spinlocks],
		[AS_HELP_STRING([--enable-atomic-spinlocks],
		[Atomic types use spinlocks @<:@default=check@:>@])],
		[],
		[enable_atomic_spinlocks=check])

	SPL_LINUX_TRY_COMPILE([
		#include <asm/atomic.h>
	],[
		atomic64_t *ptr __attribute__ ((unused));
	],[
		have_atomic64_t=yes
		AC_DEFINE(HAVE_ATOMIC64_T, 1,
		    [kernel defines atomic64_t])
	],[
		have_atomic64_t=no
	])

	AS_IF([test "x$enable_atomic_spinlocks" = xcheck], [
		AS_IF([test "x$have_atomic64_t" = xyes], [
			enable_atomic_spinlocks=no
		],[
			enable_atomic_spinlocks=yes
		])
	])

	AS_IF([test "x$enable_atomic_spinlocks" = xyes], [
		AC_DEFINE([ATOMIC_SPINLOCK], [1],
		    [Atomic types use spinlocks])
	],[
		AS_IF([test "x$have_atomic64_t" = xno], [
			AC_MSG_FAILURE(
			    [--disable-atomic-spinlocks given but required atomic64 support is unavailable])
		])
	])

	AC_MSG_CHECKING([whether atomic types use spinlocks])
	AC_MSG_RESULT([$enable_atomic_spinlocks])

	AC_MSG_CHECKING([whether kernel defines atomic64_t])
	AC_MSG_RESULT([$have_atomic64_t])
])

AC_DEFUN([SPL_AC_SHRINKER_CALLBACK],[
	tmp_flags="$EXTRA_KCFLAGS"
	EXTRA_KCFLAGS="-Werror"
	dnl #
	dnl # 2.6.23 to 2.6.34 API change
	dnl # ->shrink(int nr_to_scan, gfp_t gfp_mask)
	dnl #
	AC_MSG_CHECKING([whether old 2-argument shrinker exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/mm.h>

		int shrinker_cb(int nr_to_scan, gfp_t gfp_mask);
	],[
		struct shrinker cache_shrinker = {
			.shrink = shrinker_cb,
			.seeks = DEFAULT_SEEKS,
		};
		register_shrinker(&cache_shrinker);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_2ARGS_OLD_SHRINKER_CALLBACK, 1,
		    [old shrinker callback wants 2 args])
	],[
		AC_MSG_RESULT(no)
		dnl #
		dnl # 2.6.35 - 2.6.39 API change
		dnl # ->shrink(struct shrinker *,
		dnl #          int nr_to_scan, gfp_t gfp_mask)
		dnl #
		AC_MSG_CHECKING([whether old 3-argument shrinker exists])
		SPL_LINUX_TRY_COMPILE([
			#include <linux/mm.h>

			int shrinker_cb(struct shrinker *, int nr_to_scan,
					gfp_t gfp_mask);
		],[
			struct shrinker cache_shrinker = {
				.shrink = shrinker_cb,
				.seeks = DEFAULT_SEEKS,
			};
			register_shrinker(&cache_shrinker);
		],[
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_3ARGS_SHRINKER_CALLBACK, 1,
			    [old shrinker callback wants 3 args])
		],[
			AC_MSG_RESULT(no)
			dnl #
			dnl # 3.0 - 3.11 API change
			dnl # ->shrink(struct shrinker *,
			dnl #          struct shrink_control *sc)
			dnl #
			AC_MSG_CHECKING(
			    [whether new 2-argument shrinker exists])
			SPL_LINUX_TRY_COMPILE([
				#include <linux/mm.h>

				int shrinker_cb(struct shrinker *,
						struct shrink_control *sc);
			],[
				struct shrinker cache_shrinker = {
					.shrink = shrinker_cb,
					.seeks = DEFAULT_SEEKS,
				};
				register_shrinker(&cache_shrinker);
			],[
				AC_MSG_RESULT(yes)
				AC_DEFINE(HAVE_2ARGS_NEW_SHRINKER_CALLBACK, 1,
				    [new shrinker callback wants 2 args])
			],[
				AC_MSG_RESULT(no)
				dnl #
				dnl # 3.12 API change,
				dnl # ->shrink() is logically split into
				dnl # ->count_objects() and ->scan_objects()
				dnl #
				AC_MSG_CHECKING(
				    [whether ->count_objects callback exists])
				SPL_LINUX_TRY_COMPILE([
					#include <linux/mm.h>

					unsigned long shrinker_cb(
						struct shrinker *,
						struct shrink_control *sc);
				],[
					struct shrinker cache_shrinker = {
						.count_objects = shrinker_cb,
						.scan_objects = shrinker_cb,
						.seeks = DEFAULT_SEEKS,
					};
					register_shrinker(&cache_shrinker);
				],[
					AC_MSG_RESULT(yes)
					AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK,
					    1, [->count_objects exists])
				],[
					AC_MSG_ERROR(error)
				])
			])
		])
	])
	EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # 2.6.33 API change,
dnl # Removed .ctl_name from struct ctl_table.
dnl #
AC_DEFUN([SPL_AC_CTL_NAME], [
	AC_MSG_CHECKING([whether struct ctl_table has ctl_name])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/sysctl.h>
	],[
		struct ctl_table ctl __attribute__ ((unused));
		ctl.ctl_name = 0;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_CTL_NAME, 1, [struct ctl_table has ctl_name])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 3.10 API change,
dnl # PDE is replaced by PDE_DATA
dnl #
AC_DEFUN([SPL_AC_PDE_DATA], [
	AC_MSG_CHECKING([whether PDE_DATA() is available])
	SPL_LINUX_TRY_COMPILE_SYMBOL([
		#include <linux/proc_fs.h>
	], [
		PDE_DATA(NULL);
	], [PDE_DATA], [], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_PDE_DATA, 1, [yes])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 3.9 API change
dnl # set_fs_pwd takes const struct path *
dnl #
AC_DEFUN([SPL_AC_SET_FS_PWD_WITH_CONST],
	tmp_flags="$EXTRA_KCFLAGS"
	EXTRA_KCFLAGS="-Werror"
	[AC_MSG_CHECKING([whether set_fs_pwd() requires const struct path *])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/spinlock.h>
		#include <linux/fs_struct.h>
		#include <linux/path.h>
		void (*const set_fs_pwd_func)
			(struct fs_struct *, const struct path *)
			= set_fs_pwd;
	],[
		return 0;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_SET_FS_PWD_WITH_CONST, 1,
		    [set_fs_pwd() needs const path *])
	],[
		SPL_LINUX_TRY_COMPILE([
			#include <linux/spinlock.h>
			#include <linux/fs_struct.h>
			#include <linux/path.h>
			void (*const set_fs_pwd_func)
				(struct fs_struct *, struct path *)
				= set_fs_pwd;
		],[
			return 0;
		],[
			AC_MSG_RESULT(no)
		],[
			AC_MSG_ERROR(unknown)
		])
	])
	EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # 3.13 API change
dnl # vfs_unlink() updated to take a third delegated_inode argument.
dnl #
AC_DEFUN([SPL_AC_2ARGS_VFS_UNLINK],
	[AC_MSG_CHECKING([whether vfs_unlink() wants 2 args])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		vfs_unlink((struct inode *) NULL, (struct dentry *) NULL);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_2ARGS_VFS_UNLINK, 1,
		    [vfs_unlink() wants 2 args])
	],[
		AC_MSG_RESULT(no)
		dnl #
		dnl # Linux 3.13 API change
		dnl # Added delegated inode
		dnl #
		AC_MSG_CHECKING([whether vfs_unlink() wants 3 args])
		SPL_LINUX_TRY_COMPILE([
			#include <linux/fs.h>
		],[
			vfs_unlink((struct inode *) NULL,
			    (struct dentry *) NULL,
			    (struct inode **) NULL);
		],[
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_3ARGS_VFS_UNLINK, 1,
			    [vfs_unlink() wants 3 args])
		],[
			AC_MSG_ERROR(no)
		])

	])
])
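Results such as HAVE_3ARGS_VFS_UNLINK above, and the HAVE_*ARGS_VFS_RENAME defines produced by the next check, are typically consumed through a thin wrapper so callers never see the per-kernel differences. A sketch under that assumption (spl_vfs_rename is a hypothetical name, not necessarily the SPL's actual wrapper):

	#include <linux/fs.h>

	/* illustrative only: select the right vfs_rename() call at compile time */
	static inline int
	spl_vfs_rename(struct inode *sdip, struct dentry *sdp,
	    struct inode *tdip, struct dentry *tdp)
	{
	#if defined(HAVE_6ARGS_VFS_RENAME)
		return (vfs_rename(sdip, sdp, tdip, tdp, NULL, 0));
	#elif defined(HAVE_5ARGS_VFS_RENAME)
		return (vfs_rename(sdip, sdp, tdip, tdp, NULL));
	#else /* HAVE_4ARGS_VFS_RENAME */
		return (vfs_rename(sdip, sdp, tdip, tdp));
	#endif
	}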

dnl #
dnl # 3.13 and 3.15 API changes
dnl # Added delegated inode and flags argument.
dnl #
AC_DEFUN([SPL_AC_4ARGS_VFS_RENAME],
	[AC_MSG_CHECKING([whether vfs_rename() wants 4 args])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		vfs_rename((struct inode *) NULL, (struct dentry *) NULL,
		    (struct inode *) NULL, (struct dentry *) NULL);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_4ARGS_VFS_RENAME, 1,
		    [vfs_rename() wants 4 args])
	],[
		AC_MSG_RESULT(no)
		dnl #
		dnl # Linux 3.13 API change
		dnl # Added delegated inode
		dnl #
		AC_MSG_CHECKING([whether vfs_rename() wants 5 args])
		SPL_LINUX_TRY_COMPILE([
			#include <linux/fs.h>
		],[
			vfs_rename((struct inode *) NULL,
			    (struct dentry *) NULL,
			    (struct inode *) NULL,
			    (struct dentry *) NULL,
			    (struct inode **) NULL);
		],[
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_5ARGS_VFS_RENAME, 1,
			    [vfs_rename() wants 5 args])
		],[
			AC_MSG_RESULT(no)
			dnl #
			dnl # Linux 3.15 API change
			dnl # Added flags
			dnl #
			AC_MSG_CHECKING([whether vfs_rename() wants 6 args])
			SPL_LINUX_TRY_COMPILE([
				#include <linux/fs.h>
			],[
				vfs_rename((struct inode *) NULL,
				    (struct dentry *) NULL,
				    (struct inode *) NULL,
				    (struct dentry *) NULL,
				    (struct inode **) NULL,
				    (unsigned int) 0);
			],[
				AC_MSG_RESULT(yes)
				AC_DEFINE(HAVE_6ARGS_VFS_RENAME, 1,
				    [vfs_rename() wants 6 args])
			],[
				AC_MSG_ERROR(no)
			])
		])
	])
])

dnl #
dnl # 2.6.36 API change,
dnl # The 'struct fs_struct->lock' was changed from a rwlock_t to
dnl # a spinlock_t to improve the fastpath performance.
dnl #
AC_DEFUN([SPL_AC_FS_STRUCT_SPINLOCK], [
	AC_MSG_CHECKING([whether struct fs_struct uses spinlock_t])
	tmp_flags="$EXTRA_KCFLAGS"
	EXTRA_KCFLAGS="-Werror"
	SPL_LINUX_TRY_COMPILE([
		#include <linux/sched.h>
		#include <linux/fs_struct.h>
	],[
		static struct fs_struct fs;
		spin_lock_init(&fs.lock);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_FS_STRUCT_SPINLOCK, 1,
		    [struct fs_struct uses spinlock_t])
	],[
		AC_MSG_RESULT(no)
	])
	EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # User namespaces: use kuid_t in place of uid_t where available.
dnl # Not strictly a user-namespace requirement, but it should prevent
dnl # surprises.
dnl #
AC_DEFUN([SPL_AC_KUIDGID_T], [
	AC_MSG_CHECKING([whether kuid_t/kgid_t is available])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/uidgid.h>
	], [
		kuid_t userid = KUIDT_INIT(0);
		kgid_t groupid = KGIDT_INIT(0);
	],[
		SPL_LINUX_TRY_COMPILE([
			#include <linux/uidgid.h>
		], [
			kuid_t userid = 0;
			kgid_t groupid = 0;
		],[
			AC_MSG_RESULT(yes; optional)
		],[
			AC_MSG_RESULT(yes; mandatory)
			AC_DEFINE(HAVE_KUIDGID_T, 1, [kuid_t/kgid_t in use])
		])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 2.6.39 API change,
dnl # __put_task_struct() was exported by the mainline kernel.
dnl #
AC_DEFUN([SPL_AC_PUT_TASK_STRUCT],
	[AC_MSG_CHECKING([whether __put_task_struct() is available])
	SPL_LINUX_TRY_COMPILE_SYMBOL([
		#include <linux/sched.h>
	], [
		__put_task_struct(NULL);
	], [__put_task_struct], [], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_PUT_TASK_STRUCT, 1,
		    [__put_task_struct() is available])
	], [
		AC_MSG_RESULT(no)
	])
])
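The result of the vfs_fsync() check that follows is consumed by the spl_filp_fsync() wrapper visible later in this diff (include/linux/file_compat.h); the two-arg side shown here is inferred from that hunk:

	/* sketch: how HAVE_2ARGS_VFS_FSYNC selects the wrapper definition */
	#ifdef HAVE_2ARGS_VFS_FSYNC
	#define spl_filp_fsync(fp, sync)	vfs_fsync(fp, sync)
	#else
	#define spl_filp_fsync(fp, sync)	vfs_fsync(fp, (fp)->f_dentry, sync)
	#endif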

dnl #
dnl # 2.6.35 API change,
dnl # Unused 'struct dentry *' removed from vfs_fsync() prototype.
dnl #
AC_DEFUN([SPL_AC_2ARGS_VFS_FSYNC], [
	AC_MSG_CHECKING([whether vfs_fsync() wants 2 args])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		vfs_fsync(NULL, 0);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_2ARGS_VFS_FSYNC, 1, [vfs_fsync() wants 2 args])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 3.5 API change,
dnl # inode_operations.truncate_range removed
dnl #
AC_DEFUN([SPL_AC_INODE_TRUNCATE_RANGE], [
	AC_MSG_CHECKING([whether truncate_range() inode operation is available])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		struct inode_operations ops;
		ops.truncate_range = NULL;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_INODE_TRUNCATE_RANGE, 1,
		    [truncate_range() inode operation is available])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # Linux 2.6.38 - 3.x API
dnl #
AC_DEFUN([SPL_AC_KERNEL_FILE_FALLOCATE], [
	AC_MSG_CHECKING([whether fops->fallocate() exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		long (*fallocate) (struct file *, int, loff_t, loff_t) = NULL;
		struct file_operations fops __attribute__ ((unused)) = {
			.fallocate = fallocate,
		};
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # Linux 2.6.x - 2.6.37 API
dnl #
AC_DEFUN([SPL_AC_KERNEL_INODE_FALLOCATE], [
	AC_MSG_CHECKING([whether iops->fallocate() exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		long (*fallocate) (struct inode *, int, loff_t, loff_t) = NULL;
		struct inode_operations fops __attribute__ ((unused)) = {
			.fallocate = fallocate,
		};
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_INODE_FALLOCATE, 1, [iops->fallocate() exists])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # PaX Linux 2.6.38 - 3.x API
dnl #
AC_DEFUN([SPL_AC_PAX_KERNEL_FILE_FALLOCATE], [
	AC_MSG_CHECKING([whether fops->fallocate() exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		long (*fallocate) (struct file *, int, loff_t, loff_t) = NULL;
		struct file_operations_no_const fops __attribute__ ((unused)) = {
			.fallocate = fallocate,
		};
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_FILE_FALLOCATE, 1, [fops->fallocate() exists])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # The fallocate callback was moved from the inode_operations
dnl # structure to the file_operations structure.
dnl #
AC_DEFUN([SPL_AC_KERNEL_FALLOCATE], [
	SPL_AC_KERNEL_FILE_FALLOCATE
	SPL_AC_KERNEL_INODE_FALLOCATE
	SPL_AC_PAX_KERNEL_FILE_FALLOCATE
])

dnl #
dnl # zlib inflate compat,
dnl # Verify the kernel has CONFIG_ZLIB_INFLATE support enabled.
dnl #
AC_DEFUN([SPL_AC_CONFIG_ZLIB_INFLATE], [
	AC_MSG_CHECKING([whether CONFIG_ZLIB_INFLATE is defined])
	SPL_LINUX_TRY_COMPILE([
		#if !defined(CONFIG_ZLIB_INFLATE) && \
		    !defined(CONFIG_ZLIB_INFLATE_MODULE)
		#error CONFIG_ZLIB_INFLATE not defined
		#endif
	],[ ],[
		AC_MSG_RESULT([yes])
	],[
		AC_MSG_RESULT([no])
		AC_MSG_ERROR([
	*** This kernel does not include the required zlib inflate support.
	*** Rebuild the kernel with CONFIG_ZLIB_INFLATE=y|m set.])
	])
])
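Once these zlib probes run, their results are typically absorbed by a one-line wrapper so the rest of the code can always pass both parameters; a sketch (the spl_zlib_deflate_workspacesize name is an assumption, modeled on the SPL's zmod compatibility header):

	#include <linux/zlib.h>

	/* sketch: always call with two args; drop them on pre-2.6.39 kernels */
	#ifdef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
	#define spl_zlib_deflate_workspacesize(wb, ml) \
		zlib_deflate_workspacesize(wb, ml)
	#else
	#define spl_zlib_deflate_workspacesize(wb, ml) \
		zlib_deflate_workspacesize()
	#endif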

dnl #
dnl # zlib deflate compat,
dnl # Verify the kernel has CONFIG_ZLIB_DEFLATE support enabled.
dnl #
AC_DEFUN([SPL_AC_CONFIG_ZLIB_DEFLATE], [
	AC_MSG_CHECKING([whether CONFIG_ZLIB_DEFLATE is defined])
	SPL_LINUX_TRY_COMPILE([
		#if !defined(CONFIG_ZLIB_DEFLATE) && \
		    !defined(CONFIG_ZLIB_DEFLATE_MODULE)
		#error CONFIG_ZLIB_DEFLATE not defined
		#endif
	],[ ],[
		AC_MSG_RESULT([yes])
	],[
		AC_MSG_RESULT([no])
		AC_MSG_ERROR([
	*** This kernel does not include the required zlib deflate support.
	*** Rebuild the kernel with CONFIG_ZLIB_DEFLATE=y|m set.])
	])
])

dnl #
dnl # 2.6.39 API compat,
dnl # The function zlib_deflate_workspacesize() now takes 2 arguments.
dnl # This was done to avoid always having to allocate the maximum size
dnl # workspace (268K). The caller can now specify the windowBits and
dnl # memLevel compression parameters to get a smaller workspace.
dnl #
AC_DEFUN([SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE],
	[AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/zlib.h>
	],[
		return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1,
		    [zlib_deflate_workspacesize() wants 2 args])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 2.6.39 API change,
dnl # Shrinkers were adjusted to use the common shrink_control structure.
dnl #
AC_DEFUN([SPL_AC_SHRINK_CONTROL_STRUCT], [
	AC_MSG_CHECKING([whether struct shrink_control exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/mm.h>
	],[
		struct shrink_control sc __attribute__ ((unused));

		sc.nr_to_scan = 0;
		sc.gfp_mask = GFP_KERNEL;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1,
		    [struct shrink_control exists])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 3.1 API Change
dnl #
dnl # The rw_semaphore.wait_lock member was changed from spinlock_t to
dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1.
dnl #
AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
	AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw])
	tmp_flags="$EXTRA_KCFLAGS"
	EXTRA_KCFLAGS="-Werror"
	SPL_LINUX_TRY_COMPILE([
		#include <linux/rwsem.h>
	],[
		struct rw_semaphore dummy_semaphore __attribute__ ((unused));
		raw_spinlock_t dummy_lock __attribute__ ((unused));
		dummy_semaphore.wait_lock = dummy_lock;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(RWSEM_SPINLOCK_IS_RAW, 1,
		    [struct rw_semaphore member wait_lock is raw_spinlock_t])
	],[
		AC_MSG_RESULT(no)
	])
	EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # 3.9 API change,
dnl # Moved things from linux/sched.h to linux/sched/rt.h
dnl #
AC_DEFUN([SPL_AC_SCHED_RT_HEADER],
	[AC_MSG_CHECKING([whether header linux/sched/rt.h exists])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/sched.h>
		#include <linux/sched/rt.h>
	],[
		return 0;
	],[
		AC_DEFINE(HAVE_SCHED_RT_HEADER, 1, [linux/sched/rt.h exists])
		AC_MSG_RESULT(yes)
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 3.9 API change,
dnl # vfs_getattr() uses 2 args
dnl # It takes struct path * instead of struct vfsmount * and struct dentry *
dnl #
AC_DEFUN([SPL_AC_2ARGS_VFS_GETATTR], [
	AC_MSG_CHECKING([whether vfs_getattr() wants])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/fs.h>
	],[
		vfs_getattr((struct path *) NULL,
			(struct kstat *)NULL);
	],[
		AC_MSG_RESULT(2 args)
		AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1,
		    [vfs_getattr wants 2 args])
	],[
		SPL_LINUX_TRY_COMPILE([
			#include <linux/fs.h>
		],[
			vfs_getattr((struct vfsmount *)NULL,
				(struct dentry *)NULL,
				(struct kstat *)NULL);
		],[
			AC_MSG_RESULT(3 args)
		],[
			AC_MSG_ERROR(unknown)
		])
	])
])

dnl #
dnl # 2.6.36 API compatibility.
dnl # Added usleep_range timer.
dnl # usleep_range is a finer precision implementation of msleep
dnl # designed to be a drop-in replacement for udelay where a precise
dnl # sleep / busy-wait is unnecessary.
dnl #
AC_DEFUN([SPL_AC_USLEEP_RANGE], [
	AC_MSG_CHECKING([whether usleep_range() is available])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/delay.h>
	],[
		usleep_range(0, 0);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_USLEEP_RANGE, 1,
		    [usleep_range is available])
	],[
		AC_MSG_RESULT(no)
	])
])

dnl #
dnl # 2.6.35 API change,
dnl # The cachep->gfpflags member was renamed cachep->allocflags. These are
dnl # private allocation flags which are applied when allocating a new slab
dnl # in kmem_getpages(). Unfortunately there is no public API for setting
dnl # non-default flags.
dnl #
AC_DEFUN([SPL_AC_KMEM_CACHE_ALLOCFLAGS], [
	AC_MSG_CHECKING([whether struct kmem_cache has allocflags])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/slab.h>
	],[
		struct kmem_cache cachep __attribute__ ((unused));
		cachep.allocflags = GFP_KERNEL;
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_KMEM_CACHE_ALLOCFLAGS, 1,
		    [struct kmem_cache has allocflags])
	],[
		AC_MSG_RESULT(no)

		AC_MSG_CHECKING([whether struct kmem_cache has gfpflags])
		SPL_LINUX_TRY_COMPILE([
			#include <linux/slab.h>
		],[
			struct kmem_cache cachep __attribute__ ((unused));
			cachep.gfpflags = GFP_KERNEL;
		],[
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_KMEM_CACHE_GFPFLAGS, 1,
			    [struct kmem_cache has gfpflags])
		],[
			AC_MSG_RESULT(no)
		])
	])
])
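The last probe in this series, just below, handles the 3.17 wait_on_bit() change; a sketch of how its HAVE_WAIT_ON_BIT_ACTION result might be consumed (spl_wait_on_bit and spl_bit_wait are hypothetical names, shown only to illustrate the pattern):

	#include <linux/wait.h>
	#include <linux/sched.h>

	#ifdef HAVE_WAIT_ON_BIT_ACTION
	/* pre-3.17 kernels: the caller must supply the action that sleeps */
	static inline int
	spl_bit_wait(void *word)
	{
		schedule();
		return (0);
	}
	#define spl_wait_on_bit(word, bit, mode) \
		wait_on_bit(word, bit, spl_bit_wait, mode)
	#else
	/* 3.17 and newer: the kernel supplies the default waiting behavior */
	#define spl_wait_on_bit(word, bit, mode) \
		wait_on_bit(word, bit, mode)
	#endif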

dnl #
dnl # 3.17 API change,
dnl # wait_on_bit() no longer requires an action argument. The former
dnl # "wait_on_bit" interface required an 'action' function to be provided
dnl # which does the actual waiting. There were over 20 such functions in the
dnl # kernel, many of them identical, though most cases can be satisfied by one
dnl # of just two functions: one which uses io_schedule() and one which just
dnl # uses schedule(). This API change was made to consolidate all of those
dnl # redundant wait functions.
dnl #
AC_DEFUN([SPL_AC_WAIT_ON_BIT], [
	AC_MSG_CHECKING([whether wait_on_bit() takes an action])
	SPL_LINUX_TRY_COMPILE([
		#include <linux/wait.h>
	],[
		int (*action)(void *) = NULL;
		wait_on_bit(NULL, 0, action, 0);
	],[
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes])
	],[
		AC_MSG_RESULT(no)
	])
])
diff -Naur spl-0.6.5.7/configure.ac spl-0.6.5.7.new/configure.ac
--- spl-0.6.5.7/configure.ac	2015-12-24 01:18:07.000000000 +0100
+++ spl-0.6.5.7.new/configure.ac	2016-08-01 16:43:23.436766051 +0200
@@ -54,6 +54,8 @@
 	man/man5/Makefile
 	lib/Makefile
 	cmd/Makefile
+	cmd/splat/Makefile
+	cmd/splslab/Makefile
 	module/Makefile
 	module/spl/Makefile
 	module/splat/Makefile
diff -Naur spl-0.6.5.7/include/linux/file_compat.h spl-0.6.5.7.new/include/linux/file_compat.h
--- spl-0.6.5.7/include/linux/file_compat.h	2015-12-24 01:18:07.000000000 +0100
+++ spl-0.6.5.7.new/include/linux/file_compat.h	2016-08-01 16:43:34.278796336 +0200
@@ -76,8 +76,25 @@
 #define spl_filp_fsync(fp, sync)	vfs_fsync(fp, (fp)->f_dentry, sync)
 #endif /* HAVE_2ARGS_VFS_FSYNC */
 
+#ifdef HAVE_INODE_LOCK_SHARED
+#define spl_inode_lock(ip)		inode_lock(ip)
+#define spl_inode_unlock(ip)		inode_unlock(ip)
+#define spl_inode_lock_shared(ip)	inode_lock_shared(ip)
+#define spl_inode_unlock_shared(ip)	inode_unlock_shared(ip)
+#define spl_inode_trylock(ip)		inode_trylock(ip)
+#define spl_inode_trylock_shared(ip)	inode_trylock_shared(ip)
+#define spl_inode_is_locked(ip)		inode_is_locked(ip)
+#define spl_inode_lock_nested(ip, s)	inode_lock_nested(ip, s)
+#else
 #define spl_inode_lock(ip)		mutex_lock(&(ip)->i_mutex)
 #define spl_inode_unlock(ip)		mutex_unlock(&(ip)->i_mutex)
+#define spl_inode_lock_shared(ip)	mutex_lock(&(ip)->i_mutex)
+#define spl_inode_unlock_shared(ip)	mutex_unlock(&(ip)->i_mutex)
+#define spl_inode_trylock(ip)		mutex_trylock(&(ip)->i_mutex)
+#define spl_inode_trylock_shared(ip)	mutex_trylock(&(ip)->i_mutex)
+#define spl_inode_is_locked(ip)		mutex_is_locked(&(ip)->i_mutex)
+#define spl_inode_lock_nested(ip, s)	mutex_lock_nested(&(ip)->i_mutex, s)
+#endif
 
 #endif /* SPL_FILE_COMPAT_H */
diff -Naur spl-0.6.5.7/include/linux/rwsem_compat.h spl-0.6.5.7.new/include/linux/rwsem_compat.h
--- spl-0.6.5.7/include/linux/rwsem_compat.h	2015-09-19 21:51:19.000000000 +0200
+++ spl-0.6.5.7.new/include/linux/rwsem_compat.h	2016-08-01 16:43:34.281796344 +0200
@@ -27,6 +27,26 @@
 
 #include <linux/rwsem.h>
 
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+#define SPL_RWSEM_SINGLE_READER_VALUE	(1)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE	(-1)
+#else
+#define SPL_RWSEM_SINGLE_READER_VALUE	(RWSEM_ACTIVE_READ_BIAS)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE	(RWSEM_ACTIVE_WRITE_BIAS)
+#endif
+
+/* Linux 3.16 changed activity to count for rwsem-spinlock */
+#if defined(HAVE_RWSEM_ACTIVITY)
+#define RWSEM_COUNT(sem)	sem->activity
+/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
+#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
+#define RWSEM_COUNT(sem)	atomic_long_read(&(sem)->count)
+#else
+#define RWSEM_COUNT(sem)	sem->count
+#endif
+
+int rwsem_tryupgrade(struct rw_semaphore *rwsem);
+
 #if defined(RWSEM_SPINLOCK_IS_RAW)
 #define spl_rwsem_lock_irqsave(lk, fl)		raw_spin_lock_irqsave(lk, fl)
 #define spl_rwsem_unlock_irqrestore(lk, fl)	raw_spin_unlock_irqrestore(lk, fl)
diff -Naur spl-0.6.5.7/include/sys/byteorder.h spl-0.6.5.7.new/include/sys/byteorder.h
--- spl-0.6.5.7/include/sys/byteorder.h	2013-03-22 23:19:11.000000000 +0100
+++ spl-0.6.5.7.new/include/sys/byteorder.h	2016-08-01 16:43:33.091793020 +0200
@@ -26,6 +26,7 @@
 #define _SPL_BYTEORDER_H
 
 #include <asm/byteorder.h>
+#include <sys/isa_defs.h>
 
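(The byteorder.h additions continue below.) The htonll() being introduced swaps a 64-bit value by byte-swapping each 32-bit half and exchanging the halves; a userspace sketch of the same arithmetic:

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>	/* htonl(), for demonstration outside the kernel */

	static unsigned long long
	demo_htonll(unsigned long long n)
	{
		/* same construction as the kernel-side htonll() below */
		return (((unsigned long long)htonl((uint32_t)n)) << 32) +
		    htonl((uint32_t)(n >> 32));
	}

	int
	main(void)
	{
		/* prints 8877665544332211 on a little-endian host */
		printf("%llx\n", demo_htonll(0x1122334455667788ULL));
		return (0);
	}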
 #define LE_16(x)	cpu_to_le16(x)
 #define LE_32(x)	cpu_to_le32(x)
@@ -43,4 +44,26 @@
 #define BE_IN32(xa) \
 	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
 
+#ifdef _BIG_ENDIAN
+static __inline__ uint64_t
+htonll(uint64_t n) {
+	return (n);
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n) {
+	return (n);
+}
+#else
+static __inline__ uint64_t
+htonll(uint64_t n) {
+	return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n) {
+	return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#endif
+
 #endif /* SPL_BYTEORDER_H */
diff -Naur spl-0.6.5.7/include/sys/condvar.h spl-0.6.5.7.new/include/sys/condvar.h
--- spl-0.6.5.7/include/sys/condvar.h	2015-09-19 21:51:19.000000000 +0200
+++ spl-0.6.5.7.new/include/sys/condvar.h	2016-08-01 16:43:34.276796330 +0200
@@ -59,6 +59,8 @@
 extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
 extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
     hrtime_t res, int flag);
+extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+    hrtime_t res, int flag);
 extern void __cv_signal(kcondvar_t *);
 extern void __cv_broadcast(kcondvar_t *c);
diff -Naur spl-0.6.5.7/include/sys/dkioc_free_util.h spl-0.6.5.7.new/include/sys/dkioc_free_util.h
--- spl-0.6.5.7/include/sys/dkioc_free_util.h	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/include/sys/dkioc_free_util.h	2016-08-01 16:43:34.275796327 +0200
@@ -0,0 +1,58 @@
+/*****************************************************************************\
+ *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  UCRL-CODE-235197
+ *
+ *  This file is part of the SPL, Solaris Porting Layer.
+ *  For details, see <http://zfsonlinux.org/>.
+ *
+ *  The SPL is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  The SPL is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _SPL_DKIOC_UTIL_H
+#define _SPL_DKIOC_UTIL_H
+
+#include <sys/dkio.h>
+
+typedef struct dkioc_free_list_ext_s {
+	uint64_t		dfle_start;
+	uint64_t		dfle_length;
+} dkioc_free_list_ext_t;
+
+typedef struct dkioc_free_list_s {
+	uint64_t		dfl_flags;
+	uint64_t		dfl_num_exts;
+	int64_t			dfl_offset;
+
+	/*
+	 * N.B. this is only an internal debugging API! This is only called
+	 * from debug builds of sd for pre-release checking. Remove before GA!
+ */ + void (*dfl_ck_func)(uint64_t, uint64_t, void *); + void *dfl_ck_arg; + + dkioc_free_list_ext_t dfl_exts[1]; +} dkioc_free_list_t; + +static inline void dfl_free(dkioc_free_list_t *dfl) { + vmem_free(dfl, DFL_SZ(dfl->dfl_num_exts)); +} + +static inline dkioc_free_list_t *dfl_alloc(uint64_t dfl_num_exts, int flags) { + return vmem_zalloc(DFL_SZ(dfl_num_exts), flags); +} + +#endif /* _SPL_DKIOC_UTIL_H */ diff -Naur spl-0.6.5.7/include/sys/dkio.h spl-0.6.5.7.new/include/sys/dkio.h --- spl-0.6.5.7/include/sys/dkio.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/dkio.h 2016-08-01 16:43:23.435766048 +0200 @@ -25,14 +25,16 @@ #ifndef _SPL_DKIO_H #define _SPL_DKIO_H -struct dk_callback { - void (*dkc_callback)(void *dkc_cookie, int error); - void *dkc_cookie; - int dkc_flag; -}; +#define DFL_SZ(num_exts) \ + (sizeof (dkioc_free_list_t) + (num_exts - 1) * 16) -#define DKIOC (0x04 << 8) -#define DKIOCFLUSHWRITECACHE (DKIOC | 34) -#define DKIOCTRIM (DKIOC | 35) +#define DKIOC (0x04 << 8) +#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */ + +/* + * ioctl to free space (e.g. SCSI UNMAP) off a disk. + * Pass a dkioc_free_list_t containing a list of extents to be freed. + */ +#define DKIOCFREE (DKIOC|50) #endif /* _SPL_DKIO_H */ diff -Naur spl-0.6.5.7/include/sys/isa_defs.h spl-0.6.5.7.new/include/sys/isa_defs.h --- spl-0.6.5.7/include/sys/isa_defs.h 2016-03-22 19:59:29.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/isa_defs.h 2016-08-01 16:43:34.280796341 +0200 @@ -44,6 +44,9 @@ #define _LP64 #endif +#define _ALIGNMENT_REQUIRED 1 + + /* i386 arch specific defines */ #elif defined(__i386) || defined(__i386__) @@ -59,6 +62,8 @@ #define _ILP32 #endif +#define _ALIGNMENT_REQUIRED 0 + /* powerpc (ppc64) arch specific defines */ #elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__) @@ -80,6 +85,12 @@ #endif #endif +/* + * Illumos doesn't define _ALIGNMENT_REQUIRED for PPC, so default to 1 + * out of paranoia. + */ +#define _ALIGNMENT_REQUIRED 1 + /* arm arch specific defines */ #elif defined(__arm) || defined(__arm__) || defined(__aarch64__) @@ -107,6 +118,12 @@ #define _BIG_ENDIAN #endif +/* + * Illumos doesn't define _ALIGNMENT_REQUIRED for ARM, so default to 1 + * out of paranoia. + */ +#define _ALIGNMENT_REQUIRED 1 + /* sparc arch specific defines */ #elif defined(__sparc) || defined(__sparc__) @@ -130,6 +147,7 @@ #define _BIG_ENDIAN #define _SUNOS_VTOC_16 +#define _ALIGNMENT_REQUIRED 1 /* s390 arch specific defines */ #elif defined(__s390__) @@ -145,7 +163,40 @@ #define _BIG_ENDIAN -#else /* Currently x86_64, i386, arm, powerpc, s390, and sparc are supported */ +/* + * Illumos doesn't define _ALIGNMENT_REQUIRED for s390, so default to 1 + * out of paranoia. + */ +#define _ALIGNMENT_REQUIRED 1 + +/* MIPS arch specific defines */ +#elif defined(__mips__) + +#if defined(__MIPSEB__) +#define _BIG_ENDIAN +#elif defined(__MIPSEL__) +#define _LITTLE_ENDIAN +#else +#error MIPS no endian specified +#endif + +#ifndef _LP64 +#define _ILP32 +#endif + +#define _SUNOS_VTOC_16 + +/* + * Illumos doesn't define _ALIGNMENT_REQUIRED for MIPS, so default to 1 + * out of paranoia. 
+ */ +#define _ALIGNMENT_REQUIRED 1 + +#else +/* + * Currently supported: + * x86_64, i386, arm, powerpc, s390, sparc, and mips + */ #error "Unsupported ISA type" #endif diff -Naur spl-0.6.5.7/include/sys/kobj.h spl-0.6.5.7.new/include/sys/kobj.h --- spl-0.6.5.7/include/sys/kobj.h 2013-03-22 23:19:11.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/kobj.h 2016-08-01 16:43:32.403791098 +0200 @@ -35,8 +35,8 @@ extern struct _buf *kobj_open_file(const char *name); extern void kobj_close_file(struct _buf *file); -extern int kobj_read_file(struct _buf *file, char *buf, - ssize_t size, offset_t off); +extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, + unsigned off); extern int kobj_get_filesize(struct _buf *file, uint64_t *size); #endif /* SPL_KOBJ_H */ diff -Naur spl-0.6.5.7/include/sys/Makefile.am spl-0.6.5.7.new/include/sys/Makefile.am --- spl-0.6.5.7/include/sys/Makefile.am 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/Makefile.am 2016-08-01 16:43:23.435766048 +0200 @@ -29,6 +29,7 @@ $(top_srcdir)/include/sys/dirent.h \ $(top_srcdir)/include/sys/disp.h \ $(top_srcdir)/include/sys/dkio.h \ + $(top_srcdir)/include/sys/dkioc_free_util.h \ $(top_srcdir)/include/sys/dklabel.h \ $(top_srcdir)/include/sys/dnlc.h \ $(top_srcdir)/include/sys/dumphdr.h \ diff -Naur spl-0.6.5.7/include/sys/mutex.h spl-0.6.5.7.new/include/sys/mutex.h --- spl-0.6.5.7/include/sys/mutex.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/mutex.h 2016-08-01 16:43:34.275796327 +0200 @@ -28,17 +28,25 @@ #include #include #include +#include typedef enum { MUTEX_DEFAULT = 0, MUTEX_SPIN = 1, - MUTEX_ADAPTIVE = 2 + MUTEX_ADAPTIVE = 2, + MUTEX_NOLOCKDEP = 3 } kmutex_type_t; typedef struct { struct mutex m_mutex; spinlock_t m_lock; /* used for serializing mutex_exit */ +#ifndef HAVE_MUTEX_OWNER + /* only when kernel doesn't have owner */ kthread_t *m_owner; +#endif +#ifdef CONFIG_LOCKDEP + kmutex_type_t m_type; +#endif /* CONFIG_LOCKDEP */ } kmutex_t; #define MUTEX(mp) (&((mp)->m_mutex)) @@ -46,20 +54,56 @@ static inline void spl_mutex_set_owner(kmutex_t *mp) { + /* + * kernel will handle its owner, so we don't need to do anything if it + * is defined. + */ +#ifndef HAVE_MUTEX_OWNER mp->m_owner = current; +#endif } static inline void spl_mutex_clear_owner(kmutex_t *mp) { +#ifndef HAVE_MUTEX_OWNER mp->m_owner = NULL; +#endif } +#ifdef HAVE_MUTEX_OWNER +#define mutex_owner(mp) (ACCESS_ONCE(MUTEX(mp)->owner)) +#else #define mutex_owner(mp) (ACCESS_ONCE((mp)->m_owner)) +#endif #define mutex_owned(mp) (mutex_owner(mp) == current) #define MUTEX_HELD(mp) mutex_owned(mp) #define MUTEX_NOT_HELD(mp) (!MUTEX_HELD(mp)) +#ifdef CONFIG_LOCKDEP +static inline void +spl_mutex_set_type(kmutex_t *mp, kmutex_type_t type) +{ + mp->m_type = type; +} +static inline void +spl_mutex_lockdep_off_maybe(kmutex_t *mp) \ +{ \ + if (mp && mp->m_type == MUTEX_NOLOCKDEP) \ + lockdep_off(); \ +} +static inline void +spl_mutex_lockdep_on_maybe(kmutex_t *mp) \ +{ \ + if (mp && mp->m_type == MUTEX_NOLOCKDEP) \ + lockdep_on(); \ +} +#else /* CONFIG_LOCKDEP */ +#define spl_mutex_set_type(mp, type) +#define spl_mutex_lockdep_off_maybe(mp) +#define spl_mutex_lockdep_on_maybe(mp) +#endif /* CONFIG_LOCKDEP */ + /* * The following functions must be a #define and not static inline. 
* This ensures that the native linux mutex functions (lock/unlock) @@ -70,11 +114,12 @@ #define mutex_init(mp, name, type, ibc) \ { \ static struct lock_class_key __key; \ - ASSERT(type == MUTEX_DEFAULT); \ + ASSERT(type == MUTEX_DEFAULT || type == MUTEX_NOLOCKDEP); \ \ __mutex_init(MUTEX(mp), (name) ? (#name) : (#mp), &__key); \ spin_lock_init(&(mp)->m_lock); \ spl_mutex_clear_owner(mp); \ + spl_mutex_set_type(mp, type); \ } #undef mutex_destroy @@ -87,8 +132,10 @@ ({ \ int _rc_; \ \ + spl_mutex_lockdep_off_maybe(mp); \ if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \ spl_mutex_set_owner(mp); \ + spl_mutex_lockdep_on_maybe(mp); \ \ _rc_; \ }) @@ -97,14 +144,18 @@ #define mutex_enter_nested(mp, subclass) \ { \ ASSERT3P(mutex_owner(mp), !=, current); \ + spl_mutex_lockdep_off_maybe(mp); \ mutex_lock_nested(MUTEX(mp), (subclass)); \ + spl_mutex_lockdep_on_maybe(mp); \ spl_mutex_set_owner(mp); \ } #else /* CONFIG_DEBUG_LOCK_ALLOC */ #define mutex_enter_nested(mp, subclass) \ { \ ASSERT3P(mutex_owner(mp), !=, current); \ + spl_mutex_lockdep_off_maybe(mp); \ mutex_lock(MUTEX(mp)); \ + spl_mutex_lockdep_on_maybe(mp); \ spl_mutex_set_owner(mp); \ } #endif /* CONFIG_DEBUG_LOCK_ALLOC */ @@ -132,10 +183,12 @@ */ #define mutex_exit(mp) \ { \ + spl_mutex_lockdep_off_maybe(mp); \ spin_lock(&(mp)->m_lock); \ spl_mutex_clear_owner(mp); \ mutex_unlock(MUTEX(mp)); \ spin_unlock(&(mp)->m_lock); \ + spl_mutex_lockdep_on_maybe(mp); \ } int spl_mutex_init(void); diff -Naur spl-0.6.5.7/include/sys/random.h spl-0.6.5.7.new/include/sys/random.h --- spl-0.6.5.7/include/sys/random.h 2013-03-22 23:19:11.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/random.h 2016-08-01 16:43:33.091793020 +0200 @@ -35,11 +35,6 @@ return 0; } -static __inline__ int -random_get_pseudo_bytes(uint8_t *ptr, size_t len) -{ - get_random_bytes((void *)ptr,(int)len); - return 0; -} +extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); #endif /* _SPL_RANDOM_H */ diff -Naur spl-0.6.5.7/include/sys/rwlock.h spl-0.6.5.7.new/include/sys/rwlock.h --- spl-0.6.5.7/include/sys/rwlock.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/rwlock.h 2016-08-01 16:43:34.279796338 +0200 @@ -30,55 +30,86 @@ #include typedef enum { - RW_DRIVER = 2, - RW_DEFAULT = 4 + RW_DRIVER = 2, + RW_DEFAULT = 4, + RW_NOLOCKDEP = 5 } krw_type_t; typedef enum { - RW_NONE = 0, - RW_WRITER = 1, - RW_READER = 2 + RW_NONE = 0, + RW_WRITER = 1, + RW_READER = 2 } krw_t; +/* + * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner + * field, so we don't need our own. + */ typedef struct { - struct rw_semaphore rw_rwlock; - kthread_t *rw_owner; + struct rw_semaphore rw_rwlock; +#ifndef CONFIG_RWSEM_SPIN_ON_OWNER + kthread_t *rw_owner; +#endif +#ifdef CONFIG_LOCKDEP + krw_type_t rw_type; +#endif /* CONFIG_LOCKDEP */ } krwlock_t; -#define SEM(rwp) ((struct rw_semaphore *)(rwp)) +#define SEM(rwp) (&(rwp)->rw_rwlock) static inline void spl_rw_set_owner(krwlock_t *rwp) { - unsigned long flags; - - spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); - rwp->rw_owner = current; - spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); +/* + * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write, + * downgrade_write and __init_rwsem will set/clear owner for us. 
+ */ +#ifndef CONFIG_RWSEM_SPIN_ON_OWNER + rwp->rw_owner = current; +#endif } static inline void spl_rw_clear_owner(krwlock_t *rwp) { - unsigned long flags; - - spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); - rwp->rw_owner = NULL; - spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); +#ifndef CONFIG_RWSEM_SPIN_ON_OWNER + rwp->rw_owner = NULL; +#endif } static inline kthread_t * rw_owner(krwlock_t *rwp) { - unsigned long flags; - kthread_t *owner; - - spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); - owner = rwp->rw_owner; - spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER + return SEM(rwp)->owner; +#else + return rwp->rw_owner; +#endif +} - return owner; +#ifdef CONFIG_LOCKDEP +static inline void +spl_rw_set_type(krwlock_t *rwp, krw_type_t type) +{ + rwp->rw_type = type; } +static inline void +spl_rw_lockdep_off_maybe(krwlock_t *rwp) \ +{ \ + if (rwp && rwp->rw_type == RW_NOLOCKDEP) \ + lockdep_off(); \ +} +static inline void +spl_rw_lockdep_on_maybe(krwlock_t *rwp) \ +{ \ + if (rwp && rwp->rw_type == RW_NOLOCKDEP) \ + lockdep_on(); \ +} +#else /* CONFIG_LOCKDEP */ +#define spl_rw_set_type(rwp, type) +#define spl_rw_lockdep_off_maybe(rwp) +#define spl_rw_lockdep_on_maybe(rwp) +#endif /* CONFIG_LOCKDEP */ static inline int RW_READ_HELD(krwlock_t *rwp) @@ -89,7 +120,7 @@ static inline int RW_WRITE_HELD(krwlock_t *rwp) { - return (spl_rwsem_is_locked(SEM(rwp)) && rw_owner(rwp) == current); + return (rw_owner(rwp) == current); } static inline int @@ -104,107 +135,101 @@ * will be correctly located in the users code which is important * for the built in kernel lock analysis tools */ -#define rw_init(rwp, name, type, arg) \ -({ \ - static struct lock_class_key __key; \ - \ - __init_rwsem(SEM(rwp), #rwp, &__key); \ - spl_rw_clear_owner(rwp); \ -}) - -#define rw_destroy(rwp) \ -({ \ - VERIFY(!RW_LOCK_HELD(rwp)); \ -}) - -#define rw_tryenter(rwp, rw) \ -({ \ - int _rc_ = 0; \ - \ - switch (rw) { \ - case RW_READER: \ - _rc_ = down_read_trylock(SEM(rwp)); \ - break; \ - case RW_WRITER: \ - if ((_rc_ = down_write_trylock(SEM(rwp)))) \ - spl_rw_set_owner(rwp); \ - break; \ - default: \ - VERIFY(0); \ - } \ - _rc_; \ -}) - -#define rw_enter(rwp, rw) \ -({ \ - switch (rw) { \ - case RW_READER: \ - down_read(SEM(rwp)); \ - break; \ - case RW_WRITER: \ - down_write(SEM(rwp)); \ - spl_rw_set_owner(rwp); \ - break; \ - default: \ - VERIFY(0); \ - } \ -}) - -#define rw_exit(rwp) \ -({ \ - if (RW_WRITE_HELD(rwp)) { \ - spl_rw_clear_owner(rwp); \ - up_write(SEM(rwp)); \ - } else { \ - ASSERT(RW_READ_HELD(rwp)); \ - up_read(SEM(rwp)); \ - } \ -}) - -#define rw_downgrade(rwp) \ -({ \ - spl_rw_clear_owner(rwp); \ - downgrade_write(SEM(rwp)); \ +#define rw_init(rwp, name, type, arg) \ +({ \ + static struct lock_class_key __key; \ + ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP); \ + \ + __init_rwsem(SEM(rwp), #rwp, &__key); \ + spl_rw_clear_owner(rwp); \ + spl_rw_set_type(rwp, type); \ +}) + +#define rw_destroy(rwp) \ +({ \ + VERIFY(!RW_LOCK_HELD(rwp)); \ +}) + +#define rw_tryenter(rwp, rw) \ +({ \ + int _rc_ = 0; \ + \ + spl_rw_lockdep_off_maybe(rwp); \ + switch (rw) { \ + case RW_READER: \ + _rc_ = down_read_trylock(SEM(rwp)); \ + break; \ + case RW_WRITER: \ + if ((_rc_ = down_write_trylock(SEM(rwp)))) \ + spl_rw_set_owner(rwp); \ + break; \ + default: \ + VERIFY(0); \ + } \ + spl_rw_lockdep_on_maybe(rwp); \ + _rc_; \ +}) + +#define rw_enter(rwp, rw) \ +({ \ + spl_rw_lockdep_off_maybe(rwp); \ + switch (rw) { \ + case RW_READER: \ + 
down_read(SEM(rwp)); \ + break; \ + case RW_WRITER: \ + down_write(SEM(rwp)); \ + spl_rw_set_owner(rwp); \ + break; \ + default: \ + VERIFY(0); \ + } \ + spl_rw_lockdep_on_maybe(rwp); \ +}) + +#define rw_exit(rwp) \ +({ \ + spl_rw_lockdep_off_maybe(rwp); \ + if (RW_WRITE_HELD(rwp)) { \ + spl_rw_clear_owner(rwp); \ + up_write(SEM(rwp)); \ + } else { \ + ASSERT(RW_READ_HELD(rwp)); \ + up_read(SEM(rwp)); \ + } \ + spl_rw_lockdep_on_maybe(rwp); \ +}) + +#define rw_downgrade(rwp) \ +({ \ + spl_rw_lockdep_off_maybe(rwp); \ + spl_rw_clear_owner(rwp); \ + downgrade_write(SEM(rwp)); \ + spl_rw_lockdep_on_maybe(rwp); \ }) -#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) /* - * For the generic implementations of rw-semaphores the following is - * true. If your semaphore implementation internally represents the - * semaphore state differently then special case handling is required. - * - if activity/count is 0 then there are no active readers or writers - * - if activity/count is +ve then that is the number of active readers - * - if activity/count is -1 then there is one active writer + * This implementation of rw_tryupgrade() behaves slightly differently + * from its counterparts on other platforms. It drops the RW_READER lock + * and then acquires the RW_WRITER lock leaving a small window where no + * lock is held. On other platforms the lock is never released during + * the upgrade process. This is necessary under Linux because the kernel + * does not provide an upgrade function. */ - -extern void __up_read_locked(struct rw_semaphore *); -extern int __down_write_trylock_locked(struct rw_semaphore *); - -#define rw_tryupgrade(rwp) \ -({ \ - unsigned long _flags_; \ - int _rc_ = 0; \ - \ - spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, _flags_); \ - if ((list_empty(&SEM(rwp)->wait_list)) && \ - (SEM(rwp)->activity == 1)) { \ - __up_read_locked(SEM(rwp)); \ - VERIFY(_rc_ = __down_write_trylock_locked(SEM(rwp))); \ - (rwp)->rw_owner = current; \ - } \ - spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, _flags_); \ - _rc_; \ +#define rw_tryupgrade(rwp) \ +({ \ + int _rc_ = 0; \ + \ + if (RW_WRITE_HELD(rwp)) { \ + _rc_ = 1; \ + } else { \ + spl_rw_lockdep_off_maybe(rwp); \ + if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \ + spl_rw_set_owner(rwp); \ + spl_rw_lockdep_on_maybe(rwp); \ + } \ + _rc_; \ }) -#else -/* - * rw_tryupgrade() can be implemented correctly but for each supported - * arch we will need a custom implementation. For the x86 implementation - * it looks like a custom cmpxchg() to atomically check and promote the - * rwsem would be safe. For now that's not worth the trouble so in this - * case rw_tryupgrade() has just been disabled. 
- */ -#define rw_tryupgrade(rwp) ({ 0; }) -#endif int spl_rw_init(void); void spl_rw_fini(void); diff -Naur spl-0.6.5.7/include/sys/sunldi.h spl-0.6.5.7.new/include/sys/sunldi.h --- spl-0.6.5.7/include/sys/sunldi.h 2013-03-22 23:19:11.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/sunldi.h 2016-08-01 16:43:33.091793020 +0200 @@ -34,23 +34,4 @@ #define SECTOR_SIZE 512 -typedef struct modlinkage { - int ml_rev; - struct modlfs *ml_modlfs; - struct modldrv *ml_modldrv; - major_t ml_major; - unsigned ml_minors; - void *pad1; -} modlinkage_t; - -typedef struct ldi_ident { - char li_modname[MAXNAMELEN]; - dev_t li_dev; -} *ldi_ident_t; - -typedef struct block_device *ldi_handle_t; - -extern int ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip); -extern void ldi_ident_release(ldi_ident_t li); - #endif /* SPL_SUNLDI_H */ diff -Naur spl-0.6.5.7/include/sys/sysmacros.h spl-0.6.5.7.new/include/sys/sysmacros.h --- spl-0.6.5.7/include/sys/sysmacros.h 2015-12-24 01:31:01.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/sysmacros.h 2016-08-01 16:43:34.280796341 +0200 @@ -158,6 +158,9 @@ extern void spl_setup(void); extern void spl_cleanup(void); +#define highbit(x) __fls(x) +#define lowbit(x) __ffs(x) + #define highbit64(x) fls64(x) #define makedevice(maj,min) makedev(maj,min) diff -Naur spl-0.6.5.7/include/sys/taskq.h spl-0.6.5.7.new/include/sys/taskq.h --- spl-0.6.5.7/include/sys/taskq.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/taskq.h 2016-08-01 16:43:33.091793020 +0200 @@ -1,4 +1,4 @@ -/*****************************************************************************\ +/* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -20,10 +20,10 @@ * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . -\*****************************************************************************/ + */ #ifndef _SPL_TASKQ_H -#define _SPL_TASKQ_H +#define _SPL_TASKQ_H #include #include @@ -32,55 +32,67 @@ #include #include #include +#include -#define TASKQ_NAMELEN 31 +#define TASKQ_NAMELEN 31 -#define TASKQ_PREPOPULATE 0x00000001 -#define TASKQ_CPR_SAFE 0x00000002 -#define TASKQ_DYNAMIC 0x00000004 -#define TASKQ_THREADS_CPU_PCT 0x00000008 -#define TASKQ_DC_BATCH 0x00000010 -#define TASKQ_ACTIVE 0x80000000 +#define TASKQ_PREPOPULATE 0x00000001 +#define TASKQ_CPR_SAFE 0x00000002 +#define TASKQ_DYNAMIC 0x00000004 +#define TASKQ_THREADS_CPU_PCT 0x00000008 +#define TASKQ_DC_BATCH 0x00000010 +#define TASKQ_ACTIVE 0x80000000 /* * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as * KM_SLEEP/KM_NOSLEEP. TQ_NOQUEUE/TQ_NOALLOC are set particularly * large so as not to conflict with already used GFP_* defines. 
*/ -#define TQ_SLEEP 0x00000000 -#define TQ_NOSLEEP 0x00000001 -#define TQ_PUSHPAGE 0x00000002 -#define TQ_NOQUEUE 0x01000000 -#define TQ_NOALLOC 0x02000000 -#define TQ_NEW 0x04000000 -#define TQ_FRONT 0x08000000 +#define TQ_SLEEP 0x00000000 +#define TQ_NOSLEEP 0x00000001 +#define TQ_PUSHPAGE 0x00000002 +#define TQ_NOQUEUE 0x01000000 +#define TQ_NOALLOC 0x02000000 +#define TQ_NEW 0x04000000 +#define TQ_FRONT 0x08000000 + +/* + * spin_lock(lock) and spin_lock_nested(lock,0) are equivalent, + * so TQ_LOCK_DYNAMIC must not evaluate to 0 + */ +typedef enum tq_lock_role { + TQ_LOCK_GENERAL = 0, + TQ_LOCK_DYNAMIC = 1, +} tq_lock_role_t; typedef unsigned long taskqid_t; typedef void (task_func_t)(void *); typedef struct taskq { - spinlock_t tq_lock; /* protects taskq_t */ - unsigned long tq_lock_flags; /* interrupt state */ - char *tq_name; /* taskq name */ - struct list_head tq_thread_list;/* list of all threads */ - struct list_head tq_active_list;/* list of active threads */ - int tq_nactive; /* # of active threads */ - int tq_nthreads; /* # of existing threads */ - int tq_nspawn; /* # of threads being spawned */ - int tq_maxthreads; /* # of threads maximum */ - int tq_pri; /* priority */ - int tq_minalloc; /* min task_t pool size */ - int tq_maxalloc; /* max task_t pool size */ - int tq_nalloc; /* cur task_t pool size */ - uint_t tq_flags; /* flags */ - taskqid_t tq_next_id; /* next pend/work id */ - taskqid_t tq_lowest_id; /* lowest pend/work id */ - struct list_head tq_free_list; /* free task_t's */ - struct list_head tq_pend_list; /* pending task_t's */ - struct list_head tq_prio_list; /* priority pending task_t's */ - struct list_head tq_delay_list; /* delayed task_t's */ - wait_queue_head_t tq_work_waitq; /* new work waitq */ - wait_queue_head_t tq_wait_waitq; /* wait waitq */ + spinlock_t tq_lock; /* protects taskq_t */ + char *tq_name; /* taskq name */ + int tq_instance; /* instance of tq_name */ + struct list_head tq_thread_list; /* list of all threads */ + struct list_head tq_active_list; /* list of active threads */ + int tq_nactive; /* # of active threads */ + int tq_nthreads; /* # of existing threads */ + int tq_nspawn; /* # of threads being spawned */ + int tq_maxthreads; /* # of threads maximum */ + int tq_pri; /* priority */ + int tq_minalloc; /* min taskq_ent_t pool size */ + int tq_maxalloc; /* max taskq_ent_t pool size */ + int tq_nalloc; /* cur taskq_ent_t pool size */ + uint_t tq_flags; /* flags */ + taskqid_t tq_next_id; /* next pend/work id */ + taskqid_t tq_lowest_id; /* lowest pend/work id */ + struct list_head tq_free_list; /* free taskq_ent_t's */ + struct list_head tq_pend_list; /* pending taskq_ent_t's */ + struct list_head tq_prio_list; /* priority pending taskq_ent_t's */ + struct list_head tq_delay_list; /* delayed taskq_ent_t's */ + struct list_head tq_taskqs; /* all taskq_t's */ + wait_queue_head_t tq_work_waitq; /* new work waitq */ + wait_queue_head_t tq_wait_waitq; /* wait waitq */ + tq_lock_role_t tq_lock_class; /* class when taking tq_lock */ } taskq_t; typedef struct taskq_ent { @@ -93,10 +105,11 @@ void *tqent_arg; taskq_t *tqent_taskq; uintptr_t tqent_flags; + unsigned long tqent_birth; } taskq_ent_t; -#define TQENT_FLAG_PREALLOC 0x1 -#define TQENT_FLAG_CANCEL 0x2 +#define TQENT_FLAG_PREALLOC 0x1 +#define TQENT_FLAG_CANCEL 0x2 typedef struct taskq_thread { struct list_head tqt_thread_list; @@ -111,6 +124,10 @@ /* Global system-wide dynamic task queue available for all consumers */ extern taskq_t *system_taskq; +/* List of all taskqs */ +extern struct 
list_head tq_list; +extern struct rw_semaphore tq_list_sem; + extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *, uint_t, clock_t); @@ -124,11 +141,11 @@ extern void taskq_wait_outstanding(taskq_t *, taskqid_t); extern void taskq_wait(taskq_t *); extern int taskq_cancel_id(taskq_t *, taskqid_t); -extern int taskq_member(taskq_t *, void *); +extern int taskq_member(taskq_t *, kthread_t *); -#define taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \ +#define taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \ taskq_create(name, nthreads, pri, min, max, flags) -#define taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \ +#define taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \ taskq_create(name, nthreads, maxclsyspri, min, max, flags) int spl_taskq_init(void); diff -Naur spl-0.6.5.7/include/sys/time.h spl-0.6.5.7.new/include/sys/time.h --- spl-0.6.5.7/include/sys/time.h 2015-12-24 01:31:01.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/time.h 2016-08-01 16:43:34.276796330 +0200 @@ -46,6 +46,9 @@ #define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC)) #define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC)) +#define NSEC2SEC(n) ((n) / (NANOSEC / SEC)) +#define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC)) + static const int hz = HZ; #define TIMESPEC_OVERFLOW(ts) \ diff -Naur spl-0.6.5.7/include/sys/tsd.h spl-0.6.5.7.new/include/sys/tsd.h --- spl-0.6.5.7/include/sys/tsd.h 2013-03-22 23:19:11.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/tsd.h 2016-08-01 16:43:31.475788506 +0200 @@ -35,6 +35,7 @@ extern int tsd_set(uint_t, void *); extern void *tsd_get(uint_t); +extern void *tsd_get_by_thread(uint_t, kthread_t *); extern void tsd_create(uint_t *, dtor_func_t); extern void tsd_destroy(uint_t *); extern void tsd_exit(void); diff -Naur spl-0.6.5.7/include/sys/user.h spl-0.6.5.7.new/include/sys/user.h --- spl-0.6.5.7/include/sys/user.h 2015-12-24 01:18:07.000000000 +0100 +++ spl-0.6.5.7.new/include/sys/user.h 2016-08-01 16:43:24.019767680 +0200 @@ -30,8 +30,8 @@ * about the Linux task_struct. Since this is internal to our compatibility * layer, we make it an opaque type. * - * XXX: If the descriptor changes under us, we would get an incorrect - * reference. + * XXX: If the descriptor changes under us and we do not do a getf() between + * the change and using it, we would get an incorrect reference. */ struct uf_info; diff -Naur spl-0.6.5.7/man/man5/spl-module-parameters.5 spl-0.6.5.7.new/man/man5/spl-module-parameters.5 --- spl-0.6.5.7/man/man5/spl-module-parameters.5 2015-12-17 18:46:53.000000000 +0100 +++ spl-0.6.5.7.new/man/man5/spl-module-parameters.5 2016-08-01 16:43:33.091793020 +0200 @@ -44,6 +44,20 @@ .sp .ne 2 .na +\fBspl_kmem_cache_kmem_threads\fR (uint) +.ad +.RS 12n +The number of threads created for the spl_kmem_cache task queue. This task +queue is responsible for allocating new slabs for use by the kmem caches. +For the majority of systems and workloads only a small number of threads are +required. +.sp +Default value: \fB4\fR +.RE + +.sp +.ne 2 +.na \fBspl_kmem_cache_reclaim\fR (uint) .ad .RS 12n @@ -237,6 +251,20 @@ .RE .sp +.ne 2 +.na +\fBspl_taskq_kick\fR (uint) +.ad +.RS 12n +Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will +scan all the taskqs. If any of them have a pending task more than 5 seconds old, +it will kick it to spawn more threads. 
This can be used if you find a rare +deadlock occurs because one or more taskqs didn't spawn a thread when it should. +.sp +Default value: \fB0\fR +.RE + +.sp .ne 2 .na \fBspl_taskq_thread_bind\fR (int) diff -Naur spl-0.6.5.7/module/spl/spl-condvar.c spl-0.6.5.7.new/module/spl/spl-condvar.c --- spl-0.6.5.7/module/spl/spl-condvar.c 2016-05-13 04:46:57.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-condvar.c 2016-08-01 16:43:34.278796336 +0200 @@ -26,6 +26,7 @@ #include #include +#include void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) @@ -238,7 +239,7 @@ DEFINE_WAIT(wait); kmutex_t *m; hrtime_t time_left, now; - unsigned long time_left_us; + ktime_t ktime_left; ASSERT(cvp); ASSERT(mp); @@ -258,7 +259,6 @@ atomic_dec(&cvp->cv_refs); return (-1); } - time_left_us = time_left / NSEC_PER_USEC; prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); atomic_inc(&cvp->cv_waiters); @@ -273,7 +273,9 @@ * Allow a 100 us range to give kernel an opportunity to coalesce * interrupts */ - usleep_range(time_left_us, time_left_us + 100); + ktime_left = ktime_set(0, time_left); + schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC, + HRTIMER_MODE_REL); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { @@ -290,15 +292,15 @@ mutex_enter(mp); time_left = expire_time - gethrtime(); - return (time_left > 0 ? time_left : -1); + return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1); } /* * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. */ -clock_t -cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, - int flag) +static clock_t +cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, + int flag, int state) { if (res > 1) { /* @@ -312,10 +314,27 @@ if (!(flag & CALLOUT_FLAG_ABSOLUTE)) tim += gethrtime(); - return (__cv_timedwait_hires(cvp, mp, tim, TASK_UNINTERRUPTIBLE)); + return (__cv_timedwait_hires(cvp, mp, tim, state)); +} + +clock_t +cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, + int flag) +{ + return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, + TASK_UNINTERRUPTIBLE)); } EXPORT_SYMBOL(cv_timedwait_hires); +clock_t +cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, + int flag) +{ + return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, + TASK_INTERRUPTIBLE)); +} +EXPORT_SYMBOL(cv_timedwait_sig_hires); + void __cv_signal(kcondvar_t *cvp) { diff -Naur spl-0.6.5.7/module/spl/spl-generic.c spl-0.6.5.7.new/module/spl/spl-generic.c --- spl-0.6.5.7/module/spl/spl-generic.c 2016-05-13 04:46:57.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-generic.c 2016-08-01 16:43:33.091793020 +0200 @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include #include #include @@ -56,6 +58,112 @@ proc_t p0 = { 0 }; EXPORT_SYMBOL(p0); +/* + * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna + * + * "Further scramblings of Marsaglia's xorshift generators" + * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf + * + * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose + * is to provide bytes containing random numbers. It is mapped to /dev/urandom + * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's + * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so + * we can implement it using a fast PRNG that we seed using Linux' actual + * equivalent to random_get_pseudo_bytes(). 
We do this by providing each CPU
+ * with an independent seed so that all calls to random_get_pseudo_bytes() are
+ * free of atomic instructions.
+ *
+ * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
+ * to generate words larger than 128 bits will paradoxically be limited to
+ * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
+ * 128-bit words and selecting the first will implicitly select the second. If
+ * a caller finds this behavior undesirable, random_get_bytes() should be used
+ * instead.
+ *
+ * XXX: Linux interrupt handlers that trigger within the critical section
+ * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * see the same numbers. Nothing in the code currently calls this in an
+ * interrupt handler, so this is considered to be okay. If that becomes a
+ * problem, we could create a set of per-cpu variables for interrupt handlers
+ * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
+ * true.
+ */
+static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
+
+/*
+ * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
+ * file:
+ *
+ * http://xorshift.di.unimi.it/xorshift128plus.c
+ */
+
+static inline uint64_t
+spl_rand_next(uint64_t *s) {
+        uint64_t s1 = s[0];
+        const uint64_t s0 = s[1];
+        s[0] = s0;
+        s1 ^= s1 << 23; // a
+        s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
+        return (s[1] + s0);
+}
+
+static inline void
+spl_rand_jump(uint64_t *s) {
+        static const uint64_t JUMP[] =
+            { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+
+        uint64_t s0 = 0;
+        uint64_t s1 = 0;
+        int i, b;
+        for (i = 0; i < sizeof (JUMP) / sizeof (*JUMP); i++)
+                for (b = 0; b < 64; b++) {
+                        if (JUMP[i] & 1ULL << b) {
+                                s0 ^= s[0];
+                                s1 ^= s[1];
+                        }
+                        (void) spl_rand_next(s);
+                }
+
+        s[0] = s0;
+        s[1] = s1;
+}
+
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+        uint64_t *xp, s[2];
+
+        ASSERT(ptr);
+
+        xp = get_cpu_var(spl_pseudo_entropy);
+
+        s[0] = xp[0];
+        s[1] = xp[1];
+
+        while (len) {
+                union {
+                        uint64_t ui64;
+                        uint8_t byte[sizeof (uint64_t)];
+                } entropy;
+                int i = MIN(len, sizeof (uint64_t));
+
+                len -= i;
+                entropy.ui64 = spl_rand_next(s);
+
+                while (i--)
+                        *ptr++ = entropy.byte[i];
+        }
+
+        xp[0] = s[0];
+        xp[1] = s[1];
+
+        put_cpu_var(spl_pseudo_entropy);
+
+        return (0);
+}
+EXPORT_SYMBOL(random_get_pseudo_bytes);
+
 #if BITS_PER_LONG == 32
 /*
  * Support 64/64 => 64 division on a 32-bit platform. While the kernel
@@ -491,29 +599,58 @@
 	rc = spl_kmem_init();
 	if (rc)
-		goto out1;
+		return (rc);
 
 	rc = spl_vmem_init();
-	if (rc)
-		goto out2;
-
-	rc = spl_kmem_cache_init();
-	if (rc)
-		goto out3;
+	if (rc) {
+		spl_kmem_fini();
+		return (rc);
+	}
 
 	return (rc);
-out3:
-	spl_vmem_fini();
-out2:
-	spl_kmem_fini();
-out1:
-	return (rc);
+}
+
+/*
+ * We initialize the random number generator with 128 bits of entropy from the
+ * system random number generator. In the improbable case that we have a zero
+ * seed, we fall back to the system jiffies, unless it is also zero, in which
+ * situation we use a preprogrammed seed. We step forward by 2^64 iterations to
+ * initialize each of the per-cpu seeds so that the sequences generated on each
+ * CPU are guaranteed to never overlap in practice.
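+ *
+ * Illustrative sketch (an editor's note, not upstream text): with a period
+ * of 2^128 - 1, seeding by repeated jumps hands each CPU a disjoint
+ * subsequence of one master seed S:
+ *
+ *   cpu0 seed = jump(S)          S advanced by 2^64 states
+ *   cpu1 seed = jump(jump(S))    S advanced by 2 * 2^64 states
+ *   ...
+ *
+ * so no two CPUs can generate overlapping streams in practice.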
+ */ +static void __init +spl_random_init(void) +{ + uint64_t s[2]; + int i; + + get_random_bytes(s, sizeof (s)); + + if (s[0] == 0 && s[1] == 0) { + if (jiffies != 0) { + s[0] = jiffies; + s[1] = ~0 - jiffies; + } else { + (void) memcpy(s, "improbable seed", sizeof (s)); + } + printk("SPL: get_random_bytes() returned 0 " + "when generating random seed. Setting initial seed to " + "0x%016llx%016llx.", cpu_to_be64(s[0]), cpu_to_be64(s[1])); + } + + for (i = 0; i < NR_CPUS; i++) { + uint64_t *wordp = per_cpu(spl_pseudo_entropy, i); + + spl_rand_jump(s); + + wordp[0] = s[0]; + wordp[1] = s[1]; + } } static void spl_kvmem_fini(void) { - spl_kmem_cache_fini(); spl_vmem_fini(); spl_kmem_fini(); } @@ -523,6 +660,8 @@ { int rc = 0; + spl_random_init(); + if ((rc = spl_kvmem_init())) goto out1; @@ -532,38 +671,43 @@ if ((rc = spl_rw_init())) goto out3; - if ((rc = spl_taskq_init())) + if ((rc = spl_tsd_init())) goto out4; - if ((rc = spl_vn_init())) + if ((rc = spl_taskq_init())) goto out5; - if ((rc = spl_proc_init())) + if ((rc = spl_kmem_cache_init())) goto out6; - if ((rc = spl_kstat_init())) + if ((rc = spl_vn_init())) goto out7; - if ((rc = spl_tsd_init())) + if ((rc = spl_proc_init())) goto out8; - if ((rc = spl_zlib_init())) + if ((rc = spl_kstat_init())) goto out9; + if ((rc = spl_zlib_init())) + goto out10; + printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); return (rc); +out10: + spl_kstat_fini(); out9: - spl_tsd_fini(); + spl_proc_fini(); out8: - spl_kstat_fini(); + spl_vn_fini(); out7: - spl_proc_fini(); + spl_kmem_cache_fini(); out6: - spl_vn_fini(); -out5: spl_taskq_fini(); +out5: + spl_tsd_fini(); out4: spl_rw_fini(); out3: @@ -584,11 +728,12 @@ printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n", SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); spl_zlib_fini(); - spl_tsd_fini(); spl_kstat_fini(); spl_proc_fini(); spl_vn_fini(); + spl_kmem_cache_fini(); spl_taskq_fini(); + spl_tsd_fini(); spl_rw_fini(); spl_mutex_fini(); spl_kvmem_fini(); diff -Naur spl-0.6.5.7/module/spl/spl-kmem-cache.c spl-0.6.5.7.new/module/spl/spl-kmem-cache.c --- spl-0.6.5.7/module/spl/spl-kmem-cache.c 2016-05-13 04:46:56.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-kmem-cache.c 2016-08-01 16:43:34.280796341 +0200 @@ -88,7 +88,7 @@ unsigned int spl_kmem_cache_magazine_size = 0; module_param(spl_kmem_cache_magazine_size, uint, 0444); MODULE_PARM_DESC(spl_kmem_cache_magazine_size, - "Default magazine size (2-256), set automatically (0)\n"); + "Default magazine size (2-256), set automatically (0)"); /* * The default behavior is to report the number of objects remaining in the @@ -1149,15 +1149,13 @@ * It is responsible for allocating a new slab, linking it in to the list * of partial slabs, and then waking any waiters. 
 */
-static void
-spl_cache_grow_work(void *data)
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
 {
-	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
-	spl_kmem_cache_t *skc = ska->ska_cache;
 	spl_kmem_slab_t *sks;
 
 	fstrans_cookie_t cookie = spl_fstrans_mark();
-	sks = spl_slab_alloc(skc, ska->ska_flags);
+	sks = spl_slab_alloc(skc, flags);
 	spl_fstrans_unmark(cookie);
 
 	spin_lock(&skc->skc_lock);
@@ -1165,15 +1163,29 @@
 		skc->skc_slab_total++;
 		skc->skc_obj_total += sks->sks_objs;
 		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+		smp_mb__before_atomic();
+		clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+		smp_mb__after_atomic();
+
+		wake_up_all(&skc->skc_waitq);
 	}
+	spin_unlock(&skc->skc_lock);
+
+	return (sks == NULL ? -ENOMEM : 0);
+}
+
+static void
+spl_cache_grow_work(void *data)
+{
+	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+	spl_kmem_cache_t *skc = ska->ska_cache;
+
+	(void) __spl_cache_grow(skc, ska->ska_flags);
 
 	atomic_dec(&skc->skc_ref);
 	smp_mb__before_atomic();
 	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
-	clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
 	smp_mb__after_atomic();
-	wake_up_all(&skc->skc_waitq);
-	spin_unlock(&skc->skc_lock);
 
 	kfree(ska);
 }
@@ -1214,6 +1226,21 @@
 	}
 
 	/*
+	 * To reduce context-switch overhead and improve NUMA locality, we
+	 * first try to allocate a new slab in the current process context
+	 * with the KM_NOSLEEP flag. If that fails, the allocation is handed
+	 * off to the task queue below.
+	 *
+	 * However, this can't be applied to KMC_VMEM caches due to a bug
+	 * where __vmalloc() doesn't honor gfp flags in page table allocation.
+	 */
+	if (!(skc->skc_flags & KMC_VMEM)) {
+		rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
+		if (rc == 0)
+			return (0);
+	}
+
+	/*
 	 * This is handled by dispatching a work request to the global work
 	 * queue. This allows us to asynchronously allocate a new slab while
 	 * retaining the ability to safely fall back to a smaller synchronous
diff -Naur spl-0.6.5.7/module/spl/spl-kmem-cache.c.orig spl-0.6.5.7.new/module/spl/spl-kmem-cache.c.orig
--- spl-0.6.5.7/module/spl/spl-kmem-cache.c.orig	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/module/spl/spl-kmem-cache.c.orig	2016-05-13 04:46:56.000000000 +0200
@@ -0,0 +1,1734 @@
+/*
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf .
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see .
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Within the scope of the spl-kmem.c file the kmem_cache_* definitions
+ * are removed to allow access to the real Linux slab allocator.
+ */ +#undef kmem_cache_destroy +#undef kmem_cache_create +#undef kmem_cache_alloc +#undef kmem_cache_free + + +/* + * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}() + * with smp_mb__{before,after}_atomic() because they were redundant. This is + * only used inside our SLAB allocator, so we implement an internal wrapper + * here to give us smp_mb__{before,after}_atomic() on older kernels. + */ +#ifndef smp_mb__before_atomic +#define smp_mb__before_atomic(x) smp_mb__before_clear_bit(x) +#endif + +#ifndef smp_mb__after_atomic +#define smp_mb__after_atomic(x) smp_mb__after_clear_bit(x) +#endif + +/* + * Cache expiration was implemented because it was part of the default Solaris + * kmem_cache behavior. The idea is that per-cpu objects which haven't been + * accessed in several seconds should be returned to the cache. On the other + * hand Linux slabs never move objects back to the slabs unless there is + * memory pressure on the system. By default the Linux method is enabled + * because it has been shown to improve responsiveness on low memory systems. + * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM. + */ +unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM; +EXPORT_SYMBOL(spl_kmem_cache_expire); +module_param(spl_kmem_cache_expire, uint, 0644); +MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)"); + +/* + * Cache magazines are an optimization designed to minimize the cost of + * allocating memory. They do this by keeping a per-cpu cache of recently + * freed objects, which can then be reallocated without taking a lock. This + * can improve performance on highly contended caches. However, because + * objects in magazines will prevent otherwise empty slabs from being + * immediately released this may not be ideal for low memory machines. + * + * For this reason spl_kmem_cache_magazine_size can be used to set a maximum + * magazine size. When this value is set to 0 the magazine size will be + * automatically determined based on the object size. Otherwise magazines + * will be limited to 2-256 objects per magazine (i.e per cpu). Magazines + * may never be entirely disabled in this implementation. + */ +unsigned int spl_kmem_cache_magazine_size = 0; +module_param(spl_kmem_cache_magazine_size, uint, 0444); +MODULE_PARM_DESC(spl_kmem_cache_magazine_size, + "Default magazine size (2-256), set automatically (0)\n"); + +/* + * The default behavior is to report the number of objects remaining in the + * cache. This allows the Linux VM to repeatedly reclaim objects from the + * cache when memory is low satisfy other memory allocations. Alternately, + * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache + * is reclaimed. This may increase the likelihood of out of memory events. 
+ */
+unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
+module_param(spl_kmem_cache_reclaim, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
+
+unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
+
+unsigned int spl_kmem_cache_obj_per_slab_min = SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN;
+module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min,
+	"Minimal number of objects per slab");
+
+unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE;
+module_param(spl_kmem_cache_max_size, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
+
+/*
+ * For small objects the Linux slab allocator should be used to make the most
+ * efficient use of the memory. However, large objects are not supported by
+ * the Linux slab and therefore the SPL implementation is preferred. A cutoff
+ * of 16K was determined to be optimal for architectures using 4K pages.
+ */
+#if PAGE_SIZE == 4096
+unsigned int spl_kmem_cache_slab_limit = 16384;
+#else
+unsigned int spl_kmem_cache_slab_limit = 0;
+#endif
+module_param(spl_kmem_cache_slab_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
+	"Objects less than N bytes use the Linux slab");
+
+/*
+ * This value defaults to a threshold designed to avoid allocations which
+ * have been deemed costly by the kernel.
+ */
+unsigned int spl_kmem_cache_kmem_limit =
+	((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) /
+	SPL_KMEM_CACHE_OBJ_PER_SLAB;
+module_param(spl_kmem_cache_kmem_limit, uint, 0644);
+MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
+	"Objects less than N bytes use kmalloc");
+
+/*
+ * The number of threads available to allocate new slabs for caches. This
+ * should not need to be tuned but it is available for performance analysis.
+ */
+unsigned int spl_kmem_cache_kmem_threads = 4;
+module_param(spl_kmem_cache_kmem_threads, uint, 0444);
+MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
+	"Number of spl_kmem_cache threads");
+
+/*
+ * Slab allocation interfaces
+ *
+ * While the Linux slab implementation was inspired by the Solaris
+ * implementation I cannot use it to emulate the Solaris APIs. I
+ * require two features which are not provided by the Linux slab.
+ *
+ * 1) Constructors AND destructors. Recent versions of the Linux
+ *    kernel have removed support for destructors. This is a deal
+ *    breaker for the SPL which contains particularly expensive
+ *    initializers for mutexes, condition variables, etc. We also
+ *    require a minimal level of cleanup for these data types which,
+ *    unlike many Linux data types, do need to be explicitly destroyed.
+ *
+ * 2) Virtual address space backed slab. Callers of the Solaris slab
+ *    expect it to work well for both small and very large allocations.
+ *    Because of memory fragmentation the Linux slab which is backed
+ *    by kmalloc'ed memory performs very badly when confronted with
+ *    large numbers of large allocations. Basing the slab on the
+ *    virtual address space removes the need for contiguous pages
+ *    and greatly improves performance for large allocations.
+ *
+ * For these reasons, the SPL has its own slab implementation with
+ * the needed features.
It is not as highly optimized as either the + * Solaris or Linux slabs, but it should get me most of what is + * needed until it can be optimized or obsoleted by another approach. + * + * One serious concern I do have about this method is the relatively + * small virtual address space on 32bit arches. This will seriously + * constrain the size of the slab caches and their performance. + */ + +struct list_head spl_kmem_cache_list; /* List of caches */ +struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ +taskq_t *spl_kmem_cache_taskq; /* Task queue for ageing / reclaim */ + +static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj); + +SPL_SHRINKER_CALLBACK_FWD_DECLARE(spl_kmem_cache_generic_shrinker); +SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker, + spl_kmem_cache_generic_shrinker, KMC_DEFAULT_SEEKS); + +static void * +kv_alloc(spl_kmem_cache_t *skc, int size, int flags) +{ + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; + + if (skc->skc_flags & KMC_KMEM) { + ASSERT(ISP2(size)); + ptr = (void *)__get_free_pages(lflags, get_order(size)); + } else { + ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL); + } + + /* Resulting allocated memory will be page aligned */ + ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); + + return (ptr); +} + +static void +kv_free(spl_kmem_cache_t *skc, void *ptr, int size) +{ + ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); + + /* + * The Linux direct reclaim path uses this out of band value to + * determine if forward progress is being made. Normally this is + * incremented by kmem_freepages() which is part of the various + * Linux slab implementations. However, since we are using none + * of that infrastructure we are responsible for incrementing it. + */ + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT; + + if (skc->skc_flags & KMC_KMEM) { + ASSERT(ISP2(size)); + free_pages((unsigned long)ptr, get_order(size)); + } else { + vfree(ptr); + } +} + +/* + * Required space for each aligned sks. + */ +static inline uint32_t +spl_sks_size(spl_kmem_cache_t *skc) +{ + return (P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t), + skc->skc_obj_align, uint32_t)); +} + +/* + * Required space for each aligned object. + */ +static inline uint32_t +spl_obj_size(spl_kmem_cache_t *skc) +{ + uint32_t align = skc->skc_obj_align; + + return (P2ROUNDUP_TYPED(skc->skc_obj_size, align, uint32_t) + + P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t), align, uint32_t)); +} + +/* + * Lookup the spl_kmem_object_t for an object given that object. + */ +static inline spl_kmem_obj_t * +spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj) +{ + return (obj + P2ROUNDUP_TYPED(skc->skc_obj_size, + skc->skc_obj_align, uint32_t)); +} + +/* + * Required space for each offslab object taking in to account alignment + * restrictions and the power-of-two requirement of kv_alloc(). + */ +static inline uint32_t +spl_offslab_size(spl_kmem_cache_t *skc) +{ + return (1UL << (fls64(spl_obj_size(skc)) + 1)); +} + +/* + * It's important that we pack the spl_kmem_obj_t structure and the + * actual objects in to one large address space to minimize the number + * of calls to the allocator. It is far better to do a few large + * allocations and then subdivide it ourselves. Now which allocator + * we use requires balancing a few trade offs. + * + * For small objects we use kmem_alloc() because as long as you are + * only requesting a small number of pages (ideally just one) its cheap. 
+ * However, when you start requesting multiple pages with kmem_alloc() + * it gets increasingly expensive since it requires contiguous pages. + * For this reason we shift to vmem_alloc() for slabs of large objects + * which removes the need for contiguous pages. We do not use + * vmem_alloc() in all cases because there is significant locking + * overhead in __get_vm_area_node(). This function takes a single + * global lock when acquiring an available virtual address range which + * serializes all vmem_alloc()'s for all slab caches. Using slightly + * different allocation functions for small and large objects should + * give us the best of both worlds. + * + * KMC_ONSLAB KMC_OFFSLAB + * + * +------------------------+ +-----------------+ + * | spl_kmem_slab_t --+-+ | | spl_kmem_slab_t |---+-+ + * | skc_obj_size <-+ | | +-----------------+ | | + * | spl_kmem_obj_t | | | | + * | skc_obj_size <---+ | +-----------------+ | | + * | spl_kmem_obj_t | | | skc_obj_size | <-+ | + * | ... v | | spl_kmem_obj_t | | + * +------------------------+ +-----------------+ v + */ +static spl_kmem_slab_t * +spl_slab_alloc(spl_kmem_cache_t *skc, int flags) +{ + spl_kmem_slab_t *sks; + spl_kmem_obj_t *sko, *n; + void *base, *obj; + uint32_t obj_size, offslab_size = 0; + int i, rc = 0; + + base = kv_alloc(skc, skc->skc_slab_size, flags); + if (base == NULL) + return (NULL); + + sks = (spl_kmem_slab_t *)base; + sks->sks_magic = SKS_MAGIC; + sks->sks_objs = skc->skc_slab_objs; + sks->sks_age = jiffies; + sks->sks_cache = skc; + INIT_LIST_HEAD(&sks->sks_list); + INIT_LIST_HEAD(&sks->sks_free_list); + sks->sks_ref = 0; + obj_size = spl_obj_size(skc); + + if (skc->skc_flags & KMC_OFFSLAB) + offslab_size = spl_offslab_size(skc); + + for (i = 0; i < sks->sks_objs; i++) { + if (skc->skc_flags & KMC_OFFSLAB) { + obj = kv_alloc(skc, offslab_size, flags); + if (!obj) { + rc = -ENOMEM; + goto out; + } + } else { + obj = base + spl_sks_size(skc) + (i * obj_size); + } + + ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align)); + sko = spl_sko_from_obj(skc, obj); + sko->sko_addr = obj; + sko->sko_magic = SKO_MAGIC; + sko->sko_slab = sks; + INIT_LIST_HEAD(&sko->sko_list); + list_add_tail(&sko->sko_list, &sks->sks_free_list); + } + +out: + if (rc) { + if (skc->skc_flags & KMC_OFFSLAB) + list_for_each_entry_safe(sko, + n, &sks->sks_free_list, sko_list) + kv_free(skc, sko->sko_addr, offslab_size); + + kv_free(skc, base, skc->skc_slab_size); + sks = NULL; + } + + return (sks); +} + +/* + * Remove a slab from complete or partial list, it must be called with + * the 'skc->skc_lock' held but the actual free must be performed + * outside the lock to prevent deadlocking on vmem addresses. + */ +static void +spl_slab_free(spl_kmem_slab_t *sks, + struct list_head *sks_list, struct list_head *sko_list) +{ + spl_kmem_cache_t *skc; + + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(sks->sks_ref == 0); + + skc = sks->sks_cache; + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + /* + * Update slab/objects counters in the cache, then remove the + * slab from the skc->skc_partial_list. Finally add the slab + * and all its objects in to the private work lists where the + * destructors will be called and the memory freed to the system. + */ + skc->skc_obj_total -= sks->sks_objs; + skc->skc_slab_total--; + list_del(&sks->sks_list); + list_add(&sks->sks_list, sks_list); + list_splice_init(&sks->sks_free_list, sko_list); +} + +/* + * Reclaim empty slabs at the end of the partial list. 
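+ *
+ * Editor's note (an illustration inferred from this file, not upstream
+ * text): spl_cache_shrink() keeps this list quasi-sorted, fuller slabs at
+ * the head and empty ones at the tail, so the reverse scan below can stop
+ * at the first slab that still holds referenced objects.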
+ */ +static void +spl_slab_reclaim(spl_kmem_cache_t *skc) +{ + spl_kmem_slab_t *sks, *m; + spl_kmem_obj_t *sko, *n; + LIST_HEAD(sks_list); + LIST_HEAD(sko_list); + uint32_t size = 0; + + /* + * Empty slabs and objects must be moved to a private list so they + * can be safely freed outside the spin lock. All empty slabs are + * at the end of skc->skc_partial_list, therefore once a non-empty + * slab is found we can stop scanning. + */ + spin_lock(&skc->skc_lock); + list_for_each_entry_safe_reverse(sks, m, + &skc->skc_partial_list, sks_list) { + + if (sks->sks_ref > 0) + break; + + spl_slab_free(sks, &sks_list, &sko_list); + } + spin_unlock(&skc->skc_lock); + + /* + * The following two loops ensure all the object destructors are + * run, any offslab objects are freed, and the slabs themselves + * are freed. This is all done outside the skc->skc_lock since + * this allows the destructor to sleep, and allows us to perform + * a conditional reschedule when a freeing a large number of + * objects and slabs back to the system. + */ + if (skc->skc_flags & KMC_OFFSLAB) + size = spl_offslab_size(skc); + + list_for_each_entry_safe(sko, n, &sko_list, sko_list) { + ASSERT(sko->sko_magic == SKO_MAGIC); + + if (skc->skc_flags & KMC_OFFSLAB) + kv_free(skc, sko->sko_addr, size); + } + + list_for_each_entry_safe(sks, m, &sks_list, sks_list) { + ASSERT(sks->sks_magic == SKS_MAGIC); + kv_free(skc, sks, skc->skc_slab_size); + } +} + +static spl_kmem_emergency_t * +spl_emergency_search(struct rb_root *root, void *obj) +{ + struct rb_node *node = root->rb_node; + spl_kmem_emergency_t *ske; + unsigned long address = (unsigned long)obj; + + while (node) { + ske = container_of(node, spl_kmem_emergency_t, ske_node); + + if (address < ske->ske_obj) + node = node->rb_left; + else if (address > ske->ske_obj) + node = node->rb_right; + else + return (ske); + } + + return (NULL); +} + +static int +spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + spl_kmem_emergency_t *ske_tmp; + unsigned long address = ske->ske_obj; + + while (*new) { + ske_tmp = container_of(*new, spl_kmem_emergency_t, ske_node); + + parent = *new; + if (address < ske_tmp->ske_obj) + new = &((*new)->rb_left); + else if (address > ske_tmp->ske_obj) + new = &((*new)->rb_right); + else + return (0); + } + + rb_link_node(&ske->ske_node, parent, new); + rb_insert_color(&ske->ske_node, root); + + return (1); +} + +/* + * Allocate a single emergency object and track it in a red black tree. 
+ */ +static int +spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj) +{ + gfp_t lflags = kmem_flags_convert(flags); + spl_kmem_emergency_t *ske; + int order = get_order(skc->skc_obj_size); + int empty; + + /* Last chance use a partial slab if one now exists */ + spin_lock(&skc->skc_lock); + empty = list_empty(&skc->skc_partial_list); + spin_unlock(&skc->skc_lock); + if (!empty) + return (-EEXIST); + + ske = kmalloc(sizeof (*ske), lflags); + if (ske == NULL) + return (-ENOMEM); + + ske->ske_obj = __get_free_pages(lflags, order); + if (ske->ske_obj == 0) { + kfree(ske); + return (-ENOMEM); + } + + spin_lock(&skc->skc_lock); + empty = spl_emergency_insert(&skc->skc_emergency_tree, ske); + if (likely(empty)) { + skc->skc_obj_total++; + skc->skc_obj_emergency++; + if (skc->skc_obj_emergency > skc->skc_obj_emergency_max) + skc->skc_obj_emergency_max = skc->skc_obj_emergency; + } + spin_unlock(&skc->skc_lock); + + if (unlikely(!empty)) { + free_pages(ske->ske_obj, order); + kfree(ske); + return (-EINVAL); + } + + *obj = (void *)ske->ske_obj; + + return (0); +} + +/* + * Locate the passed object in the red black tree and free it. + */ +static int +spl_emergency_free(spl_kmem_cache_t *skc, void *obj) +{ + spl_kmem_emergency_t *ske; + int order = get_order(skc->skc_obj_size); + + spin_lock(&skc->skc_lock); + ske = spl_emergency_search(&skc->skc_emergency_tree, obj); + if (ske) { + rb_erase(&ske->ske_node, &skc->skc_emergency_tree); + skc->skc_obj_emergency--; + skc->skc_obj_total--; + } + spin_unlock(&skc->skc_lock); + + if (ske == NULL) + return (-ENOENT); + + free_pages(ske->ske_obj, order); + kfree(ske); + + return (0); +} + +/* + * Release objects from the per-cpu magazine back to their slab. The flush + * argument contains the max number of entries to remove from the magazine. + */ +static void +__spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) +{ + int i, count = MIN(flush, skm->skm_avail); + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(skm->skm_magic == SKM_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + for (i = 0; i < count; i++) + spl_cache_shrink(skc, skm->skm_objs[i]); + + skm->skm_avail -= count; + memmove(skm->skm_objs, &(skm->skm_objs[count]), + sizeof (void *) * skm->skm_avail); +} + +static void +spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) +{ + spin_lock(&skc->skc_lock); + __spl_cache_flush(skc, skm, flush); + spin_unlock(&skc->skc_lock); +} + +static void +spl_magazine_age(void *data) +{ + spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data; + spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()]; + + ASSERT(skm->skm_magic == SKM_MAGIC); + ASSERT(skm->skm_cpu == smp_processor_id()); + ASSERT(irqs_disabled()); + + /* There are no available objects or they are too young to age out */ + if ((skm->skm_avail == 0) || + time_before(jiffies, skm->skm_age + skc->skc_delay * HZ)) + return; + + /* + * Because we're executing in interrupt context we may have + * interrupted the holder of this lock. To avoid a potential + * deadlock return if the lock is contended. + */ + if (!spin_trylock(&skc->skc_lock)) + return; + + __spl_cache_flush(skc, skm, skm->skm_refill); + spin_unlock(&skc->skc_lock); +} + +/* + * Called regularly to keep a downward pressure on the cache. + * + * Objects older than skc->skc_delay seconds in the per-cpu magazines will + * be returned to the caches. This is done to prevent idle magazines from + * holding memory which could be better used elsewhere. 
The delay is + * present to prevent thrashing the magazine. + * + * The newly released objects may result in empty partial slabs. Those + * slabs should be released to the system. Otherwise moving the objects + * out of the magazines is just wasted work. + */ +static void +spl_cache_age(void *data) +{ + spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data; + taskqid_t id = 0; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + /* Dynamically disabled at run time */ + if (!(spl_kmem_cache_expire & KMC_EXPIRE_AGE)) + return; + + atomic_inc(&skc->skc_ref); + + if (!(skc->skc_flags & KMC_NOMAGAZINE)) + on_each_cpu(spl_magazine_age, skc, 1); + + spl_slab_reclaim(skc); + + while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) { + id = taskq_dispatch_delay( + spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP, + ddi_get_lbolt() + skc->skc_delay / 3 * HZ); + + /* Destroy issued after dispatch immediately cancel it */ + if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id) + taskq_cancel_id(spl_kmem_cache_taskq, id); + } + + spin_lock(&skc->skc_lock); + skc->skc_taskqid = id; + spin_unlock(&skc->skc_lock); + + atomic_dec(&skc->skc_ref); +} + +/* + * Size a slab based on the size of each aligned object plus spl_kmem_obj_t. + * When on-slab we want to target spl_kmem_cache_obj_per_slab. However, + * for very small objects we may end up with more than this so as not + * to waste space in the minimal allocation of a single page. Also for + * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min, + * lower than this and we will fail. + */ +static int +spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) +{ + uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs; + + if (skc->skc_flags & KMC_OFFSLAB) { + tgt_objs = spl_kmem_cache_obj_per_slab; + tgt_size = P2ROUNDUP(sizeof (spl_kmem_slab_t), PAGE_SIZE); + + if ((skc->skc_flags & KMC_KMEM) && + (spl_obj_size(skc) > (SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE))) + return (-ENOSPC); + } else { + sks_size = spl_sks_size(skc); + obj_size = spl_obj_size(skc); + max_size = (spl_kmem_cache_max_size * 1024 * 1024); + tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size); + + /* + * KMC_KMEM slabs are allocated by __get_free_pages() which + * rounds up to the nearest order. Knowing this the size + * should be rounded up to the next power of two with a hard + * maximum defined by the maximum allowed allocation order. + */ + if (skc->skc_flags & KMC_KMEM) { + max_size = SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE; + tgt_size = MIN(max_size, + PAGE_SIZE * (1 << MAX(get_order(tgt_size) - 1, 1))); + } + + if (tgt_size <= max_size) { + tgt_objs = (tgt_size - sks_size) / obj_size; + } else { + tgt_objs = (max_size - sks_size) / obj_size; + tgt_size = (tgt_objs * obj_size) + sks_size; + } + } + + if (tgt_objs == 0) + return (-ENOSPC); + + *objs = tgt_objs; + *size = tgt_size; + + return (0); +} + +/* + * Make a guess at reasonable per-cpu magazine size based on the size of + * each object and the cost of caching N of them in each magazine. Long + * term this should really adapt based on an observed usage heuristic. 
+ */
+static int
+spl_magazine_size(spl_kmem_cache_t *skc)
+{
+	uint32_t obj_size = spl_obj_size(skc);
+	int size;
+
+	if (spl_kmem_cache_magazine_size > 0)
+		return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
+
+	/* Per-magazine sizes below assume a 4 KiB page size */
+	if (obj_size > (PAGE_SIZE * 256))
+		size = 4;	/* Minimum 4 MiB per-magazine */
+	else if (obj_size > (PAGE_SIZE * 32))
+		size = 16;	/* Minimum 2 MiB per-magazine */
+	else if (obj_size > (PAGE_SIZE))
+		size = 64;	/* Minimum 256 KiB per-magazine */
+	else if (obj_size > (PAGE_SIZE / 4))
+		size = 128;	/* Minimum 128 KiB per-magazine */
+	else
+		size = 256;
+
+	return (size);
+}
+
+/*
+ * Allocate a per-cpu magazine to associate with a specific core.
+ */
+static spl_kmem_magazine_t *
+spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
+{
+	spl_kmem_magazine_t *skm;
+	int size = sizeof (spl_kmem_magazine_t) +
+	    sizeof (void *) * skc->skc_mag_size;
+
+	skm = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+	if (skm) {
+		skm->skm_magic = SKM_MAGIC;
+		skm->skm_avail = 0;
+		skm->skm_size = skc->skc_mag_size;
+		skm->skm_refill = skc->skc_mag_refill;
+		skm->skm_cache = skc;
+		skm->skm_age = jiffies;
+		skm->skm_cpu = cpu;
+	}
+
+	return (skm);
+}
+
+/*
+ * Free a per-cpu magazine associated with a specific core.
+ */
+static void
+spl_magazine_free(spl_kmem_magazine_t *skm)
+{
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+	ASSERT(skm->skm_avail == 0);
+	kfree(skm);
+}
+
+/*
+ * Create all per-cpu magazines of reasonable sizes.
+ */
+static int
+spl_magazine_create(spl_kmem_cache_t *skc)
+{
+	int i;
+
+	if (skc->skc_flags & KMC_NOMAGAZINE)
+		return (0);
+
+	skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
+	    num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
+	skc->skc_mag_size = spl_magazine_size(skc);
+	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
+
+	for_each_possible_cpu(i) {
+		skc->skc_mag[i] = spl_magazine_alloc(skc, i);
+		if (!skc->skc_mag[i]) {
+			for (i--; i >= 0; i--)
+				spl_magazine_free(skc->skc_mag[i]);
+
+			kfree(skc->skc_mag);
+			return (-ENOMEM);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Destroy all per-cpu magazines.
+ */ +static void +spl_magazine_destroy(spl_kmem_cache_t *skc) +{ + spl_kmem_magazine_t *skm; + int i; + + if (skc->skc_flags & KMC_NOMAGAZINE) + return; + + for_each_possible_cpu(i) { + skm = skc->skc_mag[i]; + spl_cache_flush(skc, skm, skm->skm_avail); + spl_magazine_free(skm); + } + + kfree(skc->skc_mag); +} + +/* + * Create a object cache based on the following arguments: + * name cache name + * size cache object size + * align cache object alignment + * ctor cache object constructor + * dtor cache object destructor + * reclaim cache object reclaim + * priv cache private data for ctor/dtor/reclaim + * vmp unused must be NULL + * flags + * KMC_NOTOUCH Disable cache object aging (unsupported) + * KMC_NODEBUG Disable debugging (unsupported) + * KMC_NOHASH Disable hashing (unsupported) + * KMC_QCACHE Disable qcache (unsupported) + * KMC_NOMAGAZINE Enabled for kmem/vmem, Disabled for Linux slab + * KMC_KMEM Force kmem backed cache + * KMC_VMEM Force vmem backed cache + * KMC_SLAB Force Linux slab backed cache + * KMC_OFFSLAB Locate objects off the slab + */ +spl_kmem_cache_t * +spl_kmem_cache_create(char *name, size_t size, size_t align, + spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim, + void *priv, void *vmp, int flags) +{ + gfp_t lflags = kmem_flags_convert(KM_SLEEP); + spl_kmem_cache_t *skc; + int rc; + + /* + * Unsupported flags + */ + ASSERT0(flags & KMC_NOMAGAZINE); + ASSERT0(flags & KMC_NOHASH); + ASSERT0(flags & KMC_QCACHE); + ASSERT(vmp == NULL); + + might_sleep(); + + skc = kzalloc(sizeof (*skc), lflags); + if (skc == NULL) + return (NULL); + + skc->skc_magic = SKC_MAGIC; + skc->skc_name_size = strlen(name) + 1; + skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags); + if (skc->skc_name == NULL) { + kfree(skc); + return (NULL); + } + strncpy(skc->skc_name, name, skc->skc_name_size); + + skc->skc_ctor = ctor; + skc->skc_dtor = dtor; + skc->skc_reclaim = reclaim; + skc->skc_private = priv; + skc->skc_vmp = vmp; + skc->skc_linux_cache = NULL; + skc->skc_flags = flags; + skc->skc_obj_size = size; + skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN; + skc->skc_delay = SPL_KMEM_CACHE_DELAY; + skc->skc_reap = SPL_KMEM_CACHE_REAP; + atomic_set(&skc->skc_ref, 0); + + INIT_LIST_HEAD(&skc->skc_list); + INIT_LIST_HEAD(&skc->skc_complete_list); + INIT_LIST_HEAD(&skc->skc_partial_list); + skc->skc_emergency_tree = RB_ROOT; + spin_lock_init(&skc->skc_lock); + init_waitqueue_head(&skc->skc_waitq); + skc->skc_slab_fail = 0; + skc->skc_slab_create = 0; + skc->skc_slab_destroy = 0; + skc->skc_slab_total = 0; + skc->skc_slab_alloc = 0; + skc->skc_slab_max = 0; + skc->skc_obj_total = 0; + skc->skc_obj_alloc = 0; + skc->skc_obj_max = 0; + skc->skc_obj_deadlock = 0; + skc->skc_obj_emergency = 0; + skc->skc_obj_emergency_max = 0; + + /* + * Verify the requested alignment restriction is sane. + */ + if (align) { + VERIFY(ISP2(align)); + VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); + VERIFY3U(align, <=, PAGE_SIZE); + skc->skc_obj_align = align; + } + + /* + * When no specific type of slab is requested (kmem, vmem, or + * linuxslab) then select a cache type based on the object size + * and default tunables. + */ + if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) { + + /* + * Objects smaller than spl_kmem_cache_slab_limit can + * use the Linux slab for better space-efficiency. By + * default this functionality is disabled until its + * performance characteristics are fully understood. 
+		 */
+		if (spl_kmem_cache_slab_limit &&
+		    size <= (size_t)spl_kmem_cache_slab_limit)
+			skc->skc_flags |= KMC_SLAB;
+
+		/*
+		 * Small objects, less than spl_kmem_cache_kmem_limit per
+		 * object, should use kmem because their slabs are small.
+		 */
+		else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit)
+			skc->skc_flags |= KMC_KMEM;
+
+		/*
+		 * All other objects are considered large and are placed
+		 * on vmem backed slabs.
+		 */
+		else
+			skc->skc_flags |= KMC_VMEM;
+	}
+
+	/*
+	 * Given the type of slab, allocate the required resources.
+	 */
+	if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+		rc = spl_slab_size(skc,
+		    &skc->skc_slab_objs, &skc->skc_slab_size);
+		if (rc)
+			goto out;
+
+		rc = spl_magazine_create(skc);
+		if (rc)
+			goto out;
+	} else {
+		unsigned long slabflags = 0;
+
+		if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
+			rc = EINVAL;
+			goto out;
+		}
+
+#if defined(SLAB_USERCOPY)
+		/*
+		 * Required for PAX-enabled kernels if the slab is to be
+		 * used for copying between user and kernel space.
+		 */
+		slabflags |= SLAB_USERCOPY;
+#endif
+
+		skc->skc_linux_cache = kmem_cache_create(
+		    skc->skc_name, size, align, slabflags, NULL);
+		if (skc->skc_linux_cache == NULL) {
+			rc = ENOMEM;
+			goto out;
+		}
+
+#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
+		skc->skc_linux_cache->allocflags |= __GFP_COMP;
+#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
+		skc->skc_linux_cache->gfpflags |= __GFP_COMP;
+#endif
+		skc->skc_flags |= KMC_NOMAGAZINE;
+	}
+
+	if (spl_kmem_cache_expire & KMC_EXPIRE_AGE)
+		skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
+		    spl_cache_age, skc, TQ_SLEEP,
+		    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
+
+	down_write(&spl_kmem_cache_sem);
+	list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
+	up_write(&spl_kmem_cache_sem);
+
+	return (skc);
+out:
+	kfree(skc->skc_name);
+	kfree(skc);
+	return (NULL);
+}
+EXPORT_SYMBOL(spl_kmem_cache_create);
+
+/*
+ * Register a move callback for cache defragmentation.
+ * XXX: Unimplemented but harmless to stub out for now.
+ */
+void
+spl_kmem_cache_set_move(spl_kmem_cache_t *skc,
+    kmem_cbrc_t (move)(void *, void *, size_t, void *))
+{
+	ASSERT(move != NULL);
+}
+EXPORT_SYMBOL(spl_kmem_cache_set_move);
+
+/*
+ * Destroy a cache and all objects associated with the cache.
+ */
+void
+spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
+{
+	DECLARE_WAIT_QUEUE_HEAD(wq);
+	taskqid_t id;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB));
+
+	down_write(&spl_kmem_cache_sem);
+	list_del_init(&skc->skc_list);
+	up_write(&spl_kmem_cache_sem);
+
+	/* Cancel and wait for any pending delayed tasks */
+	VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	spin_lock(&skc->skc_lock);
+	id = skc->skc_taskqid;
+	spin_unlock(&skc->skc_lock);
+
+	taskq_cancel_id(spl_kmem_cache_taskq, id);
+
+	/*
+	 * Wait until all current callers complete; this is mainly
+	 * to catch the case where a low memory situation triggers a
+	 * cache reaping action which races with this destroy.
+	 */
+	wait_event(wq, atomic_read(&skc->skc_ref) == 0);
+
+	if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
+		spl_magazine_destroy(skc);
+		spl_slab_reclaim(skc);
+	} else {
+		ASSERT(skc->skc_flags & KMC_SLAB);
+		kmem_cache_destroy(skc->skc_linux_cache);
+	}
+
+	spin_lock(&skc->skc_lock);
+
+	/*
+	 * Validate there are no objects in use and free all the
+	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
+ */ + ASSERT3U(skc->skc_slab_alloc, ==, 0); + ASSERT3U(skc->skc_obj_alloc, ==, 0); + ASSERT3U(skc->skc_slab_total, ==, 0); + ASSERT3U(skc->skc_obj_total, ==, 0); + ASSERT3U(skc->skc_obj_emergency, ==, 0); + ASSERT(list_empty(&skc->skc_complete_list)); + + spin_unlock(&skc->skc_lock); + + kfree(skc->skc_name); + kfree(skc); +} +EXPORT_SYMBOL(spl_kmem_cache_destroy); + +/* + * Allocate an object from a slab attached to the cache. This is used to + * repopulate the per-cpu magazine caches in batches when they run low. + */ +static void * +spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) +{ + spl_kmem_obj_t *sko; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(spin_is_locked(&skc->skc_lock)); + + sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list); + ASSERT(sko->sko_magic == SKO_MAGIC); + ASSERT(sko->sko_addr != NULL); + + /* Remove from sks_free_list */ + list_del_init(&sko->sko_list); + + sks->sks_age = jiffies; + sks->sks_ref++; + skc->skc_obj_alloc++; + + /* Track max obj usage statistics */ + if (skc->skc_obj_alloc > skc->skc_obj_max) + skc->skc_obj_max = skc->skc_obj_alloc; + + /* Track max slab usage statistics */ + if (sks->sks_ref == 1) { + skc->skc_slab_alloc++; + + if (skc->skc_slab_alloc > skc->skc_slab_max) + skc->skc_slab_max = skc->skc_slab_alloc; + } + + return (sko->sko_addr); +} + +/* + * Generic slab allocation function to run by the global work queues. + * It is responsible for allocating a new slab, linking it in to the list + * of partial slabs, and then waking any waiters. + */ +static void +spl_cache_grow_work(void *data) +{ + spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data; + spl_kmem_cache_t *skc = ska->ska_cache; + spl_kmem_slab_t *sks; + + fstrans_cookie_t cookie = spl_fstrans_mark(); + sks = spl_slab_alloc(skc, ska->ska_flags); + spl_fstrans_unmark(cookie); + + spin_lock(&skc->skc_lock); + if (sks) { + skc->skc_slab_total++; + skc->skc_obj_total += sks->sks_objs; + list_add_tail(&sks->sks_list, &skc->skc_partial_list); + } + + atomic_dec(&skc->skc_ref); + smp_mb__before_atomic(); + clear_bit(KMC_BIT_GROWING, &skc->skc_flags); + clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); + smp_mb__after_atomic(); + wake_up_all(&skc->skc_waitq); + spin_unlock(&skc->skc_lock); + + kfree(ska); +} + +/* + * Returns non-zero when a new slab should be available. + */ +static int +spl_cache_grow_wait(spl_kmem_cache_t *skc) +{ + return (!test_bit(KMC_BIT_GROWING, &skc->skc_flags)); +} + +/* + * No available objects on any slabs, create a new slab. Note that this + * functionality is disabled for KMC_SLAB caches which are backed by the + * Linux slab. + */ +static int +spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) +{ + int remaining, rc = 0; + + ASSERT0(flags & ~KM_PUBLIC_MASK); + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT((skc->skc_flags & KMC_SLAB) == 0); + might_sleep(); + *obj = NULL; + + /* + * Before allocating a new slab wait for any reaping to complete and + * then return so the local magazine can be rechecked for new objects. + */ + if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { + rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, + TASK_UNINTERRUPTIBLE); + return (rc ? rc : -EAGAIN); + } + + /* + * This is handled by dispatching a work request to the global work + * queue. This allows us to asynchronously allocate a new slab while + * retaining the ability to safely fall back to a smaller synchronous + * allocations to ensure forward progress is always maintained. 
+ */ + if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) { + spl_kmem_alloc_t *ska; + + ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags)); + if (ska == NULL) { + clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags); + smp_mb__after_atomic(); + wake_up_all(&skc->skc_waitq); + return (-ENOMEM); + } + + atomic_inc(&skc->skc_ref); + ska->ska_cache = skc; + ska->ska_flags = flags; + taskq_init_ent(&ska->ska_tqe); + taskq_dispatch_ent(spl_kmem_cache_taskq, + spl_cache_grow_work, ska, 0, &ska->ska_tqe); + } + + /* + * The goal here is to only detect the rare case where a virtual slab + * allocation has deadlocked. We must be careful to minimize the use + * of emergency objects which are more expensive to track. Therefore, + * we set a very long timeout for the asynchronous allocation and if + * the timeout is reached the cache is flagged as deadlocked. From + * this point only new emergency objects will be allocated until the + * asynchronous allocation completes and clears the deadlocked flag. + */ + if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) { + rc = spl_emergency_alloc(skc, flags, obj); + } else { + remaining = wait_event_timeout(skc->skc_waitq, + spl_cache_grow_wait(skc), HZ / 10); + + if (!remaining) { + spin_lock(&skc->skc_lock); + if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) { + set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); + skc->skc_obj_deadlock++; + } + spin_unlock(&skc->skc_lock); + } + + rc = -ENOMEM; + } + + return (rc); +} + +/* + * Refill a per-cpu magazine with objects from the slabs for this cache. + * Ideally the magazine can be repopulated using existing objects which have + * been released, however if we are unable to locate enough free objects new + * slabs of objects will be created. On success NULL is returned, otherwise + * the address of a single emergency object is returned for use by the caller. + */ +static void * +spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) +{ + spl_kmem_slab_t *sks; + int count = 0, rc, refill; + void *obj = NULL; + + ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(skm->skm_magic == SKM_MAGIC); + + refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); + spin_lock(&skc->skc_lock); + + while (refill > 0) { + /* No slabs available we may need to grow the cache */ + if (list_empty(&skc->skc_partial_list)) { + spin_unlock(&skc->skc_lock); + + local_irq_enable(); + rc = spl_cache_grow(skc, flags, &obj); + local_irq_disable(); + + /* Emergency object for immediate use by caller */ + if (rc == 0 && obj != NULL) + return (obj); + + if (rc) + goto out; + + /* Rescheduled to different CPU skm is not local */ + if (skm != skc->skc_mag[smp_processor_id()]) + goto out; + + /* + * Potentially rescheduled to the same CPU but + * allocations may have occurred from this CPU while + * we were sleeping so recalculate max refill. + */ + refill = MIN(refill, skm->skm_size - skm->skm_avail); + + spin_lock(&skc->skc_lock); + continue; + } + + /* Grab the next available slab */ + sks = list_entry((&skc->skc_partial_list)->next, + spl_kmem_slab_t, sks_list); + ASSERT(sks->sks_magic == SKS_MAGIC); + ASSERT(sks->sks_ref < sks->sks_objs); + ASSERT(!list_empty(&sks->sks_free_list)); + + /* + * Consume as many objects as needed to refill the requested + * cache. We must also be careful not to overfill it. 
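+		 *
+		 * (Editor's note: the `++count` term in the while condition
+		 * below is always nonzero, so it never terminates the loop;
+		 * it merely tallies the objects taken this pass so the
+		 * ASSERTs can bound it by the magazine size.)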
+		 */
+		while (sks->sks_ref < sks->sks_objs && refill-- > 0 &&
+		    ++count) {
+			ASSERT(skm->skm_avail < skm->skm_size);
+			ASSERT(count < skm->skm_size);
+			skm->skm_objs[skm->skm_avail++] =
+			    spl_cache_obj(skc, sks);
+		}
+
+		/* Move slab to skc_complete_list when full */
+		if (sks->sks_ref == sks->sks_objs) {
+			list_del(&sks->sks_list);
+			list_add(&sks->sks_list, &skc->skc_complete_list);
+		}
+	}
+
+	spin_unlock(&skc->skc_lock);
+out:
+	return (NULL);
+}
+
+/*
+ * Release an object back to the slab from which it came.
+ */
+static void
+spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
+{
+	spl_kmem_slab_t *sks = NULL;
+	spl_kmem_obj_t *sko = NULL;
+
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(spin_is_locked(&skc->skc_lock));
+
+	sko = spl_sko_from_obj(skc, obj);
+	ASSERT(sko->sko_magic == SKO_MAGIC);
+	sks = sko->sko_slab;
+	ASSERT(sks->sks_magic == SKS_MAGIC);
+	ASSERT(sks->sks_cache == skc);
+	list_add(&sko->sko_list, &sks->sks_free_list);
+
+	sks->sks_age = jiffies;
+	sks->sks_ref--;
+	skc->skc_obj_alloc--;
+
+	/*
+	 * Move slab to skc_partial_list when no longer full. Slabs
+	 * are added to the head to keep the partial list in quasi-full
+	 * sorted order. Fuller at the head, emptier at the tail.
+	 */
+	if (sks->sks_ref == (sks->sks_objs - 1)) {
+		list_del(&sks->sks_list);
+		list_add(&sks->sks_list, &skc->skc_partial_list);
+	}
+
+	/*
+	 * Move empty slabs to the end of the partial list so
+	 * they can be easily found and freed during reclamation.
+	 */
+	if (sks->sks_ref == 0) {
+		list_del(&sks->sks_list);
+		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+		skc->skc_slab_alloc--;
+	}
+}
+
+/*
+ * Allocate an object from the per-cpu magazine, or, if the magazine
+ * is empty, directly allocate from a slab and repopulate the magazine.
+ */
+void *
+spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+{
+	spl_kmem_magazine_t *skm;
+	void *obj = NULL;
+
+	ASSERT0(flags & ~KM_PUBLIC_MASK);
+	ASSERT(skc->skc_magic == SKC_MAGIC);
+	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+	/*
+	 * Allocate directly from a Linux slab. All optimizations are left
+	 * to the underlying cache; we only need to guarantee that KM_SLEEP
+	 * callers will never fail.
+	 */
+	if (skc->skc_flags & KMC_SLAB) {
+		struct kmem_cache *slc = skc->skc_linux_cache;
+		do {
+			obj = kmem_cache_alloc(slc, kmem_flags_convert(flags));
+		} while ((obj == NULL) && !(flags & KM_NOSLEEP));
+
+		goto ret;
+	}
+
+	local_irq_disable();
+
+restart:
+	/*
+	 * Safe to update per-cpu structure without lock, but
+	 * in the restart case we must be careful to reacquire
+	 * the local magazine since this may have changed
+	 * when we need to grow the cache.
+	 */
+	skm = skc->skc_mag[smp_processor_id()];
+	ASSERT(skm->skm_magic == SKM_MAGIC);
+
+	if (likely(skm->skm_avail)) {
+		/* Object available in CPU cache, use it */
+		obj = skm->skm_objs[--skm->skm_avail];
+		skm->skm_age = jiffies;
+	} else {
+		obj = spl_cache_refill(skc, skm, flags);
+		if ((obj == NULL) && !(flags & KM_NOSLEEP))
+			goto restart;
+
+		local_irq_enable();
+		goto ret;
+	}
+
+	local_irq_enable();
+	ASSERT(obj);
+	ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
+
+ret:
+	/* Pre-emptively migrate object to CPU L1 cache */
+	if (obj) {
+		if (obj && skc->skc_ctor)
+			skc->skc_ctor(obj, skc->skc_private, flags);
+		else
+			prefetchw(obj);
+	}
+
+	return (obj);
+}
+EXPORT_SYMBOL(spl_kmem_cache_alloc);
+
+/*
+ * Free an object back to the local per-cpu magazine; there is no
+ * guarantee that this is the same magazine the object was originally
+ * allocated from. We may need to flush entries from the magazine
We may need to flush entire magazines back to the
+ * slabs to make space.
+ */
+void
+spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+{
+    spl_kmem_magazine_t *skm;
+    unsigned long flags;
+    int do_reclaim = 0;
+    int do_emergency = 0;
+
+    ASSERT(skc->skc_magic == SKC_MAGIC);
+    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+    /*
+     * Run the destructor.
+     */
+    if (skc->skc_dtor)
+        skc->skc_dtor(obj, skc->skc_private);
+
+    /*
+     * Free the object from the underlying Linux slab.
+     */
+    if (skc->skc_flags & KMC_SLAB) {
+        kmem_cache_free(skc->skc_linux_cache, obj);
+        return;
+    }
+
+    /*
+     * While a cache has outstanding emergency objects all freed objects
+     * must be checked. However, since emergency objects will never use
+     * a virtual address these objects can be safely excluded as an
+     * optimization.
+     */
+    if (!is_vmalloc_addr(obj)) {
+        spin_lock(&skc->skc_lock);
+        do_emergency = (skc->skc_obj_emergency > 0);
+        spin_unlock(&skc->skc_lock);
+
+        if (do_emergency && (spl_emergency_free(skc, obj) == 0))
+            return;
+    }
+
+    local_irq_save(flags);
+
+    /*
+     * Safe to update the per-cpu structure without a lock, but because
+     * no remote memory allocation tracking is being performed it is
+     * entirely possible to allocate an object from one CPU cache and
+     * return it to another.
+     */
+    skm = skc->skc_mag[smp_processor_id()];
+    ASSERT(skm->skm_magic == SKM_MAGIC);
+
+    /*
+     * Per-CPU cache full; flush it to make space for this object.
+     * This may result in an empty slab which can be reclaimed once
+     * interrupts are re-enabled.
+     */
+    if (unlikely(skm->skm_avail >= skm->skm_size)) {
+        spl_cache_flush(skc, skm, skm->skm_refill);
+        do_reclaim = 1;
+    }
+
+    /* Available space in cache, use it */
+    skm->skm_objs[skm->skm_avail++] = obj;
+
+    local_irq_restore(flags);
+
+    if (do_reclaim)
+        spl_slab_reclaim(skc);
+}
+EXPORT_SYMBOL(spl_kmem_cache_free);
+
+/*
+ * The generic shrinker function for all caches. Under Linux a shrinker
+ * may not be tightly coupled with a slab cache. In fact Linux always
+ * systematically tries calling all registered shrinker callbacks which
+ * report that they contain unused objects. Because of this we only
+ * register one shrinker function in the shim layer for all slab caches.
+ * We always attempt to shrink all caches when this generic shrinker
+ * is called.
+ *
+ * If sc->nr_to_scan is zero, the caller is requesting a query of the
+ * number of objects which can potentially be freed. If it is nonzero,
+ * the request is to free that many objects.
+ *
+ * Linux kernels >= 3.12 have the count_objects and scan_objects callbacks
+ * in struct shrinker and also require the shrinker to return the number
+ * of objects freed.
+ *
+ * Older kernels require the shrinker to return the number of freeable
+ * objects remaining after the freeing of nr_to_scan.
+ *
+ * Linux semantics differ from those under Solaris, which are to
+ * free all available objects, which may (and probably will) be more
+ * objects than the requested nr_to_scan.
+ */
+static spl_shrinker_t
+__spl_kmem_cache_generic_shrinker(struct shrinker *shrink,
+    struct shrink_control *sc)
+{
+    spl_kmem_cache_t *skc;
+    int alloc = 0;
+
+    /*
+     * No shrinking in a transaction context; it can cause deadlocks.
+     */
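
The block comment above distinguishes a pure query (sc->nr_to_scan == 0) from an actual scan request, and notes that kernels >= 3.12 split these into count_objects and scan_objects callbacks. A userspace mock of that split, with illustrative names only (the patch's real glue is spl_shrinker_t and SPL_SHRINKER_CALLBACK_WRAPPER):

    /* Userspace mock of the post-3.12 split shrinker interface. */
    struct shrink_ctl {
        unsigned long nr_to_scan;   /* would be 0 for "count only" */
    };

    static unsigned long cached_objects = 1024; /* stand-in for skc_obj_alloc */

    /* count_objects(): report how many objects could potentially be freed. */
    static unsigned long mock_count(void)
    {
        return (cached_objects);
    }

    /* scan_objects(): free up to nr_to_scan objects, return the number freed. */
    static unsigned long mock_scan(struct shrink_ctl *sc)
    {
        unsigned long freed = sc->nr_to_scan < cached_objects ?
            sc->nr_to_scan : cached_objects;

        cached_objects -= freed;
        return (freed);
    }
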
+    if (sc->nr_to_scan && spl_fstrans_check())
+        return (SHRINK_STOP);
+
+    down_read(&spl_kmem_cache_sem);
+    list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+        if (sc->nr_to_scan) {
+#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
+            uint64_t oldalloc = skc->skc_obj_alloc;
+            spl_kmem_cache_reap_now(skc,
+                MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
+            if (oldalloc > skc->skc_obj_alloc)
+                alloc += oldalloc - skc->skc_obj_alloc;
+#else
+            spl_kmem_cache_reap_now(skc,
+                MAX(sc->nr_to_scan>>fls64(skc->skc_slab_objs), 1));
+            alloc += skc->skc_obj_alloc;
+#endif /* HAVE_SPLIT_SHRINKER_CALLBACK */
+        } else {
+            /* Request to query number of freeable objects */
+            alloc += skc->skc_obj_alloc;
+        }
+    }
+    up_read(&spl_kmem_cache_sem);
+
+    /*
+     * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
+     * This functionality only exists to work around a rare issue where
+     * shrink_slabs() is repeatedly invoked by many cores causing the
+     * system to thrash.
+     */
+    if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
+        return (SHRINK_STOP);
+
+    return (MAX(alloc, 0));
+}
+
+SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);
+
+/*
+ * Call the registered reclaim function for a cache. Depending on how
+ * many and which objects are released it may simply repopulate the
+ * local magazine, which will then need to age out. Objects which cannot
+ * fit in the magazine will be released back to their slabs, which will
+ * also need to age out before being released. This is all just best
+ * effort and we do not want to thrash creating and destroying slabs.
+ */
+void
+spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count)
+{
+    ASSERT(skc->skc_magic == SKC_MAGIC);
+    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
+
+    atomic_inc(&skc->skc_ref);
+
+    /*
+     * Execute the registered reclaim callback if it exists.
+     */
+    if (skc->skc_flags & KMC_SLAB) {
+        if (skc->skc_reclaim)
+            skc->skc_reclaim(skc->skc_private);
+        goto out;
+    }
+
+    /*
+     * Prevent concurrent cache reaping when contended.
+     */
+    if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
+        goto out;
+
+    /*
+     * When a reclaim function is available it may be invoked repeatedly
+     * until at least a single slab can be freed. This ensures that we
+     * do free memory back to the system. This helps minimize the chance
+     * of an OOM event when the bulk of memory is used by the slab.
+     *
+     * When free slabs are already available the reclaim callback will be
+     * skipped. Additionally, if no forward progress is detected despite
+     * a reclaim function the cache will be skipped to avoid deadlock.
+     *
+     * Longer term this would be the correct place to add the code which
+     * repacks the slabs in order to minimize fragmentation.
+     */
+    if (skc->skc_reclaim) {
+        uint64_t objects = UINT64_MAX;
+        int do_reclaim;
+
+        do {
+            spin_lock(&skc->skc_lock);
+            do_reclaim =
+                (skc->skc_slab_total > 0) &&
+                ((skc->skc_slab_total - skc->skc_slab_alloc) == 0) &&
+                (skc->skc_obj_alloc < objects);
+
+            objects = skc->skc_obj_alloc;
+            spin_unlock(&skc->skc_lock);
+
+            if (do_reclaim)
+                skc->skc_reclaim(skc->skc_private);
+
+        } while (do_reclaim);
+    }
+
+    /* Reclaim from the magazine and free all now empty slabs.
*/ + if (spl_kmem_cache_expire & KMC_EXPIRE_MEM) { + spl_kmem_magazine_t *skm; + unsigned long irq_flags; + + local_irq_save(irq_flags); + skm = skc->skc_mag[smp_processor_id()]; + spl_cache_flush(skc, skm, skm->skm_avail); + local_irq_restore(irq_flags); + } + + spl_slab_reclaim(skc); + clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags); + smp_mb__after_atomic(); + wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING); +out: + atomic_dec(&skc->skc_ref); +} +EXPORT_SYMBOL(spl_kmem_cache_reap_now); + +/* + * Reap all free slabs from all registered caches. + */ +void +spl_kmem_reap(void) +{ + struct shrink_control sc; + + sc.nr_to_scan = KMC_REAP_CHUNK; + sc.gfp_mask = GFP_KERNEL; + + (void) __spl_kmem_cache_generic_shrinker(NULL, &sc); +} +EXPORT_SYMBOL(spl_kmem_reap); + +int +spl_kmem_cache_init(void) +{ + init_rwsem(&spl_kmem_cache_sem); + INIT_LIST_HEAD(&spl_kmem_cache_list); + spl_kmem_cache_taskq = taskq_create("spl_kmem_cache", + spl_kmem_cache_kmem_threads, maxclsyspri, + spl_kmem_cache_kmem_threads * 8, INT_MAX, + TASKQ_PREPOPULATE | TASKQ_DYNAMIC); + spl_register_shrinker(&spl_kmem_cache_shrinker); + + return (0); +} + +void +spl_kmem_cache_fini(void) +{ + spl_unregister_shrinker(&spl_kmem_cache_shrinker); + taskq_destroy(spl_kmem_cache_taskq); +} diff -Naur spl-0.6.5.7/module/spl/spl-kobj.c spl-0.6.5.7.new/module/spl/spl-kobj.c --- spl-0.6.5.7/module/spl/spl-kobj.c 2016-05-13 04:46:56.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-kobj.c 2016-08-01 16:43:32.403791098 +0200 @@ -57,10 +57,15 @@ EXPORT_SYMBOL(kobj_close_file); int -kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off) +kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) { - return (vn_rdwr(UIO_READ, file->vp, buf, size, off, - UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL)); + ssize_t resid; + + if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off, + UIO_SYSSPACE, 0, 0, 0, &resid) != 0) + return (-1); + + return (size - resid); } /* kobj_read_file() */ EXPORT_SYMBOL(kobj_read_file); diff -Naur spl-0.6.5.7/module/spl/spl-proc.c spl-0.6.5.7.new/module/spl/spl-proc.c --- spl-0.6.5.7/module/spl/spl-proc.c 2016-05-13 04:46:56.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-proc.c 2016-08-01 16:43:26.107773512 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,8 @@ static struct proc_dir_entry *proc_spl = NULL; static struct proc_dir_entry *proc_spl_kmem = NULL; static struct proc_dir_entry *proc_spl_kmem_slab = NULL; +static struct proc_dir_entry *proc_spl_taskq_all = NULL; +static struct proc_dir_entry *proc_spl_taskq = NULL; struct proc_dir_entry *proc_spl_kstat = NULL; static int @@ -216,6 +219,176 @@ } static void +taskq_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n", + "taskq", "act", "nthr", "spwn", "maxt", "pri", + "mina", "maxa", "cura", "flags"); +} + +/* indices into the lheads array below */ +#define LHEAD_PEND 0 +#define LHEAD_PRIO 1 +#define LHEAD_DELAY 2 +#define LHEAD_WAIT 3 +#define LHEAD_ACTIVE 4 +#define LHEAD_SIZE 5 + +static int +taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag) +{ + taskq_t *tq = p; + taskq_thread_t *tqt; + wait_queue_t *wq; + struct task_struct *tsk; + taskq_ent_t *tqe; + char name[100]; + struct list_head *lheads[LHEAD_SIZE], *lh; + static char *list_names[LHEAD_SIZE] = + {"pend", "prio", "delay", "wait", "active" }; + int i, j, have_lheads = 0; + unsigned long wflags, flags; + + spin_lock_irqsave_nested(&tq->tq_lock, 
flags, tq->tq_lock_class); + spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags); + + /* get the various lists and check whether they're empty */ + lheads[LHEAD_PEND] = &tq->tq_pend_list; + lheads[LHEAD_PRIO] = &tq->tq_prio_list; + lheads[LHEAD_DELAY] = &tq->tq_delay_list; + lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list; + lheads[LHEAD_ACTIVE] = &tq->tq_active_list; + + for (i = 0; i < LHEAD_SIZE; ++i) { + if (list_empty(lheads[i])) + lheads[i] = NULL; + else + ++have_lheads; + } + + /* early return in non-"all" mode if lists are all empty */ + if (!allflag && !have_lheads) { + spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); + spin_unlock_irqrestore(&tq->tq_lock, flags); + return (0); + } + + /* unlock the waitq quickly */ + if (!lheads[LHEAD_WAIT]) + spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); + + /* show the base taskq contents */ + snprintf(name, sizeof(name), "%s/%d", tq->tq_name, tq->tq_instance); + seq_printf(f, "%-25s ", name); + seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n", + tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn, + tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc, + tq->tq_nalloc, tq->tq_flags); + + /* show the active list */ + if (lheads[LHEAD_ACTIVE]) { + j = 0; + list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) { + if (j == 0) + seq_printf(f, "\t%s:", list_names[LHEAD_ACTIVE]); + else if (j == 2) { + seq_printf(f, "\n\t "); + j = 0; + } + seq_printf(f, " [%d]%pf(%ps)", + tqt->tqt_thread->pid, + tqt->tqt_task->tqent_func, + tqt->tqt_task->tqent_arg); + ++j; + } + seq_printf(f, "\n"); + } + + for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i) + if (lheads[i]) { + j = 0; + list_for_each(lh, lheads[i]) { + /* show the wait waitq list */ + if (i == LHEAD_WAIT) { + wq = list_entry(lh, wait_queue_t, task_list); + if (j == 0) + seq_printf(f, "\t%s:", + list_names[i]); + else if (j == 12) { + seq_printf(f, "\n\t "); + j = 0; + } + tsk = wq->private; + seq_printf(f, " %d", tsk->pid); + /* pend, prio and delay lists */ + } else { + tqe = list_entry(lh, taskq_ent_t, + tqent_list); + if (j == 0) + seq_printf(f, "\t%s:", + list_names[i]); + else if (j == 2) { + seq_printf(f, "\n\t "); + j = 0; + } + seq_printf(f, " %pf(%ps)", + tqe->tqent_func, + tqe->tqent_arg); + } + ++j; + } + seq_printf(f, "\n"); + } + if (lheads[LHEAD_WAIT]) + spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags); + spin_unlock_irqrestore(&tq->tq_lock, flags); + + return (0); +} + +static int +taskq_all_seq_show(struct seq_file *f, void *p) +{ + return (taskq_seq_show_impl(f, p, B_TRUE)); +} + +static int +taskq_seq_show(struct seq_file *f, void *p) +{ + return (taskq_seq_show_impl(f, p, B_FALSE)); +} + +static void * +taskq_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + + down_read(&tq_list_sem); + if (!n) + taskq_seq_show_headers(f); + + p = tq_list.next; + while (n--) { + p = p->next; + if (p == &tq_list) + return (NULL); + } + + return (list_entry(p, taskq_t, tq_taskqs)); +} + +static void * +taskq_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + taskq_t *tq = p; + + ++*pos; + return ((tq->tq_taskqs.next == &tq_list) ? 
+ NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs)); +} + +static void slab_seq_show_headers(struct seq_file *f) { seq_printf(f, @@ -325,6 +498,52 @@ .release = seq_release, }; +static void +taskq_seq_stop(struct seq_file *f, void *v) +{ + up_read(&tq_list_sem); +} + +static struct seq_operations taskq_all_seq_ops = { + .show = taskq_all_seq_show, + .start = taskq_seq_start, + .next = taskq_seq_next, + .stop = taskq_seq_stop, +}; + +static struct seq_operations taskq_seq_ops = { + .show = taskq_seq_show, + .start = taskq_seq_start, + .next = taskq_seq_next, + .stop = taskq_seq_stop, +}; + +static int +proc_taskq_all_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &taskq_all_seq_ops); +} + +static int +proc_taskq_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &taskq_seq_ops); +} + +static struct file_operations proc_taskq_all_operations = { + .open = proc_taskq_all_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct file_operations proc_taskq_operations = { + .open = proc_taskq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static struct ctl_table spl_kmem_table[] = { #ifdef DEBUG_KMEM { @@ -476,6 +695,20 @@ goto out; } + proc_spl_taskq_all = proc_create_data("taskq-all", 0444, + proc_spl, &proc_taskq_all_operations, NULL); + if (proc_spl_taskq_all == NULL) { + rc = -EUNATCH; + goto out; + } + + proc_spl_taskq = proc_create_data("taskq", 0444, + proc_spl, &proc_taskq_operations, NULL); + if (proc_spl_taskq == NULL) { + rc = -EUNATCH; + goto out; + } + proc_spl_kmem = proc_mkdir("kmem", proc_spl); if (proc_spl_kmem == NULL) { rc = -EUNATCH; @@ -499,6 +732,8 @@ remove_proc_entry("kstat", proc_spl); remove_proc_entry("slab", proc_spl_kmem); remove_proc_entry("kmem", proc_spl); + remove_proc_entry("taskq-all", proc_spl); + remove_proc_entry("taskq", proc_spl); remove_proc_entry("spl", NULL); unregister_sysctl_table(spl_header); } @@ -512,6 +747,8 @@ remove_proc_entry("kstat", proc_spl); remove_proc_entry("slab", proc_spl_kmem); remove_proc_entry("kmem", proc_spl); + remove_proc_entry("taskq-all", proc_spl); + remove_proc_entry("taskq", proc_spl); remove_proc_entry("spl", NULL); ASSERT(spl_header != NULL); diff -Naur spl-0.6.5.7/module/spl/spl-rwlock.c spl-0.6.5.7.new/module/spl/spl-rwlock.c --- spl-0.6.5.7/module/spl/spl-rwlock.c 2016-05-13 04:46:56.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-rwlock.c 2016-08-01 16:43:34.281796344 +0200 @@ -32,65 +32,55 @@ #define DEBUG_SUBSYSTEM S_RWLOCK -#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK - -/* - * From lib/rwsem-spinlock.c but modified such that the caller is - * responsible for acquiring and dropping the sem->wait_lock. 
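
The spinlock-based helpers being removed in this hunk are superseded by the cmpxchg-based __rwsem_tryupgrade() variants added just below: the upgrade succeeds only when the semaphore count shows exactly one reader, which is atomically swapped for the single-writer value. A userspace sketch of that compare-and-swap idea using C11 atomics (the constants are illustrative; the real values come from SPL_RWSEM_SINGLE_READER_VALUE/SPL_RWSEM_SINGLE_WRITER_VALUE):

    #include <stdatomic.h>

    #define ONE_READER    1L
    #define ONE_WRITER  (-1L)

    /*
     * Try to upgrade a read lock to a write lock without blocking:
     * succeed only if we are the sole reader, atomically replacing
     * the single-reader count with the single-writer value.
     */
    static int tryupgrade_sketch(atomic_long *count)
    {
        long expected = ONE_READER;

        return (atomic_compare_exchange_strong(count, &expected, ONE_WRITER));
    }

If the exchange fails, another reader or a queued writer got there first, and the caller must fall back to a full unlock/relock.
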
- */ -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 -}; - -/* wake a single writer */ -static struct rw_semaphore * -__rwsem_wake_one_writer_locked(struct rw_semaphore *sem) +#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) +static int +__rwsem_tryupgrade(struct rw_semaphore *rwsem) { - struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); - - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - return sem; + int ret = 0; + unsigned long flags; + spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags); + if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE && + list_empty(&rwsem->wait_list)) { + ret = 1; + RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE; + } + spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags); + return (ret); } - -/* release a read lock on the semaphore */ -void -__up_read_locked(struct rw_semaphore *sem) +#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) +static int +__rwsem_tryupgrade(struct rw_semaphore *rwsem) { - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) - (void)__rwsem_wake_one_writer_locked(sem); + long val; + val = atomic_long_cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, + SPL_RWSEM_SINGLE_WRITER_VALUE); + return (val == SPL_RWSEM_SINGLE_READER_VALUE); } -EXPORT_SYMBOL(__up_read_locked); - -/* trylock for writing -- returns 1 if successful, 0 if contention */ -int -__down_write_trylock_locked(struct rw_semaphore *sem) +#else +static int +__rwsem_tryupgrade(struct rw_semaphore *rwsem) { - int ret = 0; - - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - sem->activity = -1; - ret = 1; - } - - return ret; + typeof (rwsem->count) val; + val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE, + SPL_RWSEM_SINGLE_WRITER_VALUE); + return (val == SPL_RWSEM_SINGLE_READER_VALUE); } -EXPORT_SYMBOL(__down_write_trylock_locked); +#endif +int +rwsem_tryupgrade(struct rw_semaphore *rwsem) +{ + if (__rwsem_tryupgrade(rwsem)) { + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER + rwsem->owner = current; #endif + return (1); + } + return (0); +} +EXPORT_SYMBOL(rwsem_tryupgrade); int spl_rw_init(void) { return 0; } void spl_rw_fini(void) { } diff -Naur spl-0.6.5.7/module/spl/spl-taskq.c spl-0.6.5.7.new/module/spl/spl-taskq.c --- spl-0.6.5.7/module/spl/spl-taskq.c 2016-05-13 04:46:57.000000000 +0200 +++ spl-0.6.5.7.new/module/spl/spl-taskq.c 2016-08-01 16:43:34.279796338 +0200 @@ -1,4 +1,4 @@ -/*****************************************************************************\ +/* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -20,12 +20,13 @@ * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . - ***************************************************************************** + * * Solaris Porting Layer (SPL) Task Queue Implementation. 
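
A usage sketch for the rwsem_tryupgrade() exported in the spl-rwlock.c hunk above (kernel-only pseudo-usage, illustrative and not taken from this patch): the expected caller pattern is try the cheap upgrade first, and only drop and retake the semaphore when it fails.

    /* Hypothetical caller; revalidation after the relock is essential. */
    static void update_if_needed(struct rw_semaphore *sem)
    {
        down_read(sem);
        /* ... inspect shared state, decide an update is required ... */

        if (!rwsem_tryupgrade(sem)) {
            /* Upgrade raced with another user: drop and retake for write. */
            up_read(sem);
            down_write(sem);
            /* State may have changed in the window; revalidate here. */
        }

        /* ... perform the update with the write lock held ... */
        up_write(sem);
    }
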
-\*****************************************************************************/ + */ #include #include +#include int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); @@ -39,12 +40,12 @@ int spl_taskq_thread_priority = 1; module_param(spl_taskq_thread_priority, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_priority, - "Allow non-default priority for taskq threads"); + "Allow non-default priority for taskq threads"); int spl_taskq_thread_sequential = 4; module_param(spl_taskq_thread_sequential, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_sequential, - "Create new taskq threads after N sequential tasks"); + "Create new taskq threads after N sequential tasks"); /* Global system-wide dynamic task queue available for all consumers */ taskq_t *system_taskq; @@ -54,16 +55,38 @@ static taskq_t *dynamic_taskq; static taskq_thread_t *taskq_thread_create(taskq_t *); +/* List of all taskqs */ +LIST_HEAD(tq_list); +DECLARE_RWSEM(tq_list_sem); +static uint_t taskq_tsd; + static int task_km_flags(uint_t flags) { if (flags & TQ_NOSLEEP) - return KM_NOSLEEP; + return (KM_NOSLEEP); if (flags & TQ_PUSHPAGE) - return KM_PUSHPAGE; + return (KM_PUSHPAGE); + + return (KM_SLEEP); +} + +/* + * taskq_find_by_name - Find the largest instance number of a named taskq. + */ +static int +taskq_find_by_name(const char *name) +{ + struct list_head *tql; + taskq_t *tq; - return KM_SLEEP; + list_for_each_prev(tql, &tq_list) { + tq = list_entry(tql, taskq_t, tq_taskqs); + if (strcmp(name, tq->tq_name) == 0) + return tq->tq_instance; + } + return (-1); } /* @@ -71,7 +94,7 @@ * is not attached to the free, work, or pending taskq lists. */ static taskq_ent_t * -task_alloc(taskq_t *tq, uint_t flags) +task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags) { taskq_ent_t *t; int count = 0; @@ -111,18 +134,19 @@ * end up delaying the task allocation by one second, thereby * throttling the task dispatch rate. */ - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, *irqflags); schedule_timeout(HZ / 100); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, + tq->tq_lock_class); if (count < 100) { count++; goto retry; } } - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); - t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags)); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, *irqflags); + t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags)); + spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class); if (t) { taskq_init_ent(t); @@ -145,7 +169,7 @@ ASSERT(list_empty(&t->tqent_list)); ASSERT(!timer_pending(&t->tqent_timer)); - kmem_free(t, sizeof(taskq_ent_t)); + kmem_free(t, sizeof (taskq_ent_t)); tq->tq_nalloc--; } @@ -187,15 +211,17 @@ taskq_ent_t *w, *t = (taskq_ent_t *)data; taskq_t *tq = t->tqent_taskq; struct list_head *l; + unsigned long flags; - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); if (t->tqent_flags & TQENT_FLAG_CANCEL) { ASSERT(list_empty(&t->tqent_list)); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); return; } + t->tqent_birth = jiffies; /* * The priority list must be maintained in strict task id order * from lowest to highest for lowest_id to be easily calculable. 
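
The ordering invariant described above is what keeps taskq_lowest_id() cheap: every list stays sorted by task id, so only list heads ever need inspection. A standalone sketch of the tail-first sorted insert used for the priority list (types are illustrative):

    struct ent {
        unsigned long id;
        struct ent *prev, *next;
    };

    /*
     * Insert 't' into a circular doubly linked list (with a sentinel
     * head), keeping ids in ascending order. Scanning from the tail is
     * cheap because newly expired tasks usually carry recent, large ids.
     */
    static void insert_in_order(struct ent *head, struct ent *t)
    {
        struct ent *w = head->prev;   /* start at the tail */

        while (w != head && w->id > t->id)
            w = w->prev;

        t->prev = w;
        t->next = w->next;
        w->next->prev = t;
        w->next = t;
    }
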
@@ -211,7 +237,7 @@ if (l == &tq->tq_prio_list) list_add(&t->tqent_list, &tq->tq_prio_list); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); wake_up(&tq->tq_work_waitq); } @@ -378,10 +404,11 @@ { int active = 0; int rc; + unsigned long flags; - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); rc = (taskq_find(tq, id, &active) == NULL); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); return (rc); } @@ -401,10 +428,11 @@ taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id) { int rc; + unsigned long flags; - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); rc = (id < tq->tq_lowest_id); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); return (rc); } @@ -419,8 +447,8 @@ void taskq_wait_outstanding(taskq_t *tq, taskqid_t id) { - wait_event(tq->tq_wait_waitq, - taskq_wait_outstanding_check(tq, id ? id : tq->tq_next_id - 1)); + id = id ? id : tq->tq_next_id - 1; + wait_event(tq->tq_wait_waitq, taskq_wait_outstanding_check(tq, id)); } EXPORT_SYMBOL(taskq_wait_outstanding); @@ -428,10 +456,11 @@ taskq_wait_check(taskq_t *tq) { int rc; + unsigned long flags; - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); rc = (tq->tq_lowest_id == tq->tq_next_id); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); return (rc); } @@ -448,37 +477,10 @@ } EXPORT_SYMBOL(taskq_wait); -static int -taskq_member_impl(taskq_t *tq, void *t) -{ - struct list_head *l; - taskq_thread_t *tqt; - int found = 0; - - ASSERT(tq); - ASSERT(t); - ASSERT(spin_is_locked(&tq->tq_lock)); - - list_for_each(l, &tq->tq_thread_list) { - tqt = list_entry(l, taskq_thread_t, tqt_thread_list); - if (tqt->tqt_thread == (struct task_struct *)t) { - found = 1; - break; - } - } - return (found); -} - int -taskq_member(taskq_t *tq, void *t) +taskq_member(taskq_t *tq, kthread_t *t) { - int found; - - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); - found = taskq_member_impl(tq, t); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); - - return (found); + return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t)); } EXPORT_SYMBOL(taskq_member); @@ -494,10 +496,11 @@ taskq_ent_t *t; int active = 0; int rc = ENOENT; + unsigned long flags; ASSERT(tq); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); t = taskq_find(tq, id, &active); if (t && !active) { list_del_init(&t->tqent_list); @@ -517,9 +520,10 @@ * drop the lock before synchronously cancelling the timer. 
*/ if (timer_pending(&t->tqent_timer)) { - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); del_timer_sync(&t->tqent_timer); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, + tq->tq_lock_class); } if (!(t->tqent_flags & TQENT_FLAG_PREALLOC)) @@ -527,7 +531,7 @@ rc = 0; } - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); if (active) { taskq_wait_id(tq, id); @@ -545,11 +549,12 @@ { taskq_ent_t *t; taskqid_t rc = 0; + unsigned long irqflags; ASSERT(tq); ASSERT(func); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class); /* Taskq being destroyed and all tasks drained */ if (!(tq->tq_flags & TASKQ_ACTIVE)) @@ -557,16 +562,22 @@ /* Do not queue the task unless there is idle thread for it */ ASSERT(tq->tq_nactive <= tq->tq_nthreads); - if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) - goto out; + if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) { + /* Dynamic taskq may be able to spawn another thread */ + if (!(tq->tq_flags & TASKQ_DYNAMIC) || taskq_thread_spawn(tq) == 0) + goto out; + } - if ((t = task_alloc(tq, flags)) == NULL) + if ((t = task_alloc(tq, flags, &irqflags)) == NULL) goto out; spin_lock(&t->tqent_lock); + /* Queue to the front of the list to enforce TQ_NOQUEUE semantics */ + if (flags & TQ_NOQUEUE) + list_add(&t->tqent_list, &tq->tq_prio_list); /* Queue to the priority list instead of the pending list */ - if (flags & TQ_FRONT) + else if (flags & TQ_FRONT) list_add_tail(&t->tqent_list, &tq->tq_prio_list); else list_add_tail(&t->tqent_list, &tq->tq_pend_list); @@ -579,6 +590,7 @@ t->tqent_timer.data = 0; t->tqent_timer.function = NULL; t->tqent_timer.expires = 0; + t->tqent_birth = jiffies; ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); @@ -587,10 +599,10 @@ wake_up(&tq->tq_work_waitq); out: /* Spawn additional taskq threads if required. */ - if (tq->tq_nactive == tq->tq_nthreads) + if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, irqflags); return (rc); } EXPORT_SYMBOL(taskq_dispatch); @@ -601,17 +613,18 @@ { taskqid_t rc = 0; taskq_ent_t *t; + unsigned long irqflags; ASSERT(tq); ASSERT(func); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class); /* Taskq being destroyed and all tasks drained */ if (!(tq->tq_flags & TASKQ_ACTIVE)) goto out; - if ((t = task_alloc(tq, flags)) == NULL) + if ((t = task_alloc(tq, flags, &irqflags)) == NULL) goto out; spin_lock(&t->tqent_lock); @@ -636,19 +649,21 @@ /* Spawn additional taskq threads if required. 
*/ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, irqflags); return (rc); } EXPORT_SYMBOL(taskq_dispatch_delay); void taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, - taskq_ent_t *t) + taskq_ent_t *t) { + unsigned long irqflags; ASSERT(tq); ASSERT(func); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, irqflags, + tq->tq_lock_class); /* Taskq being destroyed and all tasks drained */ if (!(tq->tq_flags & TASKQ_ACTIVE)) { @@ -656,6 +671,13 @@ goto out; } + if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) { + /* Dynamic taskq may be able to spawn another thread */ + if (!(tq->tq_flags & TASKQ_DYNAMIC) || taskq_thread_spawn(tq) == 0) + goto out2; + flags |= TQ_FRONT; + } + spin_lock(&t->tqent_lock); /* @@ -675,6 +697,7 @@ t->tqent_func = func; t->tqent_arg = arg; t->tqent_taskq = tq; + t->tqent_birth = jiffies; spin_unlock(&t->tqent_lock); @@ -683,14 +706,15 @@ /* Spawn additional taskq threads if required. */ if (tq->tq_nactive == tq->tq_nthreads) (void) taskq_thread_spawn(tq); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); +out2: + spin_unlock_irqrestore(&tq->tq_lock, irqflags); } EXPORT_SYMBOL(taskq_dispatch_ent); int taskq_empty_ent(taskq_ent_t *t) { - return list_empty(&t->tqent_list); + return (list_empty(&t->tqent_list)); } EXPORT_SYMBOL(taskq_empty_ent); @@ -737,16 +761,18 @@ taskq_thread_spawn_task(void *arg) { taskq_t *tq = (taskq_t *)arg; + unsigned long flags; - (void) taskq_thread_create(tq); - - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); - tq->tq_nspawn--; - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + if (taskq_thread_create(tq) == NULL) { + /* restore spawning count if failed */ + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); + tq->tq_nspawn--; + spin_unlock_irqrestore(&tq->tq_lock, flags); + } } /* - * Spawn addition threads for dynamic taskqs (TASKQ_DYNMAIC) the current + * Spawn addition threads for dynamic taskqs (TASKQ_DYNAMIC) the current * number of threads is insufficient to handle the pending tasks. These * new threads must be created by the dedicated dynamic_taskq to avoid * deadlocks between thread creation and memory reclaim. The system_taskq @@ -796,7 +822,7 @@ (tq->tq_nactive == 0) && /* No threads are handling tasks */ (tq->tq_nthreads > 1) && /* More than 1 thread is running */ (!taskq_next_ent(tq)) && /* There are no pending tasks */ - (spl_taskq_thread_dynamic));/* Dynamic taskqs are allowed */ + (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */ } static int @@ -808,8 +834,10 @@ taskq_t *tq; taskq_ent_t *t; int seq_tasks = 0; + unsigned long flags; ASSERT(tqt); + ASSERT(tqt->tqt_tq); tq = tqt->tqt_tq; current->flags |= PF_NOFREEZE; @@ -819,7 +847,16 @@ sigprocmask(SIG_BLOCK, &blocked, NULL); flush_signals(current); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tsd_set(taskq_tsd, tq); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); + /* + * If we are dynamically spawned, decrease spawning count. Note that + * we could be created during taskq_create, in which case we shouldn't + * do the decrement. But it's fine because taskq_create will reset + * tq_nspawn later. + */ + if (tq->tq_flags & TASKQ_DYNAMIC) + tq->tq_nspawn--; /* Immediately exit if more threads than allowed were created. 
*/ if (tq->tq_nthreads >= tq->tq_maxthreads) @@ -841,12 +878,13 @@ } add_wait_queue_exclusive(&tq->tq_work_waitq, &wait); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); schedule(); seq_tasks = 0; - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, + tq->tq_lock_class); remove_wait_queue(&tq->tq_work_waitq, &wait); } else { __set_current_state(TASK_RUNNING); @@ -855,27 +893,32 @@ if ((t = taskq_next_ent(tq)) != NULL) { list_del_init(&t->tqent_list); - /* In order to support recursively dispatching a + /* + * In order to support recursively dispatching a * preallocated taskq_ent_t, tqent_id must be - * stored prior to executing tqent_func. */ + * stored prior to executing tqent_func. + */ tqt->tqt_id = t->tqent_id; tqt->tqt_task = t; - /* We must store a copy of the flags prior to + /* + * We must store a copy of the flags prior to * servicing the task (servicing a prealloc'd task * returns the ownership of the tqent back to * the caller of taskq_dispatch). Thus, - * tqent_flags _may_ change within the call. */ + * tqent_flags _may_ change within the call. + */ tqt->tqt_flags = t->tqent_flags; taskq_insert_in_order(tq, tqt); tq->tq_nactive++; - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); /* Perform the requested task */ t->tqent_func(t->tqent_arg); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, + tq->tq_lock_class); tq->tq_nactive--; list_del_init(&tqt->tqt_active_list); tqt->tqt_task = NULL; @@ -884,8 +927,10 @@ if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC)) task_done(tq, t); - /* When the current lowest outstanding taskqid is - * done calculate the new lowest outstanding id */ + /* + * When the current lowest outstanding taskqid is + * done calculate the new lowest outstanding id + */ if (tq->tq_lowest_id == tqt->tqt_id) { tq->tq_lowest_id = taskq_lowest_id(tq); ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id); @@ -913,7 +958,9 @@ list_del_init(&tqt->tqt_thread_list); error: kmem_free(tqt, sizeof (taskq_thread_t)); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); + + tsd_set(taskq_tsd, NULL); return (0); } @@ -957,6 +1004,7 @@ taskq_t *tq; taskq_thread_t *tqt; int count = 0, rc = 0, i; + unsigned long irqflags; ASSERT(name != NULL); ASSERT(minalloc >= 0); @@ -979,32 +1027,36 @@ spin_lock_init(&tq->tq_lock); INIT_LIST_HEAD(&tq->tq_thread_list); INIT_LIST_HEAD(&tq->tq_active_list); - tq->tq_name = strdup(name); - tq->tq_nactive = 0; - tq->tq_nthreads = 0; - tq->tq_nspawn = 0; + tq->tq_name = strdup(name); + tq->tq_nactive = 0; + tq->tq_nthreads = 0; + tq->tq_nspawn = 0; tq->tq_maxthreads = nthreads; - tq->tq_pri = pri; - tq->tq_minalloc = minalloc; - tq->tq_maxalloc = maxalloc; - tq->tq_nalloc = 0; - tq->tq_flags = (flags | TASKQ_ACTIVE); - tq->tq_next_id = 1; - tq->tq_lowest_id = 1; + tq->tq_pri = pri; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_nalloc = 0; + tq->tq_flags = (flags | TASKQ_ACTIVE); + tq->tq_next_id = 1; + tq->tq_lowest_id = 1; INIT_LIST_HEAD(&tq->tq_free_list); INIT_LIST_HEAD(&tq->tq_pend_list); INIT_LIST_HEAD(&tq->tq_prio_list); INIT_LIST_HEAD(&tq->tq_delay_list); init_waitqueue_head(&tq->tq_work_waitq); init_waitqueue_head(&tq->tq_wait_waitq); + tq->tq_lock_class = TQ_LOCK_GENERAL; + INIT_LIST_HEAD(&tq->tq_taskqs); if (flags & TASKQ_PREPOPULATE) { - spin_lock_irqsave(&tq->tq_lock, 
tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, irqflags, + tq->tq_lock_class); for (i = 0; i < minalloc; i++) - task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW)); + task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW, + &irqflags)); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, irqflags); } if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) @@ -1020,10 +1072,20 @@ /* Wait for all threads to be started before potential destroy */ wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count); + /* + * taskq_thread might have touched nspawn, but we don't want them to + * because they're not dynamically spawned. So we reset it to 0 + */ + tq->tq_nspawn = 0; if (rc) { taskq_destroy(tq); tq = NULL; + } else { + down_write(&tq_list_sem); + tq->tq_instance = taskq_find_by_name(name) + 1; + list_add_tail(&tq->tq_taskqs, &tq_list); + up_write(&tq_list_sem); } return (tq); @@ -1036,11 +1098,12 @@ struct task_struct *thread; taskq_thread_t *tqt; taskq_ent_t *t; + unsigned long flags; ASSERT(tq); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); tq->tq_flags &= ~TASKQ_ACTIVE; - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); /* * When TASKQ_ACTIVE is clear new tasks may not be added nor may @@ -1051,7 +1114,18 @@ taskq_wait(tq); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + /* remove taskq from global list used by the kstats */ + down_write(&tq_list_sem); + list_del(&tq->tq_taskqs); + up_write(&tq_list_sem); + + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); + /* wait for spawning threads to insert themselves to the list */ + while (tq->tq_nspawn) { + spin_unlock_irqrestore(&tq->tq_lock, flags); + schedule_timeout_interruptible(1); + spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class); + } /* * Signal each thread to exit and block until it does. Each thread @@ -1063,11 +1137,12 @@ tqt = list_entry(tq->tq_thread_list.next, taskq_thread_t, tqt_thread_list); thread = tqt->tqt_thread; - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); kthread_stop(thread); - spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + spin_lock_irqsave_nested(&tq->tq_lock, flags, + tq->tq_lock_class); } while (!list_empty(&tq->tq_free_list)) { @@ -1089,16 +1164,75 @@ ASSERT(list_empty(&tq->tq_prio_list)); ASSERT(list_empty(&tq->tq_delay_list)); - spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + spin_unlock_irqrestore(&tq->tq_lock, flags); strfree(tq->tq_name); kmem_free(tq, sizeof (taskq_t)); } EXPORT_SYMBOL(taskq_destroy); + +static unsigned int spl_taskq_kick = 0; + +/* + * 2.6.36 API Change + * module_param_cb is introduced to take kernel_param_ops and + * module_param_call is marked as obsolete. Also set and get operations + * were changed to take a 'const struct kernel_param *'. 
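
The parameter handler added below declares a taskq stuck when its oldest pending task is more than five seconds old, using time_after() so the comparison survives jiffies wraparound. The trick is a signed subtraction; a self-contained sketch:

    #include <stdio.h>

    /* Wraparound-safe "a is after b", like the kernel's time_after(). */
    static int time_after_sketch(unsigned long a, unsigned long b)
    {
        return ((long)(b - a) < 0);
    }

    int main(void)
    {
        unsigned long birth = (unsigned long)-3;  /* just before wraparound */
        unsigned long now = 7;                    /* 10 ticks later */

        printf("%d\n", time_after_sketch(now, birth));  /* prints 1 */
        return (0);
    }
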
+ */
+static int
+#ifdef module_param_cb
+param_set_taskq_kick(const char *val, const struct kernel_param *kp)
+#else
+param_set_taskq_kick(const char *val, struct kernel_param *kp)
+#endif
+{
+    int ret;
+    taskq_t *tq;
+    taskq_ent_t *t;
+    unsigned long flags;
+
+    ret = param_set_uint(val, kp);
+    if (ret < 0 || !spl_taskq_kick)
+        return (ret);
+    /* reset value */
+    spl_taskq_kick = 0;
+
+    down_read(&tq_list_sem);
+    list_for_each_entry(tq, &tq_list, tq_taskqs) {
+        spin_lock_irqsave_nested(&tq->tq_lock, flags,
+            tq->tq_lock_class);
+        /* Check if the first pending task is older than 5 seconds */
+        t = taskq_next_ent(tq);
+        if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
+            (void) taskq_thread_spawn(tq);
+            printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
+                tq->tq_name, tq->tq_instance);
+        }
+        spin_unlock_irqrestore(&tq->tq_lock, flags);
+    }
+    up_read(&tq_list_sem);
+    return (ret);
+}
+
+#ifdef module_param_cb
+static const struct kernel_param_ops param_ops_taskq_kick = {
+    .set = param_set_taskq_kick,
+    .get = param_get_uint,
+};
+module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
+#else
+module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
+    &spl_taskq_kick, 0644);
+#endif
+MODULE_PARM_DESC(spl_taskq_kick,
+    "Write nonzero to kick stuck taskqs to spawn more threads");
+
 int
 spl_taskq_init(void)
 {
+    tsd_create(&taskq_tsd, NULL);
+
     system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
         maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
     if (system_taskq == NULL)
@@ -1111,6 +1245,13 @@
         return (1);
     }
 
+    /*
+     * This is used to annotate tq_lock, so
+     * taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
+     * does not trigger a lockdep warning re: possible recursive locking.
+     */
+    dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
+
     return (0);
 }
 
@@ -1122,4 +1263,6 @@
 
     taskq_destroy(system_taskq);
     system_taskq = NULL;
+
+    tsd_destroy(&taskq_tsd);
 }
diff -Naur spl-0.6.5.7/module/spl/spl-taskq.c.orig spl-0.6.5.7.new/module/spl/spl-taskq.c.orig
--- spl-0.6.5.7/module/spl/spl-taskq.c.orig	1970-01-01 01:00:00.000000000 +0100
+++ spl-0.6.5.7.new/module/spl/spl-taskq.c.orig	2016-05-13 04:46:57.000000000 +0200
@@ -0,0 +1,1125 @@
+/*****************************************************************************\
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf .
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see .
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see .
+ *****************************************************************************
+ * Solaris Porting Layer (SPL) Task Queue Implementation.
+\*****************************************************************************/ + +#include +#include + +int spl_taskq_thread_bind = 0; +module_param(spl_taskq_thread_bind, int, 0644); +MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default"); + + +int spl_taskq_thread_dynamic = 0; +module_param(spl_taskq_thread_dynamic, int, 0644); +MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads"); + +int spl_taskq_thread_priority = 1; +module_param(spl_taskq_thread_priority, int, 0644); +MODULE_PARM_DESC(spl_taskq_thread_priority, + "Allow non-default priority for taskq threads"); + +int spl_taskq_thread_sequential = 4; +module_param(spl_taskq_thread_sequential, int, 0644); +MODULE_PARM_DESC(spl_taskq_thread_sequential, + "Create new taskq threads after N sequential tasks"); + +/* Global system-wide dynamic task queue available for all consumers */ +taskq_t *system_taskq; +EXPORT_SYMBOL(system_taskq); + +/* Private dedicated taskq for creating new taskq threads on demand. */ +static taskq_t *dynamic_taskq; +static taskq_thread_t *taskq_thread_create(taskq_t *); + +static int +task_km_flags(uint_t flags) +{ + if (flags & TQ_NOSLEEP) + return KM_NOSLEEP; + + if (flags & TQ_PUSHPAGE) + return KM_PUSHPAGE; + + return KM_SLEEP; +} + +/* + * NOTE: Must be called with tq->tq_lock held, returns a list_t which + * is not attached to the free, work, or pending taskq lists. + */ +static taskq_ent_t * +task_alloc(taskq_t *tq, uint_t flags) +{ + taskq_ent_t *t; + int count = 0; + + ASSERT(tq); + ASSERT(spin_is_locked(&tq->tq_lock)); +retry: + /* Acquire taskq_ent_t's from free list if available */ + if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) { + t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list); + + ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); + ASSERT(!(t->tqent_flags & TQENT_FLAG_CANCEL)); + ASSERT(!timer_pending(&t->tqent_timer)); + + list_del_init(&t->tqent_list); + return (t); + } + + /* Free list is empty and memory allocations are prohibited */ + if (flags & TQ_NOALLOC) + return (NULL); + + /* Hit maximum taskq_ent_t pool size */ + if (tq->tq_nalloc >= tq->tq_maxalloc) { + if (flags & TQ_NOSLEEP) + return (NULL); + + /* + * Sleep periodically polling the free list for an available + * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed + * but we cannot block forever waiting for an taskq_ent_t to + * show up in the free list, otherwise a deadlock can happen. + * + * Therefore, we need to allocate a new task even if the number + * of allocated tasks is above tq->tq_maxalloc, but we still + * end up delaying the task allocation by one second, thereby + * throttling the task dispatch rate. + */ + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + schedule_timeout(HZ / 100); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + if (count < 100) { + count++; + goto retry; + } + } + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags)); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + if (t) { + taskq_init_ent(t); + tq->tq_nalloc++; + } + + return (t); +} + +/* + * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t + * to already be removed from the free, work, or pending taskq lists. 
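
The task_alloc() retry path preserved in this .orig copy never blocks indefinitely: it polls the free list with a short sleep and, after 100 attempts, allocates anyway, so a TQ_SLEEP dispatcher is throttled rather than deadlocked. A userspace sketch of that bounded-polling shape (names and stubs are illustrative):

    #include <stddef.h>
    #include <unistd.h>

    /* Stubs standing in for the free-list probe and the blocking allocator. */
    static void *try_get_free_object(void) { return NULL; }
    static void *slow_allocate(void) { return NULL; }

    static void *alloc_throttled(void)
    {
        void *obj;
        int attempt;

        /* Poll the pool, sleeping ~10ms per try (the HZ/100 analogue). */
        for (attempt = 0; attempt < 100; attempt++) {
            if ((obj = try_get_free_object()) != NULL)
                return (obj);
            usleep(10 * 1000);
        }

        /* Retry cap reached: allocate anyway rather than risk deadlock. */
        return (slow_allocate());
    }
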
+ */ +static void +task_free(taskq_t *tq, taskq_ent_t *t) +{ + ASSERT(tq); + ASSERT(t); + ASSERT(spin_is_locked(&tq->tq_lock)); + ASSERT(list_empty(&t->tqent_list)); + ASSERT(!timer_pending(&t->tqent_timer)); + + kmem_free(t, sizeof(taskq_ent_t)); + tq->tq_nalloc--; +} + +/* + * NOTE: Must be called with tq->tq_lock held, either destroys the + * taskq_ent_t if too many exist or moves it to the free list for later use. + */ +static void +task_done(taskq_t *tq, taskq_ent_t *t) +{ + ASSERT(tq); + ASSERT(t); + ASSERT(spin_is_locked(&tq->tq_lock)); + + /* Wake tasks blocked in taskq_wait_id() */ + wake_up_all(&t->tqent_waitq); + + list_del_init(&t->tqent_list); + + if (tq->tq_nalloc <= tq->tq_minalloc) { + t->tqent_id = 0; + t->tqent_func = NULL; + t->tqent_arg = NULL; + t->tqent_flags = 0; + + list_add_tail(&t->tqent_list, &tq->tq_free_list); + } else { + task_free(tq, t); + } +} + +/* + * When a delayed task timer expires remove it from the delay list and + * add it to the priority list in order for immediate processing. + */ +static void +task_expire(unsigned long data) +{ + taskq_ent_t *w, *t = (taskq_ent_t *)data; + taskq_t *tq = t->tqent_taskq; + struct list_head *l; + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + if (t->tqent_flags & TQENT_FLAG_CANCEL) { + ASSERT(list_empty(&t->tqent_list)); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + return; + } + + /* + * The priority list must be maintained in strict task id order + * from lowest to highest for lowest_id to be easily calculable. + */ + list_del(&t->tqent_list); + list_for_each_prev(l, &tq->tq_prio_list) { + w = list_entry(l, taskq_ent_t, tqent_list); + if (w->tqent_id < t->tqent_id) { + list_add(&t->tqent_list, l); + break; + } + } + if (l == &tq->tq_prio_list) + list_add(&t->tqent_list, &tq->tq_prio_list); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + wake_up(&tq->tq_work_waitq); +} + +/* + * Returns the lowest incomplete taskqid_t. The taskqid_t may + * be queued on the pending list, on the priority list, on the + * delay list, or on the work list currently being handled, but + * it is not 100% complete yet. + */ +static taskqid_t +taskq_lowest_id(taskq_t *tq) +{ + taskqid_t lowest_id = tq->tq_next_id; + taskq_ent_t *t; + taskq_thread_t *tqt; + + ASSERT(tq); + ASSERT(spin_is_locked(&tq->tq_lock)); + + if (!list_empty(&tq->tq_pend_list)) { + t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list); + lowest_id = MIN(lowest_id, t->tqent_id); + } + + if (!list_empty(&tq->tq_prio_list)) { + t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list); + lowest_id = MIN(lowest_id, t->tqent_id); + } + + if (!list_empty(&tq->tq_delay_list)) { + t = list_entry(tq->tq_delay_list.next, taskq_ent_t, tqent_list); + lowest_id = MIN(lowest_id, t->tqent_id); + } + + if (!list_empty(&tq->tq_active_list)) { + tqt = list_entry(tq->tq_active_list.next, taskq_thread_t, + tqt_active_list); + ASSERT(tqt->tqt_id != 0); + lowest_id = MIN(lowest_id, tqt->tqt_id); + } + + return (lowest_id); +} + +/* + * Insert a task into a list keeping the list sorted by increasing taskqid. 
+ */ +static void +taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) +{ + taskq_thread_t *w; + struct list_head *l; + + ASSERT(tq); + ASSERT(tqt); + ASSERT(spin_is_locked(&tq->tq_lock)); + + list_for_each_prev(l, &tq->tq_active_list) { + w = list_entry(l, taskq_thread_t, tqt_active_list); + if (w->tqt_id < tqt->tqt_id) { + list_add(&tqt->tqt_active_list, l); + break; + } + } + if (l == &tq->tq_active_list) + list_add(&tqt->tqt_active_list, &tq->tq_active_list); +} + +/* + * Find and return a task from the given list if it exists. The list + * must be in lowest to highest task id order. + */ +static taskq_ent_t * +taskq_find_list(taskq_t *tq, struct list_head *lh, taskqid_t id) +{ + struct list_head *l; + taskq_ent_t *t; + + ASSERT(spin_is_locked(&tq->tq_lock)); + + list_for_each(l, lh) { + t = list_entry(l, taskq_ent_t, tqent_list); + + if (t->tqent_id == id) + return (t); + + if (t->tqent_id > id) + break; + } + + return (NULL); +} + +/* + * Find an already dispatched task given the task id regardless of what + * state it is in. If a task is still pending or executing it will be + * returned and 'active' set appropriately. If the task has already + * been run then NULL is returned. + */ +static taskq_ent_t * +taskq_find(taskq_t *tq, taskqid_t id, int *active) +{ + taskq_thread_t *tqt; + struct list_head *l; + taskq_ent_t *t; + + ASSERT(spin_is_locked(&tq->tq_lock)); + *active = 0; + + t = taskq_find_list(tq, &tq->tq_delay_list, id); + if (t) + return (t); + + t = taskq_find_list(tq, &tq->tq_prio_list, id); + if (t) + return (t); + + t = taskq_find_list(tq, &tq->tq_pend_list, id); + if (t) + return (t); + + list_for_each(l, &tq->tq_active_list) { + tqt = list_entry(l, taskq_thread_t, tqt_active_list); + if (tqt->tqt_id == id) { + t = tqt->tqt_task; + *active = 1; + return (t); + } + } + + return (NULL); +} + +/* + * Theory for the taskq_wait_id(), taskq_wait_outstanding(), and + * taskq_wait() functions below. + * + * Taskq waiting is accomplished by tracking the lowest outstanding task + * id and the next available task id. As tasks are dispatched they are + * added to the tail of the pending, priority, or delay lists. As worker + * threads become available the tasks are removed from the heads of these + * lists and linked to the worker threads. This ensures the lists are + * kept sorted by lowest to highest task id. + * + * Therefore the lowest outstanding task id can be quickly determined by + * checking the head item from all of these lists. This value is stored + * with the taskq as the lowest id. It only needs to be recalculated when + * either the task with the current lowest id completes or is canceled. + * + * By blocking until the lowest task id exceeds the passed task id the + * taskq_wait_outstanding() function can be easily implemented. Similarly, + * by blocking until the lowest task id matches the next task id taskq_wait() + * can be implemented. + * + * Callers should be aware that when there are multiple worked threads it + * is possible for larger task ids to complete before smaller ones. Also + * when the taskq contains delay tasks with small task ids callers may + * block for a considerable length of time waiting for them to expire and + * execute. 
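
Because of the sorting described above, the lowest outstanding id is just the minimum over the head of each list, defaulting to the next id when everything has drained. A compact sketch (fields illustrative; zero marks an empty list here):

    struct tq_heads {
        /* head task id of each list, or 0 when that list is empty */
        unsigned long pend, prio, delay, active;
        unsigned long next_id;
    };

    /* Lowest incomplete task id; next_id when all work has finished. */
    static unsigned long lowest_id_sketch(const struct tq_heads *q)
    {
        const unsigned long ids[4] = { q->pend, q->prio, q->delay, q->active };
        unsigned long lo = q->next_id;
        int i;

        for (i = 0; i < 4; i++)
            if (ids[i] != 0 && ids[i] < lo)
                lo = ids[i];

        return (lo);
    }

taskq_wait_outstanding(id) then reduces to waiting for id < lowest, and taskq_wait() to waiting for lowest == next_id.
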
+ */ +static int +taskq_wait_id_check(taskq_t *tq, taskqid_t id) +{ + int active = 0; + int rc; + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + rc = (taskq_find(tq, id, &active) == NULL); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + return (rc); +} + +/* + * The taskq_wait_id() function blocks until the passed task id completes. + * This does not guarantee that all lower task ids have completed. + */ +void +taskq_wait_id(taskq_t *tq, taskqid_t id) +{ + wait_event(tq->tq_wait_waitq, taskq_wait_id_check(tq, id)); +} +EXPORT_SYMBOL(taskq_wait_id); + +static int +taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id) +{ + int rc; + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + rc = (id < tq->tq_lowest_id); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + return (rc); +} + +/* + * The taskq_wait_outstanding() function will block until all tasks with a + * lower taskqid than the passed 'id' have been completed. Note that all + * task id's are assigned monotonically at dispatch time. Zero may be + * passed for the id to indicate all tasks dispatch up to this point, + * but not after, should be waited for. + */ +void +taskq_wait_outstanding(taskq_t *tq, taskqid_t id) +{ + wait_event(tq->tq_wait_waitq, + taskq_wait_outstanding_check(tq, id ? id : tq->tq_next_id - 1)); +} +EXPORT_SYMBOL(taskq_wait_outstanding); + +static int +taskq_wait_check(taskq_t *tq) +{ + int rc; + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + rc = (tq->tq_lowest_id == tq->tq_next_id); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + return (rc); +} + +/* + * The taskq_wait() function will block until the taskq is empty. + * This means that if a taskq re-dispatches work to itself taskq_wait() + * callers will block indefinitely. + */ +void +taskq_wait(taskq_t *tq) +{ + wait_event(tq->tq_wait_waitq, taskq_wait_check(tq)); +} +EXPORT_SYMBOL(taskq_wait); + +static int +taskq_member_impl(taskq_t *tq, void *t) +{ + struct list_head *l; + taskq_thread_t *tqt; + int found = 0; + + ASSERT(tq); + ASSERT(t); + ASSERT(spin_is_locked(&tq->tq_lock)); + + list_for_each(l, &tq->tq_thread_list) { + tqt = list_entry(l, taskq_thread_t, tqt_thread_list); + if (tqt->tqt_thread == (struct task_struct *)t) { + found = 1; + break; + } + } + return (found); +} + +int +taskq_member(taskq_t *tq, void *t) +{ + int found; + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + found = taskq_member_impl(tq, t); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + return (found); +} +EXPORT_SYMBOL(taskq_member); + +/* + * Cancel an already dispatched task given the task id. Still pending tasks + * will be immediately canceled, and if the task is active the function will + * block until it completes. Preallocated tasks which are canceled must be + * freed by the caller. + */ +int +taskq_cancel_id(taskq_t *tq, taskqid_t id) +{ + taskq_ent_t *t; + int active = 0; + int rc = ENOENT; + + ASSERT(tq); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + t = taskq_find(tq, id, &active); + if (t && !active) { + list_del_init(&t->tqent_list); + t->tqent_flags |= TQENT_FLAG_CANCEL; + + /* + * When canceling the lowest outstanding task id we + * must recalculate the new lowest outstanding id. + */ + if (tq->tq_lowest_id == t->tqent_id) { + tq->tq_lowest_id = taskq_lowest_id(tq); + ASSERT3S(tq->tq_lowest_id, >, t->tqent_id); + } + + /* + * The task_expire() function takes the tq->tq_lock so drop + * drop the lock before synchronously cancelling the timer. 
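
The lock dance above exists because task_expire(), the timer callback, takes tq_lock itself; waiting for it synchronously while holding the lock would deadlock. The shape of the pattern, rendered with pthreads and a stubbed cancel (illustrative only):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Stub standing in for del_timer_sync(): waits out the callback. */
    static void cancel_timer_sync(void) { }

    static void cancel_entry(void)
    {
        pthread_mutex_lock(&lock);
        /* ... unlink the entry from its list ... */

        /*
         * The timer callback also takes 'lock', so waiting for it to
         * finish while holding 'lock' would deadlock: drop, cancel,
         * retake.
         */
        pthread_mutex_unlock(&lock);
        cancel_timer_sync();
        pthread_mutex_lock(&lock);

        /* ... finish the teardown under the lock ... */
        pthread_mutex_unlock(&lock);
    }
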
+ */ + if (timer_pending(&t->tqent_timer)) { + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + del_timer_sync(&t->tqent_timer); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + } + + if (!(t->tqent_flags & TQENT_FLAG_PREALLOC)) + task_done(tq, t); + + rc = 0; + } + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + if (active) { + taskq_wait_id(tq, id); + rc = EBUSY; + } + + return (rc); +} +EXPORT_SYMBOL(taskq_cancel_id); + +static int taskq_thread_spawn(taskq_t *tq); + +taskqid_t +taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) +{ + taskq_ent_t *t; + taskqid_t rc = 0; + + ASSERT(tq); + ASSERT(func); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + /* Taskq being destroyed and all tasks drained */ + if (!(tq->tq_flags & TASKQ_ACTIVE)) + goto out; + + /* Do not queue the task unless there is idle thread for it */ + ASSERT(tq->tq_nactive <= tq->tq_nthreads); + if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) + goto out; + + if ((t = task_alloc(tq, flags)) == NULL) + goto out; + + spin_lock(&t->tqent_lock); + + /* Queue to the priority list instead of the pending list */ + if (flags & TQ_FRONT) + list_add_tail(&t->tqent_list, &tq->tq_prio_list); + else + list_add_tail(&t->tqent_list, &tq->tq_pend_list); + + t->tqent_id = rc = tq->tq_next_id; + tq->tq_next_id++; + t->tqent_func = func; + t->tqent_arg = arg; + t->tqent_taskq = tq; + t->tqent_timer.data = 0; + t->tqent_timer.function = NULL; + t->tqent_timer.expires = 0; + + ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); + + spin_unlock(&t->tqent_lock); + + wake_up(&tq->tq_work_waitq); +out: + /* Spawn additional taskq threads if required. */ + if (tq->tq_nactive == tq->tq_nthreads) + (void) taskq_thread_spawn(tq); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + return (rc); +} +EXPORT_SYMBOL(taskq_dispatch); + +taskqid_t +taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, + uint_t flags, clock_t expire_time) +{ + taskqid_t rc = 0; + taskq_ent_t *t; + + ASSERT(tq); + ASSERT(func); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + /* Taskq being destroyed and all tasks drained */ + if (!(tq->tq_flags & TASKQ_ACTIVE)) + goto out; + + if ((t = task_alloc(tq, flags)) == NULL) + goto out; + + spin_lock(&t->tqent_lock); + + /* Queue to the delay list for subsequent execution */ + list_add_tail(&t->tqent_list, &tq->tq_delay_list); + + t->tqent_id = rc = tq->tq_next_id; + tq->tq_next_id++; + t->tqent_func = func; + t->tqent_arg = arg; + t->tqent_taskq = tq; + t->tqent_timer.data = (unsigned long)t; + t->tqent_timer.function = task_expire; + t->tqent_timer.expires = (unsigned long)expire_time; + add_timer(&t->tqent_timer); + + ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); + + spin_unlock(&t->tqent_lock); +out: + /* Spawn additional taskq threads if required. */ + if (tq->tq_nactive == tq->tq_nthreads) + (void) taskq_thread_spawn(tq); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + return (rc); +} +EXPORT_SYMBOL(taskq_dispatch_delay); + +void +taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, + taskq_ent_t *t) +{ + ASSERT(tq); + ASSERT(func); + + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + /* Taskq being destroyed and all tasks drained */ + if (!(tq->tq_flags & TASKQ_ACTIVE)) { + t->tqent_id = 0; + goto out; + } + + spin_lock(&t->tqent_lock); + + /* + * Mark it as a prealloc'd task. This is important + * to ensure that we don't free it later. 
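
TQENT_FLAG_PREALLOC is an ownership marker: a preallocated entry belongs to the dispatcher, so the completion path must snapshot the flags before running the function (which may re-dispatch the entry) and free only queue-owned entries. A tiny sketch of that ownership test (illustrative):

    #include <stdlib.h>

    #define ENT_FLAG_PREALLOC 0x1

    struct ent {
        unsigned int flags;
        void (*func)(void *);
        void *arg;
    };

    static void ent_run_and_done(struct ent *t)
    {
        /* Snapshot: t may be re-dispatched (and mutated) by t->func. */
        unsigned int flags = t->flags;

        t->func(t->arg);

        /* Prealloc'd entries belong to the caller; never free those. */
        if (!(flags & ENT_FLAG_PREALLOC))
            free(t);
    }
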
+ */
+ t->tqent_flags |= TQENT_FLAG_PREALLOC;
+
+ /* Queue to the priority list instead of the pending list */
+ if (flags & TQ_FRONT)
+ list_add_tail(&t->tqent_list, &tq->tq_prio_list);
+ else
+ list_add_tail(&t->tqent_list, &tq->tq_pend_list);
+
+ t->tqent_id = tq->tq_next_id;
+ tq->tq_next_id++;
+ t->tqent_func = func;
+ t->tqent_arg = arg;
+ t->tqent_taskq = tq;
+
+ spin_unlock(&t->tqent_lock);
+
+ wake_up(&tq->tq_work_waitq);
+out:
+ /* Spawn additional taskq threads if required. */
+ if (tq->tq_nactive == tq->tq_nthreads)
+ (void) taskq_thread_spawn(tq);
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+}
+EXPORT_SYMBOL(taskq_dispatch_ent);
+
+int
+taskq_empty_ent(taskq_ent_t *t)
+{
+ return list_empty(&t->tqent_list);
+}
+EXPORT_SYMBOL(taskq_empty_ent);
+
+void
+taskq_init_ent(taskq_ent_t *t)
+{
+ spin_lock_init(&t->tqent_lock);
+ init_waitqueue_head(&t->tqent_waitq);
+ init_timer(&t->tqent_timer);
+ INIT_LIST_HEAD(&t->tqent_list);
+ t->tqent_id = 0;
+ t->tqent_func = NULL;
+ t->tqent_arg = NULL;
+ t->tqent_flags = 0;
+ t->tqent_taskq = NULL;
+}
+EXPORT_SYMBOL(taskq_init_ent);
+
+/*
+ * Return the next pending task; preference is given to tasks on the
+ * priority list which were dispatched with TQ_FRONT.
+ */
+static taskq_ent_t *
+taskq_next_ent(taskq_t *tq)
+{
+ struct list_head *list;
+
+ ASSERT(spin_is_locked(&tq->tq_lock));
+
+ if (!list_empty(&tq->tq_prio_list))
+ list = &tq->tq_prio_list;
+ else if (!list_empty(&tq->tq_pend_list))
+ list = &tq->tq_pend_list;
+ else
+ return (NULL);
+
+ return (list_entry(list->next, taskq_ent_t, tqent_list));
+}
+
+/*
+ * Spawns a new thread for the specified taskq.
+ */
+static void
+taskq_thread_spawn_task(void *arg)
+{
+ taskq_t *tq = (taskq_t *)arg;
+
+ (void) taskq_thread_create(tq);
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ tq->tq_nspawn--;
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+}
+
+/*
+ * Spawn additional threads for dynamic taskqs (TASKQ_DYNAMIC) when the
+ * current number of threads is insufficient to handle the pending tasks.
+ * These new threads must be created by the dedicated dynamic_taskq to
+ * avoid deadlocks between thread creation and memory reclaim. The
+ * system_taskq, which is also a dynamic taskq, cannot be safely used
+ * for this.
+ */
+static int
+taskq_thread_spawn(taskq_t *tq)
+{
+ int spawning = 0;
+
+ if (!(tq->tq_flags & TASKQ_DYNAMIC))
+ return (0);
+
+ if ((tq->tq_nthreads + tq->tq_nspawn < tq->tq_maxthreads) &&
+ (tq->tq_flags & TASKQ_ACTIVE)) {
+ spawning = (++tq->tq_nspawn);
+ taskq_dispatch(dynamic_taskq, taskq_thread_spawn_task,
+ tq, TQ_NOSLEEP);
+ }
+
+ return (spawning);
+}
+
+/*
+ * Threads in a dynamic taskq should only exit once it has been completely
+ * drained and no other threads are actively servicing tasks. This prevents
+ * threads from being created and destroyed more than is required.
+ *
+ * The first thread in the thread list is treated as the primary thread.
+ * There is nothing special about the primary thread, but in order to keep
+ * all the taskq pids from changing we opt to make it long running.
+ */
+static int
+taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
+{
+ ASSERT(spin_is_locked(&tq->tq_lock));
+
+ if (!(tq->tq_flags & TASKQ_DYNAMIC))
+ return (0);
+
+ if (list_first_entry(&(tq->tq_thread_list), taskq_thread_t,
+ tqt_thread_list) == tqt)
+ return (0);
+
+ return
+ ((tq->tq_nspawn == 0) && /* No threads are being spawned */
+ (tq->tq_nactive == 0) && /* No threads are handling tasks */
+ (tq->tq_nthreads > 1) && /* More than 1 thread is running */
+ (!taskq_next_ent(tq)) && /* There are no pending tasks */
+ (spl_taskq_thread_dynamic));/* Dynamic taskqs are allowed */
+}
+
+static int
+taskq_thread(void *args)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ sigset_t blocked;
+ taskq_thread_t *tqt = args;
+ taskq_t *tq;
+ taskq_ent_t *t;
+ int seq_tasks = 0;
+
+ ASSERT(tqt);
+ tq = tqt->tqt_tq;
+ current->flags |= PF_NOFREEZE;
+
+ (void) spl_fstrans_mark();
+
+ sigfillset(&blocked);
+ sigprocmask(SIG_BLOCK, &blocked, NULL);
+ flush_signals(current);
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+ /* Immediately exit if more threads than allowed were created. */
+ if (tq->tq_nthreads >= tq->tq_maxthreads)
+ goto error;
+
+ tq->tq_nthreads++;
+ list_add_tail(&tqt->tqt_thread_list, &tq->tq_thread_list);
+ wake_up(&tq->tq_wait_waitq);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop()) {
+
+ if (list_empty(&tq->tq_pend_list) &&
+ list_empty(&tq->tq_prio_list)) {
+
+ if (taskq_thread_should_stop(tq, tqt)) {
+ wake_up_all(&tq->tq_wait_waitq);
+ break;
+ }
+
+ add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+ schedule();
+ seq_tasks = 0;
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ remove_wait_queue(&tq->tq_work_waitq, &wait);
+ } else {
+ __set_current_state(TASK_RUNNING);
+ }
+
+ if ((t = taskq_next_ent(tq)) != NULL) {
+ list_del_init(&t->tqent_list);
+
+ /* In order to support recursively dispatching a
+ * preallocated taskq_ent_t, tqent_id must be
+ * stored prior to executing tqent_func. */
+ tqt->tqt_id = t->tqent_id;
+ tqt->tqt_task = t;
+
+ /* We must store a copy of the flags prior to
+ * servicing the task (servicing a prealloc'd task
+ * returns the ownership of the tqent back to
+ * the caller of taskq_dispatch). Thus,
+ * tqent_flags _may_ change within the call. */
+ tqt->tqt_flags = t->tqent_flags;
+
+ taskq_insert_in_order(tq, tqt);
+ tq->tq_nactive++;
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+ /* Perform the requested task */
+ t->tqent_func(t->tqent_arg);
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ tq->tq_nactive--;
+ list_del_init(&tqt->tqt_active_list);
+ tqt->tqt_task = NULL;
+
+ /* For prealloc'd tasks, we don't free anything. */
+ if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
+ task_done(tq, t);
+
+ /* When the current lowest outstanding taskqid is
+ * done, calculate the new lowest outstanding id */
+ if (tq->tq_lowest_id == tqt->tqt_id) {
+ tq->tq_lowest_id = taskq_lowest_id(tq);
+ ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
+ }
+
+ /* Spawn additional taskq threads if required.
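+ * A new helper is requested only once this thread has serviced
+ * more than spl_taskq_thread_sequential tasks back to back;
+ * seq_tasks is reset whenever the thread sleeps in schedule()
+ * above, so brief bursts of work do not spawn threads.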
*/ + if ((++seq_tasks) > spl_taskq_thread_sequential && + taskq_thread_spawn(tq)) + seq_tasks = 0; + + tqt->tqt_id = 0; + tqt->tqt_flags = 0; + wake_up_all(&tq->tq_wait_waitq); + } else { + if (taskq_thread_should_stop(tq, tqt)) + break; + } + + set_current_state(TASK_INTERRUPTIBLE); + + } + + __set_current_state(TASK_RUNNING); + tq->tq_nthreads--; + list_del_init(&tqt->tqt_thread_list); +error: + kmem_free(tqt, sizeof (taskq_thread_t)); + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + + return (0); +} + +static taskq_thread_t * +taskq_thread_create(taskq_t *tq) +{ + static int last_used_cpu = 0; + taskq_thread_t *tqt; + + tqt = kmem_alloc(sizeof (*tqt), KM_PUSHPAGE); + INIT_LIST_HEAD(&tqt->tqt_thread_list); + INIT_LIST_HEAD(&tqt->tqt_active_list); + tqt->tqt_tq = tq; + tqt->tqt_id = 0; + + tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt, + "%s", tq->tq_name); + if (tqt->tqt_thread == NULL) { + kmem_free(tqt, sizeof (taskq_thread_t)); + return (NULL); + } + + if (spl_taskq_thread_bind) { + last_used_cpu = (last_used_cpu + 1) % num_online_cpus(); + kthread_bind(tqt->tqt_thread, last_used_cpu); + } + + if (spl_taskq_thread_priority) + set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(tq->tq_pri)); + + wake_up_process(tqt->tqt_thread); + + return (tqt); +} + +taskq_t * +taskq_create(const char *name, int nthreads, pri_t pri, + int minalloc, int maxalloc, uint_t flags) +{ + taskq_t *tq; + taskq_thread_t *tqt; + int count = 0, rc = 0, i; + + ASSERT(name != NULL); + ASSERT(minalloc >= 0); + ASSERT(maxalloc <= INT_MAX); + ASSERT(!(flags & (TASKQ_CPR_SAFE))); /* Unsupported */ + + /* Scale the number of threads using nthreads as a percentage */ + if (flags & TASKQ_THREADS_CPU_PCT) { + ASSERT(nthreads <= 100); + ASSERT(nthreads >= 0); + nthreads = MIN(nthreads, 100); + nthreads = MAX(nthreads, 0); + nthreads = MAX((num_online_cpus() * nthreads) / 100, 1); + } + + tq = kmem_alloc(sizeof (*tq), KM_PUSHPAGE); + if (tq == NULL) + return (NULL); + + spin_lock_init(&tq->tq_lock); + INIT_LIST_HEAD(&tq->tq_thread_list); + INIT_LIST_HEAD(&tq->tq_active_list); + tq->tq_name = strdup(name); + tq->tq_nactive = 0; + tq->tq_nthreads = 0; + tq->tq_nspawn = 0; + tq->tq_maxthreads = nthreads; + tq->tq_pri = pri; + tq->tq_minalloc = minalloc; + tq->tq_maxalloc = maxalloc; + tq->tq_nalloc = 0; + tq->tq_flags = (flags | TASKQ_ACTIVE); + tq->tq_next_id = 1; + tq->tq_lowest_id = 1; + INIT_LIST_HEAD(&tq->tq_free_list); + INIT_LIST_HEAD(&tq->tq_pend_list); + INIT_LIST_HEAD(&tq->tq_prio_list); + INIT_LIST_HEAD(&tq->tq_delay_list); + init_waitqueue_head(&tq->tq_work_waitq); + init_waitqueue_head(&tq->tq_wait_waitq); + + if (flags & TASKQ_PREPOPULATE) { + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + + for (i = 0; i < minalloc; i++) + task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW)); + + spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); + } + + if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic) + nthreads = 1; + + for (i = 0; i < nthreads; i++) { + tqt = taskq_thread_create(tq); + if (tqt == NULL) + rc = 1; + else + count++; + } + + /* Wait for all threads to be started before potential destroy */ + wait_event(tq->tq_wait_waitq, tq->tq_nthreads == count); + + if (rc) { + taskq_destroy(tq); + tq = NULL; + } + + return (tq); +} +EXPORT_SYMBOL(taskq_create); + +void +taskq_destroy(taskq_t *tq) +{ + struct task_struct *thread; + taskq_thread_t *tqt; + taskq_ent_t *t; + + ASSERT(tq); + spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); + tq->tq_flags &= ~TASKQ_ACTIVE; + 
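+ /*
+ * The flag is cleared while tq_lock is held, so the dispatch
+ * paths, which take tq_lock before testing TASKQ_ACTIVE, can
+ * no longer queue new work once the lock is dropped.
+ */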
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+ /*
+ * When TASKQ_ACTIVE is clear, new tasks may not be added, nor may
+ * new worker threads be spawned for a dynamic taskq.
+ */
+ if (dynamic_taskq != NULL)
+ taskq_wait_outstanding(dynamic_taskq, 0);
+
+ taskq_wait(tq);
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+
+ /*
+ * Signal each thread to exit and block until it does. Each thread
+ * is responsible for removing itself from the list and freeing its
+ * taskq_thread_t. This allows idle threads to opt to remove
+ * themselves from the taskq. They can be recreated as needed.
+ */
+ while (!list_empty(&tq->tq_thread_list)) {
+ tqt = list_entry(tq->tq_thread_list.next,
+ taskq_thread_t, tqt_thread_list);
+ thread = tqt->tqt_thread;
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+ kthread_stop(thread);
+
+ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ }
+
+ while (!list_empty(&tq->tq_free_list)) {
+ t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list);
+
+ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
+ list_del_init(&t->tqent_list);
+ task_free(tq, t);
+ }
+
+ ASSERT0(tq->tq_nthreads);
+ ASSERT0(tq->tq_nalloc);
+ ASSERT0(tq->tq_nspawn);
+ ASSERT(list_empty(&tq->tq_thread_list));
+ ASSERT(list_empty(&tq->tq_active_list));
+ ASSERT(list_empty(&tq->tq_free_list));
+ ASSERT(list_empty(&tq->tq_pend_list));
+ ASSERT(list_empty(&tq->tq_prio_list));
+ ASSERT(list_empty(&tq->tq_delay_list));
+
+ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+
+ strfree(tq->tq_name);
+ kmem_free(tq, sizeof (taskq_t));
+}
+EXPORT_SYMBOL(taskq_destroy);
+
+int
+spl_taskq_init(void)
+{
+ system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
+ maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+ if (system_taskq == NULL)
+ return (1);
+
+ dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
+ maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
+ if (dynamic_taskq == NULL) {
+ taskq_destroy(system_taskq);
+ return (1);
+ }
+
+ return (0);
+}
+
+void
+spl_taskq_fini(void)
+{
+ taskq_destroy(dynamic_taskq);
+ dynamic_taskq = NULL;
+
+ taskq_destroy(system_taskq);
+ system_taskq = NULL;
+}
diff -Naur spl-0.6.5.7/module/spl/spl-tsd.c spl-0.6.5.7.new/module/spl/spl-tsd.c
--- spl-0.6.5.7/module/spl/spl-tsd.c 2016-05-13 04:46:56.000000000 +0200
+++ spl-0.6.5.7.new/module/spl/spl-tsd.c 2016-08-01 16:43:31.475788506 +0200
@@ -528,6 +528,33 @@
 EXPORT_SYMBOL(tsd_get);
 
 /*
+ * tsd_get_by_thread - get thread specific data for specified thread
+ * @key: lookup key
+ * @thread: thread to lookup
+ *
+ * Caller must prevent racing tsd_create() or tsd_destroy(). This
+ * implementation is designed to be fast and scalable: it does not
+ * lock the entire table, only a single hash bin.
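+ *
+ * An illustrative call, with 'my_key' standing in for a key made by
+ * tsd_create() and 'other' for the thread of interest:
+ *
+ *   void *val = tsd_get_by_thread(my_key, other);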
+ */
+void *
+tsd_get_by_thread(uint_t key, kthread_t *thread)
+{
+ tsd_hash_entry_t *entry;
+
+ ASSERT3P(tsd_hash_table, !=, NULL);
+
+ if ((key == 0) || (key > TSD_KEYS_MAX))
+ return (NULL);
+
+ entry = tsd_hash_search(tsd_hash_table, key, thread->pid);
+ if (entry == NULL)
+ return (NULL);
+
+ return (entry->he_value);
+}
+EXPORT_SYMBOL(tsd_get_by_thread);
+
+/*
 * tsd_create - create thread specific data key
 * @keyp: lookup key address
 * @dtor: destructor called during tsd_destroy() or tsd_exit()
diff -Naur spl-0.6.5.7/module/spl/spl-vnode.c spl-0.6.5.7.new/module/spl/spl-vnode.c
--- spl-0.6.5.7/module/spl/spl-vnode.c 2016-05-13 04:46:56.000000000 +0200
+++ spl-0.6.5.7.new/module/spl/spl-vnode.c 2016-08-01 16:43:34.278796336 +0200
@@ -222,7 +222,6 @@
 ASSERT(vp->v_file);
 ASSERT(seg == UIO_SYSSPACE);
 ASSERT((ioflag & ~FAPPEND) == 0);
- ASSERT(x2 == RLIM64_INFINITY);
 
 fp = vp->v_file;
 
@@ -353,7 +352,8 @@
 if (rc)
 return (ERR_PTR(rc));
 
- spl_inode_lock(parent.dentry->d_inode);
+ /* use I_MUTEX_PARENT because vfs_unlink needs it */
+ spl_inode_lock_nested(parent.dentry->d_inode, I_MUTEX_PARENT);
 
 dentry = lookup_one_len(basename, parent.dentry, len);
 if (IS_ERR(dentry)) {
@@ -572,6 +572,9 @@
 offset_t offset, void *x6, void *x7)
 {
 int error = EOPNOTSUPP;
+#ifdef FALLOC_FL_PUNCH_HOLE
+ int fstrans;
+#endif
 
 if (cmd != F_FREESP || bfp->l_whence != 0)
 return (EOPNOTSUPP);
@@ -582,12 +585,24 @@
 
 #ifdef FALLOC_FL_PUNCH_HOLE
 /*
+ * May enter XFS, which generates a warning when PF_FSTRANS is set.
+ * To avoid this the flag is cleared across the fallocate() call
+ * below and then reset.
+ */
+ fstrans = spl_fstrans_check();
+ if (fstrans)
+ current->flags &= ~(PF_FSTRANS);
+
+ /*
 * When supported by the underlying file system preferentially
 * use the fallocate() callback to preallocate the space.
 */
 error = -spl_filp_fallocate(vp->v_file,
 FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
 bfp->l_start, bfp->l_len);
+
+ if (fstrans)
+ current->flags |= PF_FSTRANS;
+
 if (error == 0)
 return (0);
 #endif
@@ -656,6 +671,19 @@
 
 fp = file_find(fd, current);
 if (fp) {
+ lfp = fget(fd);
+ fput(fp->f_file);
+ /*
+ * areleasef() can cause us to see a stale reference when
+ * userspace has reused a file descriptor before areleasef()
+ * has run. fput() the stale reference and replace it. We
+ * retain the original reference count such that the concurrent
+ * areleasef() will decrement its reference and terminate.
+ */
+ if (lfp != fp->f_file) {
+ fp->f_file = lfp;
+ fp->f_vnode->v_file = lfp;
+ }
 atomic_inc(&fp->f_ref);
 spin_unlock(&vn_file_lock);
 return (fp);
diff -Naur spl-0.6.5.7/module/splat/splat-kmem.c spl-0.6.5.7.new/module/splat/splat-kmem.c
--- spl-0.6.5.7/module/splat/splat-kmem.c 2016-05-13 04:46:57.000000000 +0200
+++ spl-0.6.5.7.new/module/splat/splat-kmem.c 2016-08-01 16:43:22.175762529 +0200
@@ -590,6 +590,9 @@
 kmem_cache_data_t **kcd = NULL;
 int i, rc = 0, objs = 0;
 
+ /* Limit size for low memory machines (1/128 of memory) */
+ size = MIN(size, (physmem * PAGE_SIZE) >> 7);
+
 splat_vprint(file, name, "Testing size=%d, align=%d, flags=0x%04x\n",
 size, align, flags);
 
@@ -619,7 +622,7 @@
 * it to a single slab for the purposes of this test.
*/ #ifdef _LP64 - objs = SPL_KMEM_CACHE_OBJ_PER_SLAB * 4; + objs = kcp->kcp_cache->skc_slab_objs * 4; #else objs = 1; #endif diff -Naur spl-0.6.5.7/module/splat/splat-rwlock.c spl-0.6.5.7.new/module/splat/splat-rwlock.c --- spl-0.6.5.7/module/splat/splat-rwlock.c 2016-05-13 04:46:57.000000000 +0200 +++ spl-0.6.5.7.new/module/splat/splat-rwlock.c 2016-08-01 16:43:34.280796341 +0200 @@ -55,8 +55,12 @@ #define SPLAT_RWLOCK_TEST5_DESC "Write downgrade" #define SPLAT_RWLOCK_TEST6_ID 0x0706 -#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade" -#define SPLAT_RWLOCK_TEST6_DESC "Read upgrade" +#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade-1" +#define SPLAT_RWLOCK_TEST6_DESC "rwsem->count value" + +#define SPLAT_RWLOCK_TEST7_ID 0x0707 +#define SPLAT_RWLOCK_TEST7_NAME "rw_tryupgrade-2" +#define SPLAT_RWLOCK_TEST7_DESC "Read upgrade" #define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL #define SPLAT_RWLOCK_TEST_NAME "rwlock_test" @@ -580,19 +584,65 @@ splat_init_rw_priv(rwp, file); rw_enter(&rwp->rw_rwlock, RW_READER); - if (!RW_READ_HELD(&rwp->rw_rwlock)) { + if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) != + SPL_RWSEM_SINGLE_READER_VALUE) { + splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, + "We assumed single reader rwsem->count " + "should be %ld, but is %ld\n", + SPL_RWSEM_SINGLE_READER_VALUE, + RWSEM_COUNT(SEM(&rwp->rw_rwlock))); + rc = -ENOLCK; + goto out; + } + rw_exit(&rwp->rw_rwlock); + + rw_enter(&rwp->rw_rwlock, RW_WRITER); + if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) != + SPL_RWSEM_SINGLE_WRITER_VALUE) { splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, + "We assumed single writer rwsem->count " + "should be %ld, but is %ld\n", + SPL_RWSEM_SINGLE_WRITER_VALUE, + RWSEM_COUNT(SEM(&rwp->rw_rwlock))); + rc = -ENOLCK; + goto out; + } + rc = 0; + splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s", + "rwsem->count same as we assumed\n"); +out: + rw_exit(&rwp->rw_rwlock); + rw_destroy(&rwp->rw_rwlock); + kfree(rwp); + + return rc; +} + +static int +splat_rwlock_test7(struct file *file, void *arg) +{ + rw_priv_t *rwp; + int rc; + + rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); + if (rwp == NULL) + return -ENOMEM; + + splat_init_rw_priv(rwp, file); + + rw_enter(&rwp->rw_rwlock, RW_READER); + if (!RW_READ_HELD(&rwp->rw_rwlock)) { + splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should be read lock: %d\n", RW_READ_HELD(&rwp->rw_rwlock)); rc = -ENOLCK; goto out; } -#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) /* With one reader upgrade should never fail. 
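 * rw_tryupgrade() returns non-zero on success, so a zero return
 * below means the upgrade from reader to writer was refused.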
*/ rc = rw_tryupgrade(&rwp->rw_rwlock); if (!rc) { - splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, + splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock failed upgrade from reader: %d\n", RW_READ_HELD(&rwp->rw_rwlock)); rc = -ENOLCK; @@ -600,7 +650,7 @@ } if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) { - splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should " + splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should " "have 0 (not %d) reader and 1 (not %d) writer\n", RW_READ_HELD(&rwp->rw_rwlock), RW_WRITE_HELD(&rwp->rw_rwlock)); @@ -608,13 +658,8 @@ } rc = 0; - splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s", + splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "%s", "rwlock properly upgraded\n"); -#else - rc = 0; - splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s", - "rw_tryupgrade() is disabled for this arch\n"); -#endif out: rw_exit(&rwp->rw_rwlock); rw_destroy(&rwp->rw_rwlock); @@ -652,6 +697,8 @@ SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5); SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC, SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6); + SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC, + SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7); return sub; } @@ -660,6 +707,7 @@ splat_rwlock_fini(splat_subsystem_t *sub) { ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID); SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID); SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID); SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID); diff -Naur spl-0.6.5.7/module/splat/splat-taskq.c spl-0.6.5.7.new/module/splat/splat-taskq.c --- spl-0.6.5.7/module/splat/splat-taskq.c 2016-05-13 04:46:57.000000000 +0200 +++ spl-0.6.5.7.new/module/splat/splat-taskq.c 2016-08-01 16:43:34.280796341 +0200 @@ -1040,11 +1040,12 @@ error = (tq_arg->depth == SPLAT_TASKQ_DEPTH_MAX ? 0 : -EINVAL); + splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, + "Taskq '%s' destroying\n", tq_arg->name); + kmem_free(tqe, sizeof (taskq_ent_t)); kmem_free(tq_arg, sizeof (splat_taskq_arg_t)); - splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, - "Taskq '%s' destroying\n", tq_arg->name); taskq_destroy(tq); return (error); diff -Naur spl-0.6.5.7/rpm/generic/spl-dkms.spec.in.orig spl-0.6.5.7.new/rpm/generic/spl-dkms.spec.in.orig --- spl-0.6.5.7/rpm/generic/spl-dkms.spec.in.orig 1970-01-01 01:00:00.000000000 +0100 +++ spl-0.6.5.7.new/rpm/generic/spl-dkms.spec.in.orig 2016-08-01 16:43:14.783741881 +0200 @@ -0,0 +1,71 @@ +%{?!packager: %define packager Brian Behlendorf } + +%define module @PACKAGE@ +%define mkconf scripts/dkms.mkconf + +Name: %{module}-dkms + +Version: @VERSION@ +Release: @RELEASE@%{?dist} +Summary: Kernel module(s) (dkms) + +Group: System Environment/Kernel +License: GPLv2+ +URL: http://zfsonlinux.org/ +Source0: %{module}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildArch: noarch + +Requires: dkms >= 2.2.0.2 +Requires: gcc, make, perl +Requires: kernel-devel +Provides: %{module}-kmod = %{version} + +%description +This package contains the dkms kernel modules required to emulate +several interfaces provided by the Solaris kernel. 
+ +%prep +%setup -q -n %{module}-%{version} + +%build +%{mkconf} -n %{module} -v %{version} -f dkms.conf + +%install +if [ "$RPM_BUILD_ROOT" != "/" ]; then + rm -rf $RPM_BUILD_ROOT +fi +mkdir -p $RPM_BUILD_ROOT/usr/src/ +cp -rf ${RPM_BUILD_DIR}/%{module}-%{version} $RPM_BUILD_ROOT/usr/src/ + +%clean +if [ "$RPM_BUILD_ROOT" != "/" ]; then + rm -rf $RPM_BUILD_ROOT +fi + +%files +%defattr(-,root,root) +/usr/src/%{module}-%{version} + +%post +for POSTINST in /usr/lib/dkms/common.postinst; do + if [ -f $POSTINST ]; then + $POSTINST %{module} %{version} + exit $? + fi + echo "WARNING: $POSTINST does not exist." +done +echo -e "ERROR: DKMS version is too old and %{module} was not" +echo -e "built with legacy DKMS support." +echo -e "You must either rebuild %{module} with legacy postinst" +echo -e "support or upgrade DKMS to a more current version." +exit 1 + +%preun +echo -e "Uninstall of %{module} module (version %{version}) beginning:" +dkms remove -m %{module} -v %{version} --all --rpm_safe_upgrade +exit 0 + +%changelog +* %(date "+%a %b %d %Y") %packager %{version}-%{release} +- Automatic build by DKMS diff -Naur spl-0.6.5.7/rpm/generic/spl.spec.in spl-0.6.5.7.new/rpm/generic/spl.spec.in --- spl-0.6.5.7/rpm/generic/spl.spec.in 2016-05-13 04:06:38.000000000 +0200 +++ spl-0.6.5.7.new/rpm/generic/spl.spec.in 2016-08-01 16:43:23.436766051 +0200 @@ -28,6 +28,7 @@ %files %doc AUTHORS COPYING DISCLAIMER +%{_bindir}/* %{_sbindir}/* %{_mandir}/man1/* %{_mandir}/man5/* diff -Naur spl-0.6.5.7/rpm/generic/spl.spec.in.orig spl-0.6.5.7.new/rpm/generic/spl.spec.in.orig --- spl-0.6.5.7/rpm/generic/spl.spec.in.orig 1970-01-01 01:00:00.000000000 +0100 +++ spl-0.6.5.7.new/rpm/generic/spl.spec.in.orig 2016-05-13 04:06:38.000000000 +0200 @@ -0,0 +1,73 @@ +Name: @PACKAGE@ +Version: @VERSION@ +Release: @RELEASE@%{?dist} +Summary: Commands to control the kernel modules + +Group: System Environment/Kernel +License: GPLv2+ +URL: http://zfsonlinux.org/ +Source0: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +Requires: %{name}-kmod = %{version} +Provides: %{name}-kmod-common = %{version} + +%description +This package contains the commands to verify the SPL +kernel modules are functioning properly. 
+ +%prep +%setup -q + +%build +%configure --with-config=user +make %{?_smp_mflags} + +%install +%{__rm} -rf $RPM_BUILD_ROOT +make install DESTDIR=%{?buildroot} + +%files +%doc AUTHORS COPYING DISCLAIMER +%{_sbindir}/* +%{_mandir}/man1/* +%{_mandir}/man5/* + +%changelog +* Thu May 12 2016 Ned Bass - 0.6.5.7-1 +- Fix PPC build failure zfsonlinux/spl#516 +* Tue Mar 22 2016 Ned Bass - 0.6.5.6-1 +- Remove artificial architecture restrictions in packaging +- Add support for s390[x] zfsonlinux/spl#537 +* Wed Mar 9 2016 Ned Bass - 0.6.5.5-1 +- Linux 4.5 compatibility zfsonlinux/spl#524 +- Create working debuginfo packages on Red Hat zfsonlinux/zfs#4224 +- Allow copy-builtin to run multiple times zfsonlinux/spl#526 +- Use safer flags for in-kernel memory allocations zfsonlinux/spl#523 +- Fix potential deadlock in cv_wait() zfsonlinux/zfs#4106 +- Fix livelock in shrinker zfsonlinux/zfs#3936 +* Fri Jan 8 2016 Ned Bass - 0.6.5.4-1 +- Build fixes on SPARC and some kernels +- Fix taskq dynamic spawning deadlock +- Fix builtin kernel builds +- Fix crash due to overflow in P2ROUNDUP macro +- Fix deadlock during direct memory reclaim +* Tue Oct 13 2015 Ned Bass - 0.6.5.3-1 +- Fix CPU hotplug zfsonlinux/spl#482 +- Disable dynamic taskqs by default to avoid deadlock zfsonlinux/spl#484 +* Tue Sep 29 2015 Ned Bass - 0.6.5.2-1 +- Released 0.6.5.2-1 +- Fix PAX Patch/Grsec SLAB_USERCOPY panic zfsonlinux/zfs#3796 +- Always remove during dkms uninstall/update zfsonlinux/spl#476 +* Thu Sep 19 2015 Ned Bass - 0.6.5.1-1 +- Released 0.6.5.1-1, no changes from spl-0.6.5 +* Thu Sep 10 2015 Brian Behlendorf - 0.6.5-1 +- Released 0.6.5-1, detailed release notes are available at: +- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.6.5 +* Wed Apr 8 2015 Brian Behlendorf - 0.6.4-1 +- Released 0.6.4-1 +* Thu Jun 12 2014 Brian Behlendorf - 0.6.3-1 +- Released 0.6.3-1 +* Wed Aug 21 2013 Brian Behlendorf - 0.6.2-1 +- Released 0.6.2-1 +* Fri Mar 22 2013 Brian Behlendorf - 0.6.1-1 +- First official stable release.