If provided a two arguments (n x), it returns a list of n permutations

of x. If provided three (n x y) arguments, returns a list with two with n permutations of

each arguments, where each permutation is drawn from the pooled arguments.

Arguments:

n -- number of randomized versions of the original two groups to return

x -- group 1

y -- (default nil) group 2

Examples:

(use '(incanter core stats))

(sample-permutations 10 (range 10))

(sample-permutations 10 (range 10) (range 10 20))

;; extended example with plant-growth data

(use '(incanter core stats datasets charts))

;; load the plant-growth dataset

(def data (to-matrix (get-dataset :plant-growth)))

;; break the first column of the data into groups based on treatment (second column).

(def groups (group-on data 1 :cols 0))

;; define a function for the statistic of interest

(defn means-diff [x y] (minus (mean x) (mean y)))

;; calculate the difference in sample means between the two groups

(def samp-mean-diff (means-diff (first groups) (second groups))) ;; 0.371

;; create 500 permuted versions of the original two groups

(def permuted-groups (sample-permutations 1000 (first groups) (second groups)))

;; calculate the difference of means of the 500 samples

(def permuted-means-diffs1 (map means-diff (first permuted-groups) (second permuted-groups)))

;; use an indicator function that returns 1 when the randomized means diff is greater

;; than the original sample mean, and zero otherwise. Then take the mean of this sequence

;; of ones and zeros. That is the proportion of times you would see a value more extreme

;; than the sample mean (i.e. the p-value).

(mean (indicator #(> % samp-mean-diff) permuted-means-diffs1)) ;; 0.088

;; calculate the 95% confidence interval of the null hypothesis. If the

;; sample difference in means is outside of this range, that is evidence

;; that the two means are statistically significantly different.

(quantile permuted-means-diffs1 :probs [0.025 0.975]) ;; (-0.606 0.595)

;; Plot a histogram of the permuted-means-diffs using the density option,

;; instead of the default frequency, and then add a normal pdf curve with

;; the mean and sd of permuted-means-diffs data for a visual comparison.

(doto (histogram permuted-means-diffs1 :density true)

(add-lines (range -1 1 0.01) (pdf-normal (range -1 1 0.01)

:mean (mean permuted-means-diffs1)

:sd (sd permuted-means-diffs1)))

view)

;; compare the means of treatment 2 and control

(def permuted-groups (sample-permutations 1000 (first groups) (last groups)))

(def permuted-means-diffs2 (map means-diff (first permuted-groups) (second permuted-groups)))

(def samp-mean-diff (means-diff (first groups) (last groups))) ;; -0.4939

(mean (indicator #(< % samp-mean-diff) permuted-means-diffs2)) ;; 0.022

(quantile permuted-means-diffs2 :probs [0.025 0.975]) ;; (-0.478 0.466)

;; compare the means of treatment 1 and treatment 2

(def permuted-groups (sample-permutations 1000 (second groups) (last groups)))

(def permuted-means-diffs3 (map means-diff (first permuted-groups) (second permuted-groups)))

(def samp-mean-diff (means-diff (second groups) (last groups))) ;; -0.865

(mean (indicator #(< % samp-mean-diff) permuted-means-diffs3)) ;; 0.002

(quantile permuted-means-diffs3 :probs [0.025 0.975]) ;; (-0.676 0.646)

(doto (box-plot permuted-means-diffs1)

(add-box-plot permuted-means-diffs2)

(add-box-plot permuted-means-diffs3)

view)

Further Reading:

http://en.wikipedia.org/wiki/Resampling_(statistics)

## Comments top

## No comments for

sample-permutations. Log in to add a comment.