If provided a two arguments (n x), it returns a list of n permutations
of x. If provided three (n x y) arguments, returns a list with two with n permutations of
each arguments, where each permutation is drawn from the pooled arguments.
Arguments:
n -- number of randomized versions of the original two groups to return
x -- group 1
y -- (default nil) group 2
Examples:
(use '(incanter core stats))
(sample-permutations 10 (range 10))
(sample-permutations 10 (range 10) (range 10 20))
;; extended example with plant-growth data
(use '(incanter core stats datasets charts))
;; load the plant-growth dataset
(def data (to-matrix (get-dataset :plant-growth)))
;; break the first column of the data into groups based on treatment (second column).
(def groups (group-on data 1 :cols 0))
;; define a function for the statistic of interest
(defn means-diff [x y] (minus (mean x) (mean y)))
;; calculate the difference in sample means between the two groups
(def samp-mean-diff (means-diff (first groups) (second groups))) ;; 0.371
;; create 500 permuted versions of the original two groups
(def permuted-groups (sample-permutations 1000 (first groups) (second groups)))
;; calculate the difference of means of the 500 samples
(def permuted-means-diffs1 (map means-diff (first permuted-groups) (second permuted-groups)))
;; use an indicator function that returns 1 when the randomized means diff is greater
;; than the original sample mean, and zero otherwise. Then take the mean of this sequence
;; of ones and zeros. That is the proportion of times you would see a value more extreme
;; than the sample mean (i.e. the p-value).
(mean (indicator #(> % samp-mean-diff) permuted-means-diffs1)) ;; 0.088
;; calculate the 95% confidence interval of the null hypothesis. If the
;; sample difference in means is outside of this range, that is evidence
;; that the two means are statistically significantly different.
(quantile permuted-means-diffs1 :probs [0.025 0.975]) ;; (-0.606 0.595)
;; Plot a histogram of the permuted-means-diffs using the density option,
;; instead of the default frequency, and then add a normal pdf curve with
;; the mean and sd of permuted-means-diffs data for a visual comparison.
(doto (histogram permuted-means-diffs1 :density true)
(add-lines (range -1 1 0.01) (pdf-normal (range -1 1 0.01)
:mean (mean permuted-means-diffs1)
:sd (sd permuted-means-diffs1)))
view)
;; compare the means of treatment 2 and control
(def permuted-groups (sample-permutations 1000 (first groups) (last groups)))
(def permuted-means-diffs2 (map means-diff (first permuted-groups) (second permuted-groups)))
(def samp-mean-diff (means-diff (first groups) (last groups))) ;; -0.4939
(mean (indicator #(< % samp-mean-diff) permuted-means-diffs2)) ;; 0.022
(quantile permuted-means-diffs2 :probs [0.025 0.975]) ;; (-0.478 0.466)
;; compare the means of treatment 1 and treatment 2
(def permuted-groups (sample-permutations 1000 (second groups) (last groups)))
(def permuted-means-diffs3 (map means-diff (first permuted-groups) (second permuted-groups)))
(def samp-mean-diff (means-diff (second groups) (last groups))) ;; -0.865
(mean (indicator #(< % samp-mean-diff) permuted-means-diffs3)) ;; 0.002
(quantile permuted-means-diffs3 :probs [0.025 0.975]) ;; (-0.676 0.646)
(doto (box-plot permuted-means-diffs1)
(add-box-plot permuted-means-diffs2)
(add-box-plot permuted-means-diffs3)
view)
Further Reading:
http://en.wikipedia.org/wiki/Resampling_(statistics)
Comments top
No comments for sample-permutations. Log in to add a comment.