(defn read-dataset
  "
  Returns a dataset read from a file or a URL.

  Options:
    :delim (default \\,), other options (\\tab \\space \\| etc)
    :quote (default \\\") character used for quoting strings
    :skip (default 0) the number of lines to skip at the top of the file.
    :header (default false) indicates the file has a header line
    :keyword-headers (default true) convert the column names to keywords;
      pass false to keep the column names as read (or, when there is no
      header line, as the generated strings \"col0\", \"col1\", ...)
    :compress-delim (default true if delim = \\space, false otherwise) means
      compress multiple adjacent delimiters into a single delimiter

  Rows whose fields are all empty strings are dropped. Every field, including
  those of the header line, is passed through parse-string.
  "
  ([filename & options]
   (let [opts (when options (apply assoc {} options))
         ;; Look up an option, falling back to the default only when the
         ;; option is absent (nil). Using `or` here would also discard an
         ;; explicit `false`, making it impossible to switch off an option
         ;; whose default is true.
         option (fn [k default]
                  (let [v (get opts k)]
                    (if (nil? v) default v)))
         delim (option :delim \,) ; comma delim default
         quote-char (option :quote \")
         keyword-headers? (option :keyword-headers true)
         skip (option :skip 0)
         header? (option :header false)
         compress-delim? (option :compress-delim (= delim \space))
         blank? (fn [field] (= field ""))]
     (with-open [reader ^CSVReader (CSVReader.
                                     (get-input-reader filename)
                                     delim
                                     quote-char
                                     skip)]
       (let [data-lines (map seq (seq (.readAll reader)))
             ;; With compressed delimiters, the empty fields produced by runs
             ;; of adjacent delimiters are removed from each line.
             compacted (if compress-delim?
                         (map (fn [line] (remove blank? line)) data-lines)
                         data-lines)
             ;; Keep only lines that have at least one non-empty field.
             raw-data (filter (fn [line] (some (complement blank?) line))
                              compacted)
             ;; Realised into vectors here, while the reader is still open.
             parsed-data (into [] (map (fn [row] (into [] (map parse-string row)))
                                       raw-data))]
         (if header?
           ;; have header row
           (dataset (if keyword-headers?
                      (map keyword (first parsed-data))
                      (first parsed-data))
                    (rest parsed-data))
           ;; no header row so build a default one: col0, col1, ...
           (let [col-count (count (first parsed-data))
                 col-names (vec (map #(str "col" %) (range col-count)))]
             (dataset (if keyword-headers?
                        (map keyword col-names)
                        col-names)
                      parsed-data))))))))
Used in 0 other vars
Comments top
No comments for read-dataset. Log in to add a comment.