Skip to content

Commit 054d182

Browse files
committed
Refactor a bunch, bump version
1 parent 65484c6 commit 054d182

File tree

3 files changed

+89
-46
lines changed

3 files changed

+89
-46
lines changed

README.markdown

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,14 @@ Further examples to be provided at a later time.
8181

8282
### TODO - Next Steps
8383

84+
* Refactor all operations to use iterCSV as the basic building block --
85+
in progress.
86+
* The CSVeable typeclass can be refactored to have a more minimal definition.
87+
* Get mapCSVFiles out of the typeclass if possible.
8488
* Need to think about specializing an Exception type for the library and
8589
properly notifying the user when parsing-related problems occur.
8690
* Some operations can be further broken down to their atoms, increasing the
8791
flexibility of the library.
88-
* The CSVeable typeclass can be refactored to have a more minimal definition.
8992
* Operating on Text in addition to ByteString would be phenomenal.
9093
* A test-suite needs to be added.
9194
* Some benchmarking would be nice.

csv-enumerator.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Name: csv-enumerator
2-
Version: 0.8.2
2+
Version: 0.9.0
33
Synopsis: A flexible, fast, enumerator-based CSV parser library for Haskell.
44
Homepage: http://github.com/ozataman/csv-enumerator
55
License: BSD3

src/Data/CSV/Enumerator.hs

Lines changed: 84 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ module Data.CSV.Enumerator
1010
, MapRow
1111

1212
, CSVeable(..)
13-
1413
, ParsedRow(..)
1514

1615
-- * CSV Setttings
@@ -22,12 +21,18 @@ module Data.CSV.Enumerator
2221
, writeCSVFile
2322
, appendCSVFile
2423

25-
-- * Folding Over CSV Files
26-
-- | These enumerators generalize the map* family of functions with a running accumulator.
24+
-- * Generic Folds Over CSV Files
25+
-- | These operations enable you to do whatever you want to CSV files;
26+
-- including interleaved IO, etc.
27+
, foldCSVFile
2728
, CSVAction
2829
, funToIter
2930
, funToIterIO
3031

32+
-- * Mapping Over CSV Files
33+
, mapCSVFile
34+
, mapIntoHandle
35+
3136
-- * Primitive Iteratees
3237
, collectRows
3338
, outputRowIter
@@ -44,7 +49,7 @@ where
4449

4550
import Control.Applicative hiding (many)
4651
import Control.Exception (bracket, SomeException)
47-
import Control.Monad (mzero, mplus, foldM, when)
52+
import Control.Monad (mzero, mplus, foldM, when, liftM)
4853
import Control.Monad.IO.Class (liftIO, MonadIO)
4954
import qualified Data.ByteString as B
5055
import qualified Data.ByteString.Char8 as B8
@@ -81,40 +86,14 @@ class CSVeable r where
8186
-> a
8287
-> E.Iteratee B.ByteString IO a
8388

84-
-- | Iteratee to push rows into a given file
85-
fileSink :: CSVSettings
86-
-> FilePath
87-
-> (Maybe Handle, Int)
88-
-> ParsedRow r
89-
-> E.Iteratee B.ByteString IO (Maybe Handle, Int)
90-
91-
-- | Open & fold over the CSV file. Processing starts on row 2 for MapRow
92-
-- instance to use first row as column headers.
93-
foldCSVFile :: FilePath -- ^ File to open as a CSV file
94-
-> CSVSettings -- ^ CSV settings to use on the input file
95-
-> CSVAction r a -- ^ Fold action
96-
-> a -- ^ Initial accumulator
97-
-> IO (Either SomeException a) -- ^ Error or the resulting accumulator
98-
99-
-- | Take a CSV file, apply function to each of its rows and save the
100-
-- resulting rows into a new file.
101-
--
102-
-- Each row is simply a list of fields.
103-
mapCSVFile :: FilePath -- ^ Input file
104-
-> CSVSettings -- ^ CSV Settings
105-
-> (r -> [r]) -- ^ A function to map a row onto rows
106-
-> FilePath -- ^ Output file
107-
-> IO (Either SomeException Int) -- ^ Number of rows processed
108-
mapCSVFile fi s f fo = do
109-
res <- foldCSVFile fi s iter (Nothing, 0)
110-
return $ snd `fmap` res
111-
where
112-
iter !acc (ParsedRow (Just !r)) = foldM chain acc (f r)
113-
iter !acc x = fileSink s fo acc x
114-
115-
chain !acc !r = singleSink r acc
11689

117-
singleSink !x !acc = fileSink s fo acc (ParsedRow (Just x))
90+
-- | Iteratee to push rows into a given file
91+
fileSink
92+
:: CSVSettings
93+
-> FilePath
94+
-> (Maybe Handle, Int)
95+
-> ParsedRow r
96+
-> E.Iteratee B.ByteString IO (Maybe Handle, Int)
11897

11998

12099
----------------------------------------------------------------------------
@@ -152,11 +131,6 @@ instance CSVeable Row where
152131
comboIter acc' = procRow acc' >>= loop
153132

154133

155-
foldCSVFile fp csvs f acc = E.run iter
156-
where
157-
iter = enumFile fp $$ iterCSV csvs f acc
158-
159-
160134
fileSink csvs fo = iter
161135
where
162136
iter :: (Maybe Handle, Int)
@@ -237,8 +211,6 @@ instance CSVeable MapRow where
237211

238212
toMapCSV !headers !fs = yield (fs >>= (Just . M.fromList . zip headers)) (E.Chunks [])
239213

240-
foldCSVFile fp csvs f !acc = E.run (enumFile fp $$ iterCSV csvs f acc)
241-
242214

243215
fileSink s fo = mapIter
244216
where
@@ -298,6 +270,42 @@ instance CSVeable MapRow where
298270
return (Just oh, i+1)
299271

300272

273+
------------------------------------------------------------------------------
274+
-- | Open & fold over the CSV file.
275+
--
276+
-- Processing starts on row 2 for MapRow instance to use first row as column
277+
-- headers.
278+
foldCSVFile
279+
:: (CSVeable r)
280+
=> FilePath -- ^ File to open as a CSV file
281+
-> CSVSettings -- ^ CSV settings to use on the input file
282+
-> CSVAction r a -- ^ Fold action
283+
-> a -- ^ Initial accumulator
284+
-> IO (Either SomeException a) -- ^ Error or the resulting accumulator
285+
foldCSVFile fp csvs f acc = E.run (enumFile fp $$ iterCSV csvs f acc)
286+
287+
288+
------------------------------------------------------------------------------
289+
-- | Take a CSV file, apply function to each of its rows and save the
290+
-- resulting rows into a new file.
291+
--
292+
-- Each row is simply a list of fields.
293+
mapCSVFile
294+
:: (CSVeable r)
295+
=> FilePath -- ^ Input file
296+
-> CSVSettings -- ^ CSV Settings
297+
-> (r -> [r]) -- ^ A function to map a row onto rows
298+
-> FilePath -- ^ Output file
299+
-> IO (Either SomeException Int) -- ^ Number of rows processed
300+
mapCSVFile fi s f fo = do
301+
res <- foldCSVFile fi s iter (Nothing, 0)
302+
return $ snd `fmap` res
303+
where
304+
iter !acc (ParsedRow (Just !r)) = foldM chain acc (f r)
305+
iter !acc x = fileSink s fo acc x
306+
chain !acc !r = fileSink s fo acc (ParsedRow (Just r))
307+
308+
301309
------------------------------------------------------------------------------
302310
readCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
303311
-> FilePath -- ^ FilePath
@@ -431,6 +439,38 @@ funToIter f = iterf
431439
iterf !acc r = yield (f acc r) (E.Chunks [])
432440

433441

442+
443+
------------------------------------------------------------------------------
444+
-- | Create an iteratee that can map over a CSV stream and output results to
445+
-- a handle in an interleaved fashion.
446+
--
447+
-- Example use: Let's map over a CSV file coming in through 'stdin' and push
448+
-- results to 'stdout'.
449+
--
450+
-- > f r = return [r] -- a function that just returns the given row
451+
--
452+
-- > E.run (E.enumHandle 4096 stdin $$ mapIntoHandle defCSVSettings True stdout f)
453+
mapIntoHandle
454+
:: (CSVeable r)
455+
=> CSVSettings -- ^ 'CSVSettings'
456+
-> Bool -- ^ Whether to write headers
457+
-> Handle -- ^ Handle to stream results
458+
-> (r -> IO [r]) -- ^ Map function
459+
-> E.Iteratee ByteString IO Int -- ^ Resulting Iteratee
460+
mapIntoHandle csvs outh h f = do
461+
snd `liftM` iterCSV csvs (funToIterIO f') (False,0)
462+
where
463+
f' acc EOF = return acc
464+
f' acc (ParsedRow Nothing) = return acc
465+
f' (False, _) r'@(ParsedRow (Just r)) = do
466+
when outh $ writeHeaders csvs h [r]
467+
f' (True, 0) r'
468+
f' (True, !i) (ParsedRow (Just r)) = do
469+
rs <- f r
470+
outputRows csvs h rs
471+
return (True, i+1)
472+
473+
434474
------------------------------------------------------------------------------
435475
-- | Just collect all rows into an array. This will cancel out the incremental
436476
-- nature of this library.

0 commit comments

Comments
 (0)