@@ -10,7 +10,6 @@ module Data.CSV.Enumerator
1010 , MapRow
1111
1212 , CSVeable (.. )
13-
1413 , ParsedRow (.. )
1514
1615 -- * CSV Setttings
@@ -22,12 +21,18 @@ module Data.CSV.Enumerator
2221 , writeCSVFile
2322 , appendCSVFile
2423
25- -- * Folding Over CSV Files
26- -- | These enumerators generalize the map* family of functions with a running accumulator.
24+ -- * Generic Folds Over CSV Files
25+ -- | These operations enable you to do whatever you want to CSV files;
26+ -- including interleaved IO, etc.
27+ , foldCSVFile
2728 , CSVAction
2829 , funToIter
2930 , funToIterIO
3031
32+ -- * Mapping Over CSV Files
33+ , mapCSVFile
34+ , mapIntoHandle
35+
3136 -- * Primitive Iteratees
3237 , collectRows
3338 , outputRowIter
4449
4550import Control.Applicative hiding (many )
4651import Control.Exception (bracket , SomeException )
47- import Control.Monad (mzero , mplus , foldM , when )
52+ import Control.Monad (mzero , mplus , foldM , when , liftM )
4853import Control.Monad.IO.Class (liftIO , MonadIO )
4954import qualified Data.ByteString as B
5055import qualified Data.ByteString.Char8 as B8
@@ -81,40 +86,14 @@ class CSVeable r where
8186 -> a
8287 -> E. Iteratee B. ByteString IO a
8388
84- -- | Iteratee to push rows into a given file
85- fileSink :: CSVSettings
86- -> FilePath
87- -> (Maybe Handle , Int )
88- -> ParsedRow r
89- -> E. Iteratee B. ByteString IO (Maybe Handle , Int )
90-
91- -- | Open & fold over the CSV file. Processing starts on row 2 for MapRow
92- -- instance to use first row as column headers.
93- foldCSVFile :: FilePath -- ^ File to open as a CSV file
94- -> CSVSettings -- ^ CSV settings to use on the input file
95- -> CSVAction r a -- ^ Fold action
96- -> a -- ^ Initial accumulator
97- -> IO (Either SomeException a ) -- ^ Error or the resulting accumulator
98-
99- -- | Take a CSV file, apply function to each of its rows and save the
100- -- resulting rows into a new file.
101- --
102- -- Each row is simply a list of fields.
103- mapCSVFile :: FilePath -- ^ Input file
104- -> CSVSettings -- ^ CSV Settings
105- -> (r -> [r ]) -- ^ A function to map a row onto rows
106- -> FilePath -- ^ Output file
107- -> IO (Either SomeException Int ) -- ^ Number of rows processed
108- mapCSVFile fi s f fo = do
109- res <- foldCSVFile fi s iter (Nothing , 0 )
110- return $ snd `fmap` res
111- where
112- iter ! acc (ParsedRow (Just ! r)) = foldM chain acc (f r)
113- iter ! acc x = fileSink s fo acc x
114-
115- chain ! acc ! r = singleSink r acc
11689
117- singleSink ! x ! acc = fileSink s fo acc (ParsedRow (Just x))
90+ -- | Iteratee to push rows into a given file
91+ fileSink
92+ :: CSVSettings
93+ -> FilePath
94+ -> (Maybe Handle , Int )
95+ -> ParsedRow r
96+ -> E. Iteratee B. ByteString IO (Maybe Handle , Int )
11897
11998
12099 ----------------------------------------------------------------------------
@@ -152,11 +131,6 @@ instance CSVeable Row where
152131 comboIter acc' = procRow acc' >>= loop
153132
154133
155- foldCSVFile fp csvs f acc = E. run iter
156- where
157- iter = enumFile fp $$ iterCSV csvs f acc
158-
159-
160134 fileSink csvs fo = iter
161135 where
162136 iter :: (Maybe Handle , Int )
@@ -237,8 +211,6 @@ instance CSVeable MapRow where
237211
238212 toMapCSV ! headers ! fs = yield (fs >>= (Just . M. fromList . zip headers)) (E. Chunks [] )
239213
240- foldCSVFile fp csvs f ! acc = E. run (enumFile fp $$ iterCSV csvs f acc)
241-
242214
243215 fileSink s fo = mapIter
244216 where
@@ -298,6 +270,42 @@ instance CSVeable MapRow where
298270 return (Just oh, i+ 1 )
299271
300272
273+ ------------------------------------------------------------------------------
274+ -- | Open & fold over the CSV file.
275+ --
276+ -- Processing starts on row 2 for MapRow instance to use first row as column
277+ -- headers.
278+ foldCSVFile
279+ :: (CSVeable r )
280+ => FilePath -- ^ File to open as a CSV file
281+ -> CSVSettings -- ^ CSV settings to use on the input file
282+ -> CSVAction r a -- ^ Fold action
283+ -> a -- ^ Initial accumulator
284+ -> IO (Either SomeException a ) -- ^ Error or the resulting accumulator
285+ foldCSVFile fp csvs f acc = E. run (enumFile fp $$ iterCSV csvs f acc)
286+
287+
288+ ------------------------------------------------------------------------------
289+ -- | Take a CSV file, apply function to each of its rows and save the
290+ -- resulting rows into a new file.
291+ --
292+ -- Each row is simply a list of fields.
293+ mapCSVFile
294+ :: (CSVeable r )
295+ => FilePath -- ^ Input file
296+ -> CSVSettings -- ^ CSV Settings
297+ -> (r -> [r ]) -- ^ A function to map a row onto rows
298+ -> FilePath -- ^ Output file
299+ -> IO (Either SomeException Int ) -- ^ Number of rows processed
300+ mapCSVFile fi s f fo = do
301+ res <- foldCSVFile fi s iter (Nothing , 0 )
302+ return $ snd `fmap` res
303+ where
304+ iter ! acc (ParsedRow (Just ! r)) = foldM chain acc (f r)
305+ iter ! acc x = fileSink s fo acc x
306+ chain ! acc ! r = fileSink s fo acc (ParsedRow (Just r))
307+
308+
301309------------------------------------------------------------------------------
302310readCSVFile :: (CSVeable r ) => CSVSettings -- ^ CSV settings
303311 -> FilePath -- ^ FilePath
@@ -431,6 +439,38 @@ funToIter f = iterf
431439 iterf ! acc r = yield (f acc r) (E. Chunks [] )
432440
433441
442+
443+ ------------------------------------------------------------------------------
444+ -- | Create an iteratee that can map over a CSV stream and output results to
445+ -- a handle in an interleaved fashion.
446+ --
447+ -- Example use: Let's map over a CSV file coming in through 'stdin' and push
448+ -- results to 'stdout'.
449+ --
450+ -- > f r = return [r] -- a function that just returns the given row
451+ --
452+ -- > E.run (E.enumHandle 4096 stdin $$ mapIntoHandle defCSVSettings True stdout f)
453+ mapIntoHandle
454+ :: (CSVeable r )
455+ => CSVSettings -- ^ 'CSVSettings'
456+ -> Bool -- ^ Whether to write headers
457+ -> Handle -- ^ Handle to stream results
458+ -> (r -> IO [r ]) -- ^ Map function
459+ -> E. Iteratee ByteString IO Int -- ^ Resulting Iteratee
460+ mapIntoHandle csvs outh h f = do
461+ snd `liftM` iterCSV csvs (funToIterIO f') (False ,0 )
462+ where
463+ f' acc EOF = return acc
464+ f' acc (ParsedRow Nothing ) = return acc
465+ f' (False , _) r'@ (ParsedRow (Just r)) = do
466+ when outh $ writeHeaders csvs h [r]
467+ f' (True , 0 ) r'
468+ f' (True , ! i) (ParsedRow (Just r)) = do
469+ rs <- f r
470+ outputRows csvs h rs
471+ return (True , i+ 1 )
472+
473+
434474------------------------------------------------------------------------------
435475-- | Just collect all rows into an array. This will cancel out the incremental
436476-- nature of this library.
0 commit comments