@@ -204,30 +204,64 @@ def read_json(*args, chunksize=100000, flatten=False, **kwargs) -> 'StreamingDat
204204 print(dfs)
205205 """
206206 if not isinstance (chunksize , int ) or chunksize <= 0 :
207- raise ValueError (
208- 'chunksize must be a positive integer' ) # pragma: no cover
207+ raise ValueError ( # pragma: no cover
208+ 'chunksize must be a positive integer' )
209209 kwargs_create = StreamingDataFrame ._process_kwargs (kwargs )
210+
210211 if isinstance (args [0 ], (list , dict )):
211212 if flatten :
212213 return StreamingDataFrame .read_df (json_normalize (args [0 ]), ** kwargs_create )
213214 return StreamingDataFrame .read_df (args [0 ], ** kwargs_create )
215+
214216 if kwargs .get ('lines' , None ) == 'stream' :
215217 del kwargs ['lines' ]
216218 st = JsonIterator2Stream (enumerate_json_items (
217219 args [0 ], encoding = kwargs .get ('encoding' , None ), lines = True , flatten = flatten ))
218220 args = args [1 :]
219- return StreamingDataFrame (lambda : pandas .read_json (st , * args , chunksize = chunksize , lines = True , ** kwargs ), ** kwargs_create )
221+
222+ if chunksize is None :
223+ return StreamingDataFrame (
224+ lambda : pandas .read_json (
225+ st , * args , chunksize = None , lines = True , ** kwargs ),
226+ ** kwargs_create )
227+
228+ def fct1 (st = st , args = args , chunksize = chunksize , kw = kwargs .copy ()):
229+ for r in pandas .read_json (st , * args , chunksize = chunksize , nrows = chunksize ,
230+ lines = True , ** kw ):
231+ yield r
232+ return StreamingDataFrame (fct1 , ** kwargs_create )
233+
220234 if kwargs .get ('lines' , False ):
221235 if flatten :
222236 raise NotImplementedError (
223237 "flatten==True is implemented with option lines='stream'" )
224- return StreamingDataFrame (lambda : pandas .read_json (* args , chunksize = chunksize , ** kwargs ), ** kwargs_create )
238+ if chunksize is None :
239+ return StreamingDataFrame (
240+ lambda : pandas .read_json (* args , chunksize = None , ** kwargs ),
241+ ** kwargs_create )
242+
243+ def fct2 (args = args , chunksize = chunksize , kw = kwargs .copy ()):
244+ for r in pandas .read_json (* args , chunksize = chunksize , nrows = chunksize , ** kw ):
245+ yield r
246+ return StreamingDataFrame (fct2 , ** kwargs_create )
247+
225248 st = JsonIterator2Stream (enumerate_json_items (
226249 args [0 ], encoding = kwargs .get ('encoding' , None ), flatten = flatten ))
227250 args = args [1 :]
228251 if 'lines' in kwargs :
229252 del kwargs ['lines' ]
230- return StreamingDataFrame (lambda : pandas .read_json (st , * args , chunksize = chunksize , lines = True , ** kwargs ), ** kwargs_create )
253+
254+ if chunksize is None :
255+ return StreamingDataFrame (
256+ lambda : pandas .read_json (
257+ st , * args , chunksize = chunksize , lines = True , ** kwargs ),
258+ ** kwargs_create )
259+
260+ def fct3 (st = st , args = args , chunksize = chunksize , kw = kwargs .copy ()):
261+ for r in pandas .read_json (st , * args , chunksize = chunksize , nrows = chunksize ,
262+ lines = True , ** kw ):
263+ yield r
264+ return StreamingDataFrame (fct3 , ** kwargs_create )
231265
232266 @staticmethod
233267 def read_csv (* args , ** kwargs ) -> 'StreamingDataFrame' :
0 commit comments