1+ /* 
2+  _BlocksOutputBuffer is used to maintain an output buffer 
3+  that has unpredictable size. Suitable for compression/decompression 
4+  API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out: 
5+ 
6+  stream->next_out: point to the next output position. 
7+  stream->avail_out: the number of available bytes left in the buffer. 
8+ 
9+  It maintains a list of bytes objects, so there is no overhead of resizing
10+  the buffer.
11+ 
12+  Usage: 
13+ 
14+  1, Initialize the struct instance like this: 
15+  _BlocksOutputBuffer buffer = {.list = NULL}; 
16+  Set .list to NULL for _BlocksOutputBuffer_OnError() 
17+ 
18+  2, Initialize the buffer using one of these functions:
19+  _BlocksOutputBuffer_InitAndGrow() 
20+  _BlocksOutputBuffer_InitWithSize() 
21+ 
22+  3, If (avail_out == 0), grow the buffer: 
23+  _BlocksOutputBuffer_Grow() 
24+ 
25+  4, Get the current outputted data size: 
26+  _BlocksOutputBuffer_GetDataSize() 
27+ 
28+  5, Finish the buffer, and return a bytes object: 
29+  _BlocksOutputBuffer_Finish() 
30+ 
31+  6, Clean up the buffer when an error occurred: 
32+  _BlocksOutputBuffer_OnError() 
33+ */ 
34+ 
35+ #ifndef  Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H 
36+ #define  Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H 
37+ #ifdef  __cplusplus 
38+ extern  "C"  {
39+ #endif 
40+ 
41+ #include  "Python.h" 
42+ 
43+ typedef  struct  {
44+  // List of bytes objects 
45+  PyObject  * list ;
46+  // Number of whole allocated size 
47+  Py_ssize_t  allocated ;
48+  // Max length of the buffer, negative number means unlimited length. 
49+  Py_ssize_t  max_length ;
50+ } _BlocksOutputBuffer ;
51+ 
52+ static  const  char  unable_allocate_msg [] =  "Unable to allocate output buffer." ;
53+ 
54+ /* In 32-bit build, the max block size should <= INT32_MAX. */ 
55+ #define  OUTPUT_BUFFER_MAX_BLOCK_SIZE  (256*1024*1024)
56+ 
57+ /* Block size sequence */ 
58+ #define  KB  (1024)
59+ #define  MB  (1024*1024)
60+ const  Py_ssize_t  BUFFER_BLOCK_SIZE [] = 
61+  { 32 * KB , 64 * KB , 256 * KB , 1 * MB , 4 * MB , 8 * MB , 16 * MB , 16 * MB ,
62+  32 * MB , 32 * MB , 32 * MB , 32 * MB , 64 * MB , 64 * MB , 128 * MB , 128 * MB ,
63+  OUTPUT_BUFFER_MAX_BLOCK_SIZE  };
64+ #undef  KB
65+ #undef  MB
66+ 
67+ /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the total
68+  allocated size grows in the following steps:
69+  1 32 KB +32 KB 
70+  2 96 KB +64 KB 
71+  3 352 KB +256 KB 
72+  4 1.34 MB +1 MB 
73+  5 5.34 MB +4 MB 
74+  6 13.34 MB +8 MB 
75+  7 29.34 MB +16 MB 
76+  8 45.34 MB +16 MB 
77+  9 77.34 MB +32 MB 
78+  10 109.34 MB +32 MB 
79+  11 141.34 MB +32 MB 
80+  12 173.34 MB +32 MB 
81+  13 237.34 MB +64 MB 
82+  14 301.34 MB +64 MB 
83+  15 429.34 MB +128 MB 
84+  16 557.34 MB +128 MB 
85+  17 813.34 MB +256 MB 
86+  18 1069.34 MB +256 MB 
87+  19 1325.34 MB +256 MB 
88+  20 1581.34 MB +256 MB 
89+  21 1837.34 MB +256 MB 
90+  22 2093.34 MB +256 MB 
91+  ... 
92+ */ 
93+ 
94+ /* Initialize the buffer, and grow the buffer. 
95+ 
96+  max_length: Max length of the buffer, -1 for unlimited length. 
97+ 
98+  On success, return allocated size (>=0) 
99+  On failure, return -1 
100+ */ 
101+ static  inline  Py_ssize_t 
102+ _BlocksOutputBuffer_InitAndGrow (_BlocksOutputBuffer  * buffer ,
103+  const  Py_ssize_t  max_length ,
104+  void  * * next_out )
105+ {
106+  PyObject  * b ;
107+  Py_ssize_t  block_size ;
108+ 
109+  // ensure .list was set to NULL 
110+  assert (buffer -> list  ==  NULL );
111+ 
112+  // get block size 
113+  if  (0  <= max_length  &&  max_length  <  BUFFER_BLOCK_SIZE [0 ]) {
114+  block_size  =  max_length ;
115+  } else  {
116+  block_size  =  BUFFER_BLOCK_SIZE [0 ];
117+  }
118+ 
119+  // the first block 
120+  b  =  PyBytes_FromStringAndSize (NULL , block_size );
121+  if  (b  ==  NULL ) {
122+  return  -1 ;
123+  }
124+ 
125+  // create the list 
126+  buffer -> list  =  PyList_New (1 );
127+  if  (buffer -> list  ==  NULL ) {
128+  Py_DECREF (b );
129+  return  -1 ;
130+  }
131+  PyList_SET_ITEM (buffer -> list , 0 , b );
132+ 
133+  // set variables 
134+  buffer -> allocated  =  block_size ;
135+  buffer -> max_length  =  max_length ;
136+ 
137+  * next_out  =  PyBytes_AS_STRING (b );
138+  return  block_size ;
139+ }
140+ 
141+ /* Initialize the buffer, with an initial size. 
142+ 
143+  Check block size limit in the outer wrapper function. For example, some libs 
144+  accept UINT32_MAX as the maximum block size, then init_size should <= it. 
145+ 
146+  On success, return allocated size (>=0) 
147+  On failure, return -1 
148+ */ 
149+ static  inline  Py_ssize_t 
150+ _BlocksOutputBuffer_InitWithSize (_BlocksOutputBuffer  * buffer ,
151+  const  Py_ssize_t  init_size ,
152+  void  * * next_out )
153+ {
154+  PyObject  * b ;
155+ 
156+  // ensure .list was set to NULL 
157+  assert (buffer -> list  ==  NULL );
158+ 
159+  // the first block 
160+  b  =  PyBytes_FromStringAndSize (NULL , init_size );
161+  if  (b  ==  NULL ) {
162+  PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
163+  return  -1 ;
164+  }
165+ 
166+  // create the list 
167+  buffer -> list  =  PyList_New (1 );
168+  if  (buffer -> list  ==  NULL ) {
169+  Py_DECREF (b );
170+  return  -1 ;
171+  }
172+  PyList_SET_ITEM (buffer -> list , 0 , b );
173+ 
174+  // set variables 
175+  buffer -> allocated  =  init_size ;
176+  buffer -> max_length  =  -1 ;
177+ 
178+  * next_out  =  PyBytes_AS_STRING (b );
179+  return  init_size ;
180+ }
181+ 
182+ /* Grow the buffer. The avail_out must be 0, please check it before calling. 
183+ 
184+  On success, return allocated size (>=0) 
185+  On failure, return -1 
186+ */ 
187+ static  inline  Py_ssize_t 
188+ _BlocksOutputBuffer_Grow (_BlocksOutputBuffer  * buffer ,
189+  void  * * next_out ,
190+  const  Py_ssize_t  avail_out )
191+ {
192+  PyObject  * b ;
193+  const  Py_ssize_t  list_len  =  Py_SIZE (buffer -> list );
194+  Py_ssize_t  block_size ;
195+ 
196+  // ensure no gaps in the data 
197+  if  (avail_out  !=  0 ) {
198+  PyErr_SetString (PyExc_SystemError ,
199+  "avail_out is non-zero in _BlocksOutputBuffer_Grow()." );
200+  return  -1 ;
201+  }
202+ 
203+  // get block size 
204+  if  (list_len  <  (Py_ssize_t ) Py_ARRAY_LENGTH (BUFFER_BLOCK_SIZE )) {
205+  block_size  =  BUFFER_BLOCK_SIZE [list_len ];
206+  } else  {
207+  block_size  =  BUFFER_BLOCK_SIZE [Py_ARRAY_LENGTH (BUFFER_BLOCK_SIZE ) -  1 ];
208+  }
209+ 
210+  // check max_length 
211+  if  (buffer -> max_length  >= 0 ) {
212+  // if (rest == 0), should not grow the buffer. 
213+  Py_ssize_t  rest  =  buffer -> max_length  -  buffer -> allocated ;
214+  assert (rest  >  0 );
215+ 
216+  // block_size of the last block 
217+  if  (block_size  >  rest ) {
218+  block_size  =  rest ;
219+  }
220+  }
221+ 
222+  // check buffer->allocated overflow 
223+  if  (block_size  >  PY_SSIZE_T_MAX  -  buffer -> allocated ) {
224+  PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
225+  return  -1 ;
226+  }
227+ 
228+  // create the block 
229+  b  =  PyBytes_FromStringAndSize (NULL , block_size );
230+  if  (b  ==  NULL ) {
231+  PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
232+  return  -1 ;
233+  }
234+  if  (PyList_Append (buffer -> list , b ) <  0 ) {
235+  Py_DECREF (b );
236+  return  -1 ;
237+  }
238+  Py_DECREF (b );
239+ 
240+  // set variables 
241+  buffer -> allocated  +=  block_size ;
242+ 
243+  * next_out  =  PyBytes_AS_STRING (b );
244+  return  block_size ;
245+ }
246+ 
247+ /* Return the current outputted data size. */ 
248+ static  inline  Py_ssize_t 
249+ _BlocksOutputBuffer_GetDataSize (_BlocksOutputBuffer  * buffer ,
250+  const  Py_ssize_t  avail_out )
251+ {
252+  return  buffer -> allocated  -  avail_out ;
253+ }
254+ 
255+ /* Finish the buffer. 
256+ 
257+  Return a bytes object on success 
258+  Return NULL on failure 
259+ */ 
260+ static  inline  PyObject  * 
261+ _BlocksOutputBuffer_Finish (_BlocksOutputBuffer  * buffer ,
262+  const  Py_ssize_t  avail_out )
263+ {
264+  PyObject  * result , * block ;
265+  const  Py_ssize_t  list_len  =  Py_SIZE (buffer -> list );
266+ 
267+  // fast path for single block 
268+  if  ((list_len  ==  1  &&  avail_out  ==  0 ) || 
269+  (list_len  ==  2  &&  Py_SIZE (PyList_GET_ITEM (buffer -> list , 1 )) ==  avail_out ))
270+  {
271+  block  =  PyList_GET_ITEM (buffer -> list , 0 );
272+  Py_INCREF (block );
273+ 
274+  Py_CLEAR (buffer -> list );
275+  return  block ;
276+  }
277+ 
278+  // final bytes object 
279+  result  =  PyBytes_FromStringAndSize (NULL , buffer -> allocated  -  avail_out );
280+  if  (result  ==  NULL ) {
281+  PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
282+  return  NULL ;
283+  }
284+ 
285+  // memory copy 
286+  if  (list_len  >  0 ) {
287+  char  * posi  =  PyBytes_AS_STRING (result );
288+ 
289+  // blocks except the last one 
290+  Py_ssize_t  i  =  0 ;
291+  for  (; i  <  list_len - 1 ; i ++ ) {
292+  block  =  PyList_GET_ITEM (buffer -> list , i );
293+  memcpy (posi , PyBytes_AS_STRING (block ), Py_SIZE (block ));
294+  posi  +=  Py_SIZE (block );
295+  }
296+  // the last block 
297+  block  =  PyList_GET_ITEM (buffer -> list , i );
298+  memcpy (posi , PyBytes_AS_STRING (block ), Py_SIZE (block ) -  avail_out );
299+  } else  {
300+  assert (Py_SIZE (result ) ==  0 );
301+  }
302+ 
303+  Py_CLEAR (buffer -> list );
304+  return  result ;
305+ }
306+ 
307+ /* Clean up the buffer when an error occurred. */ 
308+ static  inline  void 
309+ _BlocksOutputBuffer_OnError (_BlocksOutputBuffer  * buffer )
310+ {
311+  Py_CLEAR (buffer -> list );
312+ }
313+ 
314+ #ifdef  __cplusplus 
315+ }
316+ #endif 
317+ #endif  /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */ 
0 commit comments