3030#  may be bound to those statements (embedded in thin_impl.pyx).
3131# ------------------------------------------------------------------------------
3232
33- #  Rules for named binds:
34- #  1. Quoted and non-quoted bind names are allowed.
35- #  2. Quoted binds can contain any characters.
36- #  3. Non-quoted binds must begin with an alphabet character.
37- #  4. Non-quoted binds can only contain alphanumeric characters, the underscore,
38- #  the dollar sign and the pound sign.
39- #  5. Non-quoted binds cannot be Oracle Database Reserved Names (Server handles
40- #  this case and returns an appropriate error)
41- BIND_PATTERN =  r ' :\s * ( (?: ". *? ") | (?: [^ \W\d _ ][\w \$ # ]* ) | \d + ) ' 
42- 
43- #  pattern used for detecting a DML returning clause; bind variables in the
44- #  SQL prior to the INTO keyword are input variables; bind variables in the SQL
45- #  after the INTO keyword are output variables
46- DML_RETURNING_PATTERN =  r ' (?si) (?<= \b RETURNING\b ) ( . *? ) (?= \b INTO\b ) ' 
47- 
48- #  patterns for identifying comments and quoted strings
49- SINGLE_LINE_COMMENT_PATTERN =  r ' --. * ' 
50- MULTI_LINE_COMMENT_PATTERN =  r ' (?s) /\* . *? \* /' 
51- CONSTANT_STRING_PATTERN =  r " (?s) '. *? '" 
52- QUOTED_NAME_PATTERN =  r ' ( :\s * ) ? ( ". *? ") ' 
53- 
5433cdef class  BindInfo:
5534
5635 cdef:
@@ -75,6 +54,244 @@ cdef class BindInfo:
7554 return  BindInfo(self ._bind_name, self ._is_return_bind)
7655
7756
cdef class Parser:
    """
    Single-pass scanner over the SQL text of a Statement. It determines the
    statement type from the initial keyword, registers bind variable names
    with the statement and detects the RETURNING ... INTO clause of DML
    statements. Comments, quoted strings, q-strings and quoted identifiers
    are skipped so that their contents are never mistaken for keywords or
    bind variables.
    """

    cdef:
        # set once the RETURNING keyword has been seen in a DML statement
        bint returning_keyword_found
        # current scan index and index of the last character of the SQL
        ssize_t pos, max_pos
        # raw buffer and kind of the SQL string, as used by PyUnicode_READ()
        void *sql_data
        int sql_kind

    cdef int _parse_bind_name(self, Statement stmt) except -1:
        """
        Called when a colon is found at the current position. Bind variables
        are identified as follows:
        - Quoted and non-quoted bind names are allowed.
        - Quoted bind names can contain any characters.
        - Non-quoted bind names must begin with an alphabetic character.
        - Non-quoted bind names can only contain alphanumeric characters, the
          underscore, the dollar sign and the pound sign.
        - Non-quoted bind names cannot be Oracle Database Reserved Names (this
          is left to the server to detect and return an appropriate error).
        Names consisting only of digits are also accepted. If a name is found
        it is added to the statement and the current position is moved to the
        last character that belongs to the name; otherwise, the current
        position is left untouched.
        """
        cdef:
            bint quoted_name = False, in_bind = False, digits_only = False
            ssize_t start_pos = 0, pos = self.pos + 1
            str bind_name
            Py_UCS4 ch
        while pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, pos)
            if not in_bind:
                # whitespace is permitted between the colon and the name
                if cpython.Py_UNICODE_ISSPACE(ch):
                    pos += 1
                    continue
                elif ch == '"':
                    quoted_name = True
                elif cpython.Py_UNICODE_ISDIGIT(ch):
                    digits_only = True
                elif not cpython.Py_UNICODE_ISALPHA(ch):
                    # not a bind variable (for example, ":=" in PL/SQL)
                    break
                in_bind = True
                start_pos = pos
            elif digits_only and not cpython.Py_UNICODE_ISDIGIT(ch):
                self.pos = pos - 1
                break
            elif quoted_name and ch == '"':
                self.pos = pos
                break
            elif not digits_only and not quoted_name \
                    and not cpython.Py_UNICODE_ISALNUM(ch) \
                    and ch not in ('_', '$', '#'):
                self.pos = pos - 1
                break
            pos += 1
        if in_bind:
            # the name ran up to the end of the SQL: consume it so that the
            # caller does not scan the characters of the name a second time
            if pos > self.max_pos:
                self.pos = self.max_pos
            if quoted_name:
                bind_name = stmt._sql[start_pos + 1:pos]
            elif digits_only:
                bind_name = stmt._sql[start_pos:pos]
            else:
                bind_name = stmt._sql[start_pos:pos].upper()
            stmt._add_bind(bind_name)

    cdef int _parse_multiple_line_comment(self) except -1:
        """
        Multiple line comments consist of the characters /* followed by all
        characters up until */. This method is called when the first slash is
        detected and checks for the subsequent asterisk. If found, the comment
        is traversed and the current position is updated to the closing slash;
        otherwise (including when the comment is never closed), the current
        position is left untouched.
        """
        cdef:
            bint in_comment = False, exiting_comment = False
            ssize_t pos = self.pos + 1
            Py_UCS4 ch
        while pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, pos)
            if not in_comment:
                if ch != '*':
                    break
                in_comment = True
            elif not exiting_comment and ch == '*':
                exiting_comment = True
            elif exiting_comment:
                if ch == '/':
                    self.pos = pos
                    break
                elif ch != '*':
                    # a run of asterisks may still be followed by the closing
                    # slash (as in "/***/"), so only a different character
                    # cancels the pending exit
                    exiting_comment = False
            pos += 1

    cdef int _parse_qstring(self) except -1:
        """
        Parses a q-string which consists of the characters "q" and a single
        quote followed by a start separator, any text that does not contain the
        end separator and the end separator and ending quote. The following are
        examples that demonstrate this:
            - q'[...]'
            - q'{...}'
            - q'<...>'
            - q'(...)'
            - q'?...?' (where ? is any character)
        This method is called with the current position on the opening quote
        and leaves it on the ending quote (or past the end of the SQL if the
        q-string is never terminated).
        """
        cdef:
            bint exiting_qstring = False, in_qstring = False
            Py_UCS4 ch, sep = 0
        self.pos += 1
        while self.pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, self.pos)
            if not in_qstring:
                # the first character after the quote selects the separator
                if ch == '[':
                    sep = ']'
                elif ch == '{':
                    sep = '}'
                elif ch == '<':
                    sep = '>'
                elif ch == '(':
                    sep = ')'
                else:
                    sep = ch
                in_qstring = True
            elif not exiting_qstring and ch == sep:
                exiting_qstring = True
            elif exiting_qstring:
                if ch == "'":
                    break
                elif ch != sep:
                    exiting_qstring = False
            self.pos += 1

    cdef int _parse_quoted_string(self, Py_UCS4 sep) except -1:
        """
        Parses a quoted string with the given separator. All characters until
        the separator is detected are discarded. The current position is left
        on the closing separator (or past the end of the SQL if the string is
        never terminated).
        """
        cdef Py_UCS4 ch
        self.pos += 1
        while self.pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, self.pos)
            if ch == sep:
                break
            self.pos += 1

    cdef int _parse_single_line_comment(self) except -1:
        """
        Single line comments consist of two dashes and all characters up to the
        next line break or the end of the SQL. This method is called when the
        first dash is detected and checks for the subsequent dash. If found,
        the single line comment is traversed and the current position is
        updated; otherwise, the current position is left untouched.
        """
        cdef:
            ssize_t pos = self.pos + 1
            bint in_comment = False
            Py_UCS4 ch
        while pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, pos)
            if not in_comment:
                if ch != '-':
                    break
                in_comment = True
            elif cpython.Py_UNICODE_ISLINEBREAK(ch):
                self.pos = pos
                break
            pos += 1
        # a comment that runs to the end of the SQL without a line break must
        # still be consumed; otherwise its text would be scanned as SQL
        if in_comment and pos > self.max_pos:
            self.pos = self.max_pos

    cdef int parse(self, Statement stmt) except -1:
        """
        Parses the SQL stored in the statement in order to determine the
        keyword that identifies the type of SQL being executed as well as a
        list of bind variable names. A check is also made for DML returning
        statements since the bind variables following the "INTO" keyword are
        treated differently from other bind variables. Scanning stops as soon
        as the statement is identified as DDL.
        """
        cdef:
            bint initial_keyword_found = False, last_was_string = False
            Py_UCS4 ch, last_ch = 0, alpha_start_ch = 0
            ssize_t alpha_start_pos = 0, alpha_len
            bint last_was_alpha = False, is_alpha
            str keyword

        # initialization
        self.pos = 0
        self.returning_keyword_found = False
        self.max_pos = cpython.PyUnicode_GET_LENGTH(stmt._sql) - 1
        self.sql_kind = cpython.PyUnicode_KIND(stmt._sql)
        self.sql_data = cpython.PyUnicode_DATA(stmt._sql)

        # scan all characters in the string
        while self.pos <= self.max_pos:
            ch = cpython.PyUnicode_READ(self.sql_kind, self.sql_data, self.pos)

            # look for certain keywords (initial keyword and the ones for
            # detecting DML returning statements); a keyword is only examined
            # once the first non-alphabetic character following it is reached
            is_alpha = cpython.Py_UNICODE_ISALPHA(ch)
            if is_alpha and not last_was_alpha:
                alpha_start_pos = self.pos
                alpha_start_ch = ch
            elif not is_alpha and last_was_alpha:
                alpha_len = self.pos - alpha_start_pos
                if not initial_keyword_found:
                    keyword = stmt._sql[alpha_start_pos:self.pos].upper()
                    stmt._determine_statement_type(keyword)
                    if stmt._is_ddl:
                        break
                    initial_keyword_found = True
                # the length and first character are checked before slicing
                # in order to avoid creating a string for every word
                elif stmt._is_dml and not self.returning_keyword_found \
                        and alpha_len == 9 and alpha_start_ch in ('r', 'R'):
                    keyword = stmt._sql[alpha_start_pos:self.pos].upper()
                    if keyword == "RETURNING":
                        self.returning_keyword_found = True
                elif self.returning_keyword_found and alpha_len == 4 \
                        and alpha_start_ch in ('i', 'I'):
                    keyword = stmt._sql[alpha_start_pos:self.pos].upper()
                    if keyword == "INTO":
                        stmt._is_returning = True

            # need to keep track of whether the last token parsed was a string
            # (excluding whitespace) as if the last token parsed was a string
            # a following colon is not a bind variable but a part of the JSON
            # constant syntax
            if ch == "'":
                last_was_string = True
                if last_ch in ('q', 'Q'):
                    self._parse_qstring()
                else:
                    self._parse_quoted_string(ch)
            elif not cpython.Py_UNICODE_ISSPACE(ch):
                if ch == '-':
                    self._parse_single_line_comment()
                elif ch == '/':
                    self._parse_multiple_line_comment()
                elif ch == '"':
                    self._parse_quoted_string(ch)
                elif ch == ':' and not last_was_string:
                    self._parse_bind_name(stmt)
                last_was_string = False

            # advance to next character and track previous character
            self.pos += 1
            last_was_alpha = is_alpha
            last_ch = ch
293+ 
294+ 
78295cdef class  Statement:
79296
80297 cdef:
@@ -126,94 +343,53 @@ cdef class Statement:
126343 copied_statement._return_to_cache =  False 
127344 return  copied_statement
128345
129-  cdef int  _add_binds (self , str  sql, bint is_return_bind ) except  - 1 :
346+  cdef int  _add_bind (self , str  name ) except  - 1 :
130347 """ 
131348 Add bind information to the statement by examining the passed SQL for 
132349 bind variable names. 
133350 """  
134-  cdef:
135-  BindInfo info
136-  str  name
137-  for  name in  re.findall(BIND_PATTERN, sql):
138-  if  name.startswith(' "'  ) and  name.endswith(' "'  ):
139-  name =  name[1 :- 1 ]
140-  else :
141-  name =  name.upper()
142-  if  self ._is_plsql and  name in  self ._bind_info_dict:
143-  continue 
144-  info =  BindInfo(name, is_return_bind)
351+  cdef BindInfo info
352+  if  not  self ._is_plsql or  name not  in  self ._bind_info_dict:
353+  info =  BindInfo(name, self ._is_returning)
145354 self ._bind_info_list.append(info)
146355 if  info._bind_name in  self ._bind_info_dict:
147356 self ._bind_info_dict[info._bind_name].append(info)
148357 else :
149358 self ._bind_info_dict[info._bind_name] =  [info]
150359
151-  cdef _determine_statement_type(self , str  sql ):
360+  cdef _determine_statement_type(self , str  sql_keyword ):
152361 """ 
153362 Determine the type of the SQL statement by examining the first keyword 
154363 found in the statement. 
155364 """  
156-  tokens =  sql.strip().lstrip(" ("  )[:10 ].split()
157-  if  tokens:
158-  sql_keyword =  tokens[0 ].upper()
159-  if  sql_keyword in  (" DECLARE"  , " BEGIN"  , " CALL"  ):
160-  self ._is_plsql =  True 
161-  elif  sql_keyword in  (" SELECT "  , " WITH"  ):
162-  self ._is_query =  True 
163-  elif  sql_keyword in  (" INSERT"  , " UPDATE "  , " DELETE "  , " MERGE"  ):
164-  self ._is_dml =  True 
165-  elif  sql_keyword in  (" CREATE"  , " ALTER"  , " DROP"  , " TRUNCATE"  ):
166-  self ._is_ddl =  True 
365+  if  sql_keyword in  (" DECLARE"  , " BEGIN"  , " CALL"  ):
366+  self ._is_plsql =  True 
367+  elif  sql_keyword in  (" SELECT "  , " WITH"  ):
368+  self ._is_query =  True 
369+  elif  sql_keyword in  (" INSERT"  , " UPDATE "  , " DELETE "  , " MERGE"  ):
370+  self ._is_dml =  True 
371+  elif  sql_keyword in  (" CREATE"  , " ALTER"  , " DROP"  , " GRANT"  , " REVOKE"  ,
372+  " ANALYZE"  , " AUDIT"  , " COMMENT"  , " TRUNCATE"  ):
373+  self ._is_ddl =  True 
167374
168375 cdef int  _prepare(self , str  sql) except  - 1 :
169376 """ 
170377 Prepare the SQL for execution by determining the list of bind names 
171378 that are found within it. The length of the SQL text is also calculated 
172-  at this time. If the character sets of the client and server are 
173-  identical, the length is calculated in bytes; otherwise, the length is 
174-  calculated in characters. 
379+  at this time. 
175380 """  
176-  cdef:
177-  str  input_sql, returning_sql =  None 
178-  object  match
381+  cdef Parser parser =  Parser.__new__ (Parser)
179382
180383 #  retain normalized SQL (as string and bytes) as well as the length
181384 self ._sql =  sql
182385 self ._sql_bytes =  self ._sql.encode()
183386 self ._sql_length =  < uint32_t>  len (self ._sql_bytes)
184387
185-  #  create empty list (bind by position) and dict (bind by name)
388+  #  parse SQL and populate bind variable list (bind by position) and dict
389+  #  (bind by name)
186390 self ._bind_info_dict =  collections.OrderedDict()
187391 self ._bind_info_list =  []
188- 
189-  #  Strip single/multiline comments and replace constant strings and
190-  #  quoted names with single characters in order to facilitate detection
191-  #  of bind variables; note that bind variables can be quoted so a check
192-  #  must be made to ensure that a quoted string doesn't refer to a bind
193-  #  variable first before it can be replaced
194-  sql =  re.sub(MULTI_LINE_COMMENT_PATTERN, " "  , sql)
195-  sql =  re.sub(SINGLE_LINE_COMMENT_PATTERN, " "  , sql)
196-  sql =  re.sub(CONSTANT_STRING_PATTERN, " S"  , sql)
197-  sql =  re.sub(QUOTED_NAME_PATTERN,
198-  lambda  m : m.group(0 ) if  sql[m.start(0 )] ==  " :"   else  " Q"  ,
199-  sql)
200- 
201-  #  determine statement type
202-  self ._determine_statement_type(sql)
203- 
204-  #  bind variables can only be found in queries, DML and PL/SQL
205-  if  self ._is_query or  self ._is_dml or  self ._is_plsql:
206-  input_sql =  sql
207-  if  self ._is_dml:
208-  match =  re.search(DML_RETURNING_PATTERN, sql)
209-  if  match is  not  None :
210-  pos =  match.end()
211-  input_sql =  sql[:pos]
212-  returning_sql =  sql[pos +  4 :]
213-  self ._add_binds(input_sql, is_return_bind = False )
214-  if  returning_sql is  not  None :
215-  self ._is_returning =  True 
216-  self ._add_binds(returning_sql, is_return_bind = True )
392+  parser.parse(self )
217393
218394 cdef int  _set_var(self , BindInfo bind_info, ThinVarImpl var_impl,
219395 ThinCursorImpl cursor_impl) except  - 1 :
0 commit comments