@@ -375,60 +375,25 @@ def read_excel(io,
375375 ** kwds )
376376
377377
378- class  _XlrdReader (object ):
379- 
380-  def  __init__ (self , filepath_or_buffer ):
381-  """Reader using xlrd engine. 
382- 
383-  Parameters 
384-  ---------- 
385-  filepath_or_buffer : string, path object or Workbook 
386-  Object to be parsed. 
387-  """ 
388-  err_msg  =  "Install xlrd >= 1.0.0 for Excel support" 
389- 
390-  try :
391-  import  xlrd 
392-  except  ImportError :
393-  raise  ImportError (err_msg )
394-  else :
395-  if  xlrd .__VERSION__  <  LooseVersion ("1.0.0" ):
396-  raise  ImportError (err_msg  + 
397-  ". Current version "  +  xlrd .__VERSION__ )
378+ @add_metaclass (abc .ABCMeta ) 
379+ class  _BaseExcelReader (object ):
398380
399-  # If filepath_or_buffer is a url, want to keep the data as bytes so 
400-  # can't pass to get_filepath_or_buffer() 
401-  if  _is_url (filepath_or_buffer ):
402-  filepath_or_buffer  =  _urlopen (filepath_or_buffer )
403-  elif  not  isinstance (filepath_or_buffer , (ExcelFile , xlrd .Book )):
404-  filepath_or_buffer , _ , _ , _  =  get_filepath_or_buffer (
405-  filepath_or_buffer )
381+  @property  
382+  @abc .abstractmethod  
383+  def  sheet_names (self ):
384+  pass 
406385
407-  if  isinstance (filepath_or_buffer , xlrd .Book ):
408-  self .book  =  filepath_or_buffer 
409-  elif  not  isinstance (filepath_or_buffer , xlrd .Book ) and  hasattr (
410-  filepath_or_buffer , "read" ):
411-  # N.B. xlrd.Book has a read attribute too 
412-  if  hasattr (filepath_or_buffer , 'seek' ):
413-  try :
414-  # GH 19779 
415-  filepath_or_buffer .seek (0 )
416-  except  UnsupportedOperation :
417-  # HTTPResponse does not support seek() 
418-  # GH 20434 
419-  pass 
386+  @abc .abstractmethod  
387+  def  get_sheet_by_name (self , name ):
388+  pass 
420389
421-  data  =  filepath_or_buffer .read ()
422-  self .book  =  xlrd .open_workbook (file_contents = data )
423-  elif  isinstance (filepath_or_buffer , compat .string_types ):
424-  self .book  =  xlrd .open_workbook (filepath_or_buffer )
425-  else :
426-  raise  ValueError ('Must explicitly set engine if not passing in' 
427-  ' buffer or path for io.' )
390+  @abc .abstractmethod  
391+  def  get_sheet_by_index (self , index ):
392+  pass 
428393
429-  @property  
430-  def  sheet_names (self ):
431-  return   self . book . sheet_names () 
394+  @abc . abstractmethod  
395+  def  get_sheet_data (self ,  sheet ,  convert_float ):
396+  pass 
432397
433398 def  parse (self ,
434399 sheet_name = 0 ,
@@ -455,56 +420,14 @@ def parse(self,
455420
456421 _validate_header_arg (header )
457422
458-  from  xlrd  import  (xldate , XL_CELL_DATE ,
459-  XL_CELL_ERROR , XL_CELL_BOOLEAN ,
460-  XL_CELL_NUMBER )
461- 
462-  epoch1904  =  self .book .datemode 
463- 
464-  def  _parse_cell (cell_contents , cell_typ ):
465-  """converts the contents of the cell into a pandas 
466-  appropriate object""" 
467- 
468-  if  cell_typ  ==  XL_CELL_DATE :
469- 
470-  # Use the newer xlrd datetime handling. 
471-  try :
472-  cell_contents  =  xldate .xldate_as_datetime (
473-  cell_contents , epoch1904 )
474-  except  OverflowError :
475-  return  cell_contents 
476- 
477-  # Excel doesn't distinguish between dates and time, 
478-  # so we treat dates on the epoch as times only. 
479-  # Also, Excel supports 1900 and 1904 epochs. 
480-  year  =  (cell_contents .timetuple ())[0 :3 ]
481-  if  ((not  epoch1904  and  year  ==  (1899 , 12 , 31 )) or 
482-  (epoch1904  and  year  ==  (1904 , 1 , 1 ))):
483-  cell_contents  =  time (cell_contents .hour ,
484-  cell_contents .minute ,
485-  cell_contents .second ,
486-  cell_contents .microsecond )
487- 
488-  elif  cell_typ  ==  XL_CELL_ERROR :
489-  cell_contents  =  np .nan 
490-  elif  cell_typ  ==  XL_CELL_BOOLEAN :
491-  cell_contents  =  bool (cell_contents )
492-  elif  convert_float  and  cell_typ  ==  XL_CELL_NUMBER :
493-  # GH5394 - Excel 'numbers' are always floats 
494-  # it's a minimal perf hit and less surprising 
495-  val  =  int (cell_contents )
496-  if  val  ==  cell_contents :
497-  cell_contents  =  val 
498-  return  cell_contents 
499- 
500423 ret_dict  =  False 
501424
502425 # Keep sheetname to maintain backwards compatibility. 
503426 if  isinstance (sheet_name , list ):
504427 sheets  =  sheet_name 
505428 ret_dict  =  True 
506429 elif  sheet_name  is  None :
507-  sheets  =  self .book . sheet_names () 
430+  sheets  =  self .sheet_names 
508431 ret_dict  =  True 
509432 else :
510433 sheets  =  [sheet_name ]
@@ -519,19 +442,13 @@ def _parse_cell(cell_contents, cell_typ):
519442 print ("Reading sheet {sheet}" .format (sheet = asheetname ))
520443
521444 if  isinstance (asheetname , compat .string_types ):
522-  sheet  =  self .book . sheet_by_name (asheetname )
445+  sheet  =  self .get_sheet_by_name (asheetname )
523446 else : # assume an integer if not a string 
524-  sheet  =  self .book . sheet_by_index (asheetname )
447+  sheet  =  self .get_sheet_by_index (asheetname )
525448
526-  data  =  [] 
449+  data  =  self . get_sheet_data ( sheet ,  convert_float ) 
527450 usecols  =  _maybe_convert_usecols (usecols )
528451
529-  for  i  in  range (sheet .nrows ):
530-  row  =  [_parse_cell (value , typ )
531-  for  value , typ  in  zip (sheet .row_values (i ),
532-  sheet .row_types (i ))]
533-  data .append (row )
534- 
535452 if  sheet .nrows  ==  0 :
536453 output [asheetname ] =  DataFrame ()
537454 continue 
@@ -620,6 +537,120 @@ def _parse_cell(cell_contents, cell_typ):
620537 return  output [asheetname ]
621538
622539
class _XlrdReader(_BaseExcelReader):
    """Excel reader that delegates workbook access to the xlrd package."""

    def __init__(self, filepath_or_buffer):
        """Reader using xlrd engine.

        Parameters
        ----------
        filepath_or_buffer : string, path object or Workbook
            Object to be parsed.

        Raises
        ------
        ImportError
            If xlrd cannot be imported or is older than 1.0.0.
        ValueError
            If the (resolved) input is not an xlrd Book, has no ``read``
            attribute and is not a string.
        """
        install_hint = "Install xlrd >= 1.0.0 for Excel support"

        try:
            import xlrd
        except ImportError:
            raise ImportError(install_hint)

        # Only reached when the import succeeded.
        if xlrd.__VERSION__ < LooseVersion("1.0.0"):
            raise ImportError(install_hint +
                              ". Current version " + xlrd.__VERSION__)

        source = filepath_or_buffer

        # The payload behind a URL has to stay as bytes, so it is opened
        # directly instead of going through get_filepath_or_buffer().
        if _is_url(source):
            source = _urlopen(source)
        elif not isinstance(source, (ExcelFile, xlrd.Book)):
            source, _, _, _ = get_filepath_or_buffer(source)

        if isinstance(source, xlrd.Book):
            # Already an opened workbook: use it as is.
            self.book = source
        elif hasattr(source, "read"):
            # N.B. xlrd.Book has a read attribute too, which is why the
            # Book case is handled first.
            if hasattr(source, 'seek'):
                try:
                    # GH 19779
                    source.seek(0)
                except UnsupportedOperation:
                    # HTTPResponse does not support seek()
                    # GH 20434
                    pass

            raw_bytes = source.read()
            self.book = xlrd.open_workbook(file_contents=raw_bytes)
        elif isinstance(source, compat.string_types):
            self.book = xlrd.open_workbook(source)
        else:
            raise ValueError('Must explicitly set engine if not passing in'
                             ' buffer or path for io.')

    @property
    def sheet_names(self):
        """Sheet names as reported by the underlying xlrd workbook."""
        workbook = self.book
        return workbook.sheet_names()

    def get_sheet_by_name(self, name):
        """Look up a sheet of the xlrd workbook by its name."""
        workbook = self.book
        return workbook.sheet_by_name(name)

    def get_sheet_by_index(self, index):
        """Look up a sheet of the xlrd workbook by its position."""
        workbook = self.book
        return workbook.sheet_by_index(index)

    def get_sheet_data(self, sheet, convert_float):
        """Convert every cell of ``sheet`` and return the rows as lists.

        Parameters
        ----------
        sheet : xlrd sheet
            Sheet whose ``nrows``, ``row_values`` and ``row_types`` are
            read.
        convert_float : bool
            When true, numeric cells holding an integral value are
            returned as ``int`` instead of ``float``.

        Returns
        -------
        list of list
            One inner list per sheet row, holding the converted cells.
        """
        from xlrd import (xldate, XL_CELL_DATE,
                          XL_CELL_ERROR, XL_CELL_BOOLEAN,
                          XL_CELL_NUMBER)

        epoch1904 = self.book.datemode

        def _parse_cell(cell_contents, cell_typ):
            """converts the contents of the cell into a pandas
            appropriate object"""
            if cell_typ == XL_CELL_DATE:
                # Use the newer xlrd datetime handling; values it cannot
                # represent are handed back untouched.
                try:
                    stamp = xldate.xldate_as_datetime(
                        cell_contents, epoch1904)
                except OverflowError:
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                ymd = stamp.timetuple()[0:3]
                if epoch1904:
                    on_epoch = ymd == (1904, 1, 1)
                else:
                    on_epoch = ymd == (1899, 12, 31)

                if on_epoch:
                    return time(stamp.hour,
                                stamp.minute,
                                stamp.second,
                                stamp.microsecond)
                return stamp

            if cell_typ == XL_CELL_ERROR:
                return np.nan

            if cell_typ == XL_CELL_BOOLEAN:
                return bool(cell_contents)

            if convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                as_int = int(cell_contents)
                if as_int == cell_contents:
                    return as_int

            return cell_contents

        return [
            [_parse_cell(value, typ)
             for value, typ in zip(sheet.row_values(row_idx),
                                   sheet.row_types(row_idx))]
            for row_idx in range(sheet.nrows)
        ]
652+ 
653+ 
623654class  ExcelFile (object ):
624655 """ 
625656 Class for parsing tabular excel sheets into DataFrame objects. 
0 commit comments