@@ -3,7 +3,6 @@
 # license that can be found in the LICENSE file.

 import copy
-import concurrent.futures
 from datetime import datetime
 import logging
 import re
@@ -20,8 +19,9 @@
 if typing.TYPE_CHECKING:  # pragma: NO COVER
     import pandas

-from pandas_gbq.exceptions import AccessDenied, GenericGBQException
+from pandas_gbq.exceptions import GenericGBQException, QueryTimeout
 from pandas_gbq.features import FEATURES
+import pandas_gbq.query
 import pandas_gbq.schema
 import pandas_gbq.timestamp

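With this change, QueryTimeout is imported from pandas_gbq.exceptions rather than defined in this module. A hedged sketch of what calling code might look like after the move; the query string and timeout value below are placeholders for illustration, not part of this commit:

import pandas_gbq
from pandas_gbq.exceptions import QueryTimeout  # moved out of gbq.py

try:
    # Placeholder query and timeout purely for illustration.
    df = pandas_gbq.read_gbq(
        "SELECT 1",
        configuration={"query": {"timeoutMs": 1000}},
    )
except QueryTimeout:
    # The query exceeded timeoutMs and was cancelled.
    df = None
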
@@ -130,15 +130,6 @@ class NotFoundException(ValueError):
     pass


-class QueryTimeout(ValueError):
-    """
-    Raised when the query request exceeds the timeoutMs value specified in the
-    BigQuery configuration.
-    """
-
-    pass
-
-
 class TableCreationError(ValueError):
     """
     Raised when the create table method fails
@@ -340,10 +331,6 @@ def __init__(
         self.client = self.get_client()
         self.use_bqstorage_api = use_bqstorage_api

-        # BQ Queries costs $5 per TB. First 1 TB per month is free
-        # see here for more: https://cloud.google.com/bigquery/pricing
-        self.query_price_for_TB = 5.0 / 2 ** 40  # USD/TB
-
     def _start_timer(self):
         self.start = time.time()

@@ -355,16 +342,6 @@ def log_elapsed_seconds(self, prefix="Elapsed", postfix="s.", overlong=6):
         if sec > overlong:
             logger.info("{} {} {}".format(prefix, sec, postfix))

-    # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
-    @staticmethod
-    def sizeof_fmt(num, suffix="B"):
-        fmt = "%3.1f %s%s"
-        for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
-            if abs(num) < 1024.0:
-                return fmt % (num, unit, suffix)
-            num /= 1024.0
-        return fmt % (num, "Y", suffix)
-
     def get_client(self):
         import google.api_core.client_info
         from google.cloud import bigquery
@@ -421,46 +398,10 @@ def download_table(
             user_dtypes=dtypes,
         )

-    def _wait_for_query_job(self, query_reply, timeout_ms):
-        """Wait for query to complete, pausing occasionally to update progress.
-
-        Args:
-            query_reply (QueryJob):
-                A query job which has started.
-
-            timeout_ms (Optional[int]):
-                How long to wait before cancelling the query.
-        """
-        # Wait at most 10 seconds so we can show progress.
-        # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/327):
-        # Include a tqdm progress bar here instead of a stream of log messages.
-        timeout_sec = 10.0
-        if timeout_ms:
-            timeout_sec = min(timeout_sec, timeout_ms / 1000.0)
-
-        while query_reply.state != "DONE":
-            self.log_elapsed_seconds("  Elapsed", "s. Waiting...")
-
-            if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000:
-                self.client.cancel_job(
-                    query_reply.job_id, location=query_reply.location
-                )
-                raise QueryTimeout("Query timeout: {} ms".format(timeout_ms))
-
-            try:
-                query_reply.result(timeout=timeout_sec)
-            except concurrent.futures.TimeoutError:
-                # Use our own timeout logic
-                pass
-            except self.http_error as ex:
-                self.process_http_error(ex)
-
     def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
-        from google.auth.exceptions import RefreshError
         from google.cloud import bigquery
-        import pandas

-        job_config = {
+        job_config_dict = {
             "query": {
                 "useLegacySql": self.dialect
                 == "legacy"
@@ -470,74 +411,27 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
         }
         config = kwargs.get("configuration")
         if config is not None:
-            job_config.update(config)
+            job_config_dict.update(config)

-        self._start_timer()
-
-        try:
-            logger.debug("Requesting query... ")
-            query_reply = self.client.query(
-                query,
-                job_config=bigquery.QueryJobConfig.from_api_repr(job_config),
-                location=self.location,
-                project=self.project_id,
-            )
-            logger.debug("Query running...")
-        except (RefreshError, ValueError) as ex:
-            if self.private_key:
-                raise AccessDenied(
-                    f"The service account credentials are not valid: {ex}"
-                )
-            else:
-                raise AccessDenied(
-                    "The credentials have been revoked or expired, "
-                    f"please re-run the application to re-authorize: {ex}"
-                )
-        except self.http_error as ex:
-            self.process_http_error(ex)
-
-        job_id = query_reply.job_id
-        logger.debug("Job ID: %s" % job_id)
-
-        timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get(
-            "timeoutMs"
-        )
+        timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict[
+            "query"
+        ].get("timeoutMs")
         timeout_ms = int(timeout_ms) if timeout_ms else None
-        self._wait_for_query_job(query_reply, timeout_ms)

-        if query_reply.cache_hit:
-            logger.debug("Query done.\nCache hit.\n")
-        else:
-            bytes_processed = query_reply.total_bytes_processed or 0
-            bytes_billed = query_reply.total_bytes_billed or 0
-            logger.debug(
-                "Query done.\nProcessed: {} Billed: {}".format(
-                    self.sizeof_fmt(bytes_processed),
-                    self.sizeof_fmt(bytes_billed),
-                )
-            )
-            logger.debug(
-                "Standard price: ${:,.2f} USD\n".format(
-                    bytes_billed * self.query_price_for_TB
-                )
-            )
+        self._start_timer()
+        job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
+        rows_iter = pandas_gbq.query.query_and_wait(
+            self,
+            self.client,
+            query,
+            location=self.location,
+            project_id=self.project_id,
+            job_config=job_config,
+            max_results=max_results,
+            timeout_ms=timeout_ms,
+        )

         dtypes = kwargs.get("dtypes")
-
-        # Ensure destination is populated.
-        try:
-            query_reply.result()
-        except self.http_error as ex:
-            self.process_http_error(ex)
-
-        # Avoid attempting to download results from DML queries, which have no
-        # destination.
-        if query_reply.destination is None:
-            return pandas.DataFrame()
-
-        rows_iter = self.client.list_rows(
-            query_reply.destination, max_results=max_results
-        )
         return self._download_results(
             rows_iter,
             max_results=max_results,
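The new pandas_gbq.query module itself is not shown in this excerpt. Below is a hedged sketch of how a query_and_wait helper with the call signature used above might be put together, reconstructed from the _wait_for_query_job and run_query logic removed in this commit; the commit's actual implementation may differ, for example in HTTP error handling, progress reporting, and DML queries that have no destination table.

# Hedged sketch only: a possible shape for pandas_gbq.query.query_and_wait,
# reconstructed from the removed _wait_for_query_job logic above.
import concurrent.futures

from pandas_gbq.exceptions import QueryTimeout


def query_and_wait(
    connector,
    client,
    query,
    *,
    location,
    project_id,
    job_config,
    max_results,
    timeout_ms,
):
    """Start a query job, wait for it (honoring timeout_ms), and return rows."""
    query_reply = client.query(
        query, job_config=job_config, location=location, project=project_id
    )

    # Wake up at most every 10 seconds so elapsed-time progress can be logged.
    timeout_sec = 10.0
    if timeout_ms:
        timeout_sec = min(timeout_sec, timeout_ms / 1000.0)

    while query_reply.state != "DONE":
        connector.log_elapsed_seconds("  Elapsed", "s. Waiting...")

        if timeout_ms and timeout_ms < connector.get_elapsed_seconds() * 1000:
            client.cancel_job(query_reply.job_id, location=query_reply.location)
            raise QueryTimeout("Query timeout: {} ms".format(timeout_ms))

        try:
            query_reply.result(timeout=timeout_sec)
        except concurrent.futures.TimeoutError:
            # Enforce the caller's timeout via the loop above.
            pass

    # Return an iterator over the destination table's rows, as run_query expects.
    return client.list_rows(query_reply.destination, max_results=max_results)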