 import time
 
 import pytest
-import six
 
 try:
     import fastparquet
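The dropped `six` import's only use visible in this diff is `six.BytesIO`, in the truncate test removed below. For a Python-3-only codebase (an assumption here, not something this commit states), the standard-library `io.BytesIO` is a drop-in equivalent:

```python
import io

# six.BytesIO is an alias for io.BytesIO on Python 3, so the seeding step
# from the removed truncate test could be written without six:
body = io.BytesIO(b"Washington,WA")
```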
@@ -581,169 +580,6 @@ def test_manage_views(client, to_delete):
     # [END bigquery_grant_view_access]


-def test_load_table_from_uri_autodetect(client, to_delete, capsys):
-    """Load table from a GCS URI using various formats and auto-detected schema
-    Each file format has its own tested load from URI sample. Because most of
-    the code is common for autodetect, append, and truncate, this sample
-    includes snippets for all supported formats but only calls a single load
-    job.
-    This code snippet is made up of shared code, then format-specific code,
-    followed by more shared code. Note that only the last format in the
-    format-specific code section will be tested in this test.
-    """
-    dataset_id = "load_table_from_uri_auto_{}".format(_millis())
-    dataset = bigquery.Dataset(client.dataset(dataset_id))
-    client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    # Shared code
-    # [START bigquery_load_table_gcs_csv_autodetect]
-    # [START bigquery_load_table_gcs_json_autodetect]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-    # dataset_id = 'my_dataset'
-
-    dataset_ref = client.dataset(dataset_id)
-    job_config = bigquery.LoadJobConfig()
-    job_config.autodetect = True
-    # [END bigquery_load_table_gcs_csv_autodetect]
-    # [END bigquery_load_table_gcs_json_autodetect]
-
-    # Format-specific code
-    # [START bigquery_load_table_gcs_csv_autodetect]
-    job_config.skip_leading_rows = 1
-    # The source format defaults to CSV, so the line below is optional.
-    job_config.source_format = bigquery.SourceFormat.CSV
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
-    # [END bigquery_load_table_gcs_csv_autodetect]
-    # unset csv-specific attribute
-    del job_config._properties["load"]["skipLeadingRows"]
-
-    # [START bigquery_load_table_gcs_json_autodetect]
-    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"
-    # [END bigquery_load_table_gcs_json_autodetect]
-
-    # Shared code
-    # [START bigquery_load_table_gcs_csv_autodetect]
-    # [START bigquery_load_table_gcs_json_autodetect]
-    load_job = client.load_table_from_uri(
-        uri, dataset_ref.table("us_states"), job_config=job_config
-    )  # API request
-    print("Starting job {}".format(load_job.job_id))
-
-    load_job.result()  # Waits for table load to complete.
-    print("Job finished.")
-
-    destination_table = client.get_table(dataset_ref.table("us_states"))
-    print("Loaded {} rows.".format(destination_table.num_rows))
-    # [END bigquery_load_table_gcs_csv_autodetect]
-    # [END bigquery_load_table_gcs_json_autodetect]
-
-    out, _ = capsys.readouterr()
-    assert "Loaded 50 rows." in out
-
-
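For reference, the autodetect sample can also be written as a small standalone script against the current google-cloud-bigquery client. This is only a sketch of the same pattern, not the replacement shipped by this change; the project, dataset, and table names are placeholders, and it assumes a client version that accepts "project.dataset.table" string IDs and keyword arguments to `LoadJobConfig`:

```python
from google.cloud import bigquery

client = bigquery.Client()

# Placeholder destination table; a string ID is assumed to be accepted here.
table_id = "my-project.my_dataset.us_states"

job_config = bigquery.LoadJobConfig(
    autodetect=True,  # let BigQuery infer the schema from the CSV contents
    skip_leading_rows=1,  # skip the header row
    source_format=bigquery.SourceFormat.CSV,
)
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
print("Starting job {}".format(load_job.job_id))

load_job.result()  # wait for the load to complete

destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))
```

Note that in the removed test only the last assigned format (newline-delimited JSON) was actually exercised, since the snippets share one `job_config` and one `uri` variable, as the docstring says.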
-def test_load_table_from_uri_truncate(client, to_delete, capsys):
-    """Replaces table data with data from a GCS URI using various formats
-    Each file format has its own tested load from URI sample. Because most of
-    the code is common for autodetect, append, and truncate, this sample
-    includes snippets for all supported formats but only calls a single load
-    job.
-    This code snippet is made up of shared code, then format-specific code,
-    followed by more shared code. Note that only the last format in the
-    format-specific code section will be tested in this test.
-    """
-    dataset_id = "load_table_from_uri_trunc_{}".format(_millis())
-    dataset = bigquery.Dataset(client.dataset(dataset_id))
-    client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    job_config = bigquery.LoadJobConfig()
-    job_config.schema = [
-        bigquery.SchemaField("name", "STRING"),
-        bigquery.SchemaField("post_abbr", "STRING"),
-    ]
-    table_ref = dataset.table("us_states")
-    body = six.BytesIO(b"Washington,WA")
-    client.load_table_from_file(body, table_ref, job_config=job_config).result()
-    previous_rows = client.get_table(table_ref).num_rows
-    assert previous_rows > 0
-
-    # Shared code
-    # [START bigquery_load_table_gcs_avro_truncate]
-    # [START bigquery_load_table_gcs_csv_truncate]
-    # [START bigquery_load_table_gcs_json_truncate]
-    # [START bigquery_load_table_gcs_parquet_truncate]
-    # [START bigquery_load_table_gcs_orc_truncate]
-    # from google.cloud import bigquery
-    # client = bigquery.Client()
-    # table_ref = client.dataset('my_dataset').table('existing_table')
-
-    job_config = bigquery.LoadJobConfig()
-    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
-    # [END bigquery_load_table_gcs_avro_truncate]
-    # [END bigquery_load_table_gcs_csv_truncate]
-    # [END bigquery_load_table_gcs_json_truncate]
-    # [END bigquery_load_table_gcs_parquet_truncate]
-    # [END bigquery_load_table_gcs_orc_truncate]
-
-    # Format-specific code
-    # [START bigquery_load_table_gcs_avro_truncate]
-    job_config.source_format = bigquery.SourceFormat.AVRO
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"
-    # [END bigquery_load_table_gcs_avro_truncate]
-
-    # [START bigquery_load_table_gcs_csv_truncate]
-    job_config.skip_leading_rows = 1
-    # The source format defaults to CSV, so the line below is optional.
-    job_config.source_format = bigquery.SourceFormat.CSV
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
-    # [END bigquery_load_table_gcs_csv_truncate]
-    # unset csv-specific attribute
-    del job_config._properties["load"]["skipLeadingRows"]
-
-    # [START bigquery_load_table_gcs_json_truncate]
-    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"
-    # [END bigquery_load_table_gcs_json_truncate]
-
-    # [START bigquery_load_table_gcs_parquet_truncate]
-    job_config.source_format = bigquery.SourceFormat.PARQUET
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
-    # [END bigquery_load_table_gcs_parquet_truncate]
-
-    # [START bigquery_load_table_gcs_orc_truncate]
-    job_config.source_format = bigquery.SourceFormat.ORC
-    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"
-    # [END bigquery_load_table_gcs_orc_truncate]
-
-    # Shared code
-    # [START bigquery_load_table_gcs_avro_truncate]
-    # [START bigquery_load_table_gcs_csv_truncate]
-    # [START bigquery_load_table_gcs_json_truncate]
-    # [START bigquery_load_table_gcs_parquet_truncate]
-    # [START bigquery_load_table_gcs_orc_truncate]
-    load_job = client.load_table_from_uri(
-        uri, table_ref, job_config=job_config
-    )  # API request
-    print("Starting job {}".format(load_job.job_id))
-
-    load_job.result()  # Waits for table load to complete.
-    print("Job finished.")
-
-    destination_table = client.get_table(table_ref)
-    print("Loaded {} rows.".format(destination_table.num_rows))
-    # [END bigquery_load_table_gcs_avro_truncate]
-    # [END bigquery_load_table_gcs_csv_truncate]
-    # [END bigquery_load_table_gcs_json_truncate]
-    # [END bigquery_load_table_gcs_parquet_truncate]
-    # [END bigquery_load_table_gcs_orc_truncate]
-
-    out, _ = capsys.readouterr()
-    assert "Loaded 50 rows." in out
-
-
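Similarly, a hedged standalone sketch of the truncate variant (same placeholder names and API assumptions as above): `WRITE_TRUNCATE` tells the load job to replace whatever is already in the destination table.

```python
from google.cloud import bigquery

client = bigquery.Client()
table_id = "my-project.my_dataset.us_states"  # placeholder existing table

job_config = bigquery.LoadJobConfig(
    # Overwrite the existing rows rather than appending to them.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    source_format=bigquery.SourceFormat.PARQUET,
)
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()  # wait for the load to complete

print("Loaded {} rows.".format(client.get_table(table_id).num_rows))
```

Self-describing formats such as Parquet, ORC, and Avro need no schema or `skip_leading_rows` settings, which is why the removed sample had to delete the CSV-only `skipLeadingRows` property from `job_config._properties` before switching formats.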
 def test_load_table_add_column(client, to_delete):
     dataset_id = "load_table_add_column_{}".format(_millis())
     dataset_ref = client.dataset(dataset_id)