25 | 25 | import time |
26 | 26 | |
27 | 27 | import pytest |
28 | | -import six |
29 | 28 | |
30 | 29 | try: |
31 | 30 | import fastparquet |
@@ -585,173 +584,6 @@ def test_manage_views(client, to_delete): |
585 | 584 | # [END bigquery_grant_view_access] |
586 | 585 | |
587 | 586 | |
588 | | -def test_load_table_from_uri_autodetect(client, to_delete, capsys): |
589 | | - """Load table from a GCS URI using various formats and auto-detected schema |
590 | | - Each file format has its own tested load from URI sample. Because most of |
591 | | - the code is common for autodetect, append, and truncate, this sample |
592 | | - includes snippets for all supported formats but only calls a single load |
593 | | - job. |
594 | | - This code snippet is made up of shared code, then format-specific code, |
595 | | - followed by more shared code. Note that only the last format in the |
596 | | - format-specific code section will be tested in this test. |
597 | | - """ |
598 | | - dataset_id = "load_table_from_uri_auto_{}".format(_millis()) |
599 | | - project = client.project |
600 | | - dataset_ref = bigquery.DatasetReference(project, dataset_id) |
601 | | - dataset = bigquery.Dataset(dataset_ref) |
602 | | - client.create_dataset(dataset) |
603 | | - to_delete.append(dataset) |
604 | | - |
605 | | - # Shared code |
606 | | - # [START bigquery_load_table_gcs_csv_autodetect] |
607 | | - # [START bigquery_load_table_gcs_json_autodetect] |
608 | | - # from google.cloud import bigquery |
609 | | - # client = bigquery.Client() |
610 | | - # dataset_id = 'my_dataset' |
611 | | - |
612 | | - dataset_ref = bigquery.DatasetReference(project, dataset_id) |
613 | | - job_config = bigquery.LoadJobConfig() |
614 | | - job_config.autodetect = True |
615 | | - # [END bigquery_load_table_gcs_csv_autodetect] |
616 | | - # [END bigquery_load_table_gcs_json_autodetect] |
617 | | - |
618 | | - # Format-specific code |
619 | | - # [START bigquery_load_table_gcs_csv_autodetect] |
620 | | - job_config.skip_leading_rows = 1 |
621 | | - # The source format defaults to CSV, so the line below is optional. |
622 | | - job_config.source_format = bigquery.SourceFormat.CSV |
623 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" |
624 | | - # [END bigquery_load_table_gcs_csv_autodetect] |
625 | | - # unset csv-specific attribute |
626 | | - del job_config._properties["load"]["skipLeadingRows"] |
627 | | - |
628 | | - # [START bigquery_load_table_gcs_json_autodetect] |
629 | | - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON |
630 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" |
631 | | - # [END bigquery_load_table_gcs_json_autodetect] |
632 | | - |
633 | | - # Shared code |
634 | | - # [START bigquery_load_table_gcs_csv_autodetect] |
635 | | - # [START bigquery_load_table_gcs_json_autodetect] |
636 | | - load_job = client.load_table_from_uri( |
637 | | - uri, dataset_ref.table("us_states"), job_config=job_config |
638 | | - ) # API request |
639 | | - print("Starting job {}".format(load_job.job_id)) |
640 | | - |
641 | | - load_job.result() # Waits for table load to complete. |
642 | | - print("Job finished.") |
643 | | - |
644 | | - destination_table = client.get_table(dataset_ref.table("us_states")) |
645 | | - print("Loaded {} rows.".format(destination_table.num_rows)) |
646 | | - # [END bigquery_load_table_gcs_csv_autodetect] |
647 | | - # [END bigquery_load_table_gcs_json_autodetect] |
648 | | - |
649 | | - out, _ = capsys.readouterr() |
650 | | - assert "Loaded 50 rows." in out |
651 | | - |
652 | | - |
653 | | -def test_load_table_from_uri_truncate(client, to_delete, capsys): |
654 | | - """Replaces table data with data from a GCS URI using various formats |
655 | | - Each file format has its own tested load from URI sample. Because most of |
656 | | - the code is common for autodetect, append, and truncate, this sample |
657 | | - includes snippets for all supported formats but only calls a single load |
658 | | - job. |
659 | | - This code snippet is made up of shared code, then format-specific code, |
660 | | - followed by more shared code. Note that only the last format in the |
661 | | - format-specific code section will be tested in this test. |
662 | | - """ |
663 | | - dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) |
664 | | - project = client.project |
665 | | - dataset_ref = bigquery.DatasetReference(project, dataset_id) |
666 | | - dataset = bigquery.Dataset(dataset_ref) |
667 | | - client.create_dataset(dataset) |
668 | | - to_delete.append(dataset) |
669 | | - |
670 | | - job_config = bigquery.LoadJobConfig() |
671 | | - job_config.schema = [ |
672 | | - bigquery.SchemaField("name", "STRING"), |
673 | | - bigquery.SchemaField("post_abbr", "STRING"), |
674 | | - ] |
675 | | - table_ref = dataset.table("us_states") |
676 | | - body = six.BytesIO(b"Washington,WA") |
677 | | - client.load_table_from_file(body, table_ref, job_config=job_config).result() |
678 | | - previous_rows = client.get_table(table_ref).num_rows |
679 | | - assert previous_rows > 0 |
680 | | - |
681 | | - # Shared code |
682 | | - # [START bigquery_load_table_gcs_avro_truncate] |
683 | | - # [START bigquery_load_table_gcs_csv_truncate] |
684 | | - # [START bigquery_load_table_gcs_json_truncate] |
685 | | - # [START bigquery_load_table_gcs_parquet_truncate] |
686 | | - # [START bigquery_load_table_gcs_orc_truncate] |
687 | | - # from google.cloud import bigquery |
688 | | - # client = bigquery.Client() |
689 | | - # table_ref = client.dataset('my_dataset').table('existing_table') |
690 | | - |
691 | | - job_config = bigquery.LoadJobConfig() |
692 | | - job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE |
693 | | - # [END bigquery_load_table_gcs_avro_truncate] |
694 | | - # [END bigquery_load_table_gcs_csv_truncate] |
695 | | - # [END bigquery_load_table_gcs_json_truncate] |
696 | | - # [END bigquery_load_table_gcs_parquet_truncate] |
697 | | - # [END bigquery_load_table_gcs_orc_truncate] |
698 | | - |
699 | | - # Format-specific code |
700 | | - # [START bigquery_load_table_gcs_avro_truncate] |
701 | | - job_config.source_format = bigquery.SourceFormat.AVRO |
702 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" |
703 | | - # [END bigquery_load_table_gcs_avro_truncate] |
704 | | - |
705 | | - # [START bigquery_load_table_gcs_csv_truncate] |
706 | | - job_config.skip_leading_rows = 1 |
707 | | - # The source format defaults to CSV, so the line below is optional. |
708 | | - job_config.source_format = bigquery.SourceFormat.CSV |
709 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" |
710 | | - # [END bigquery_load_table_gcs_csv_truncate] |
711 | | - # unset csv-specific attribute |
712 | | - del job_config._properties["load"]["skipLeadingRows"] |
713 | | - |
714 | | - # [START bigquery_load_table_gcs_json_truncate] |
715 | | - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON |
716 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" |
717 | | - # [END bigquery_load_table_gcs_json_truncate] |
718 | | - |
719 | | - # [START bigquery_load_table_gcs_parquet_truncate] |
720 | | - job_config.source_format = bigquery.SourceFormat.PARQUET |
721 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" |
722 | | - # [END bigquery_load_table_gcs_parquet_truncate] |
723 | | - |
724 | | - # [START bigquery_load_table_gcs_orc_truncate] |
725 | | - job_config.source_format = bigquery.SourceFormat.ORC |
726 | | - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" |
727 | | - # [END bigquery_load_table_gcs_orc_truncate] |
728 | | - |
729 | | - # Shared code |
730 | | - # [START bigquery_load_table_gcs_avro_truncate] |
731 | | - # [START bigquery_load_table_gcs_csv_truncate] |
732 | | - # [START bigquery_load_table_gcs_json_truncate] |
733 | | - # [START bigquery_load_table_gcs_parquet_truncate] |
734 | | - # [START bigquery_load_table_gcs_orc_truncate] |
735 | | - load_job = client.load_table_from_uri( |
736 | | - uri, table_ref, job_config=job_config |
737 | | - ) # API request |
738 | | - print("Starting job {}".format(load_job.job_id)) |
739 | | - |
740 | | - load_job.result() # Waits for table load to complete. |
741 | | - print("Job finished.") |
742 | | - |
743 | | - destination_table = client.get_table(table_ref) |
744 | | - print("Loaded {} rows.".format(destination_table.num_rows)) |
745 | | - # [END bigquery_load_table_gcs_avro_truncate] |
746 | | - # [END bigquery_load_table_gcs_csv_truncate] |
747 | | - # [END bigquery_load_table_gcs_json_truncate] |
748 | | - # [END bigquery_load_table_gcs_parquet_truncate] |
749 | | - # [END bigquery_load_table_gcs_orc_truncate] |
750 | | - |
751 | | - out, _ = capsys.readouterr() |
752 | | - assert "Loaded 50 rows." in out |
753 | | - |
754 | | - |
755 | 587 | def test_load_table_add_column(client, to_delete): |
756 | 588 | dataset_id = "load_table_add_column_{}".format(_millis()) |
757 | 589 | project = client.project |