Commit 2647909

add course files
1 parent d6115c5 commit 2647909

File tree

16 files changed: +544, -0 lines changed

.DS_Store

0 Bytes
Binary file not shown.

Snowflake/.DS_Store

6 KB
Binary file not shown.
Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
------------------------ Prep to load the data ------------------------------

CREATE OR REPLACE TABLE EVENTS
(
GLOBALEVENTID integer
,SQLDATE date
,MONTHYEAR string
,Actor1Name string
,NumMentions integer
,SOURCEURL string
);

list @GDELT_EVENTS;


CREATE OR REPLACE FILE FORMAT CSV_TAB_FMT
FIELD_DELIMITER = '\t'
TYPE = CSV;

show file formats in database GDELT;

------------------------ Loading the data ------------------------------

COPY INTO EVENTS FROM (
SELECT
$1::int GLOBALEVENTID
,TO_DATE($2,'YYYYMMDD') AS SQLDATE
,$3::string MONTHYEAR
,$7::string Actor1Name
,$34::int NumMentions
,$58::string SOURCEURL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/20190917.export.csv
(file_format => MANAGE_DB.FILE_FORMATS.CSV_TAB_FMT)
);

-- show that nothing else was loaded
select * from EVENTS limit 10;

-- change warehouse size from small to large (4x the credits)
alter warehouse compute_wh set warehouse_size='large';

-- load data with the large warehouse
show warehouses;

-- Run the COPY command again and show that it was faster

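-- A sketch (not part of the original script): a plain rerun of the COPY above
-- loads nothing, because Snowflake's load metadata skips files it has already
-- ingested. FORCE = TRUE reloads the file so the timing comparison is real.
COPY INTO EVENTS FROM (
SELECT
$1::int GLOBALEVENTID
,TO_DATE($2,'YYYYMMDD') AS SQLDATE
,$3::string MONTHYEAR
,$7::string Actor1Name
,$34::int NumMentions
,$58::string SOURCEURL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/20190917.export.csv
(file_format => MANAGE_DB.FILE_FORMATS.CSV_TAB_FMT) )
FORCE = TRUE;
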
------------------------ Cached results and Clone ------------------------------

select * from EVENTS limit 20;

SELECT
Actor1Name as actor_name
,sum(NumMentions) mentions_actor
,count(GLOBALEVENTID) events_actor
FROM EVENTS
where Actor1Name is not null
group by 1
order by 2 desc;

create table trips_dev clone trips;
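
-- A minimal check (a sketch; assumes the citibike lab's TRIPS table is in the
-- current schema): a clone is zero-copy metadata, so it is available
-- immediately and the counts match until either table diverges.
select
(select count(*) from trips) as trips_rows,
(select count(*) from trips_dev) as trips_dev_rows;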


------------------------ Semi-structured data ------------------------------

create database weather;

use role sysadmin;
use warehouse compute_wh;
use database weather;
use schema public;

create table json_weather_data (v variant);

create stage nyc_weather
url = 's3://snowflake-workshop-lab/weather-nyc';

list @nyc_weather;

copy into json_weather_data
from @nyc_weather
file_format = (type=json);

select * from json_weather_data limit 10;

create view json_weather_data_view as
select
v:time::timestamp as observation_time,
v:city.id::int as city_id,
v:city.name::string as city_name,
v:city.country::string as country,
v:city.coord.lat::float as city_lat,
v:city.coord.lon::float as city_lon,
v:clouds.all::int as clouds,
(v:main.temp::float)-273.15 as temp_avg,
(v:main.temp_min::float)-273.15 as temp_min,
(v:main.temp_max::float)-273.15 as temp_max,
v:weather[0].main::string as weather,
v:weather[0].description::string as weather_desc,
v:weather[0].icon::string as weather_icon,
v:wind.deg::float as wind_dir,
v:wind.speed::float as wind_speed
from json_weather_data
where city_id = 5128638;
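
-- A usage sketch (not in the original script; the month filter is an
-- assumption about the sample data): the view reads like a normal table.
select observation_time, temp_avg, weather, weather_desc
from json_weather_data_view
where date_trunc('month', observation_time) = '2018-01-01'::date
limit 20;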


------------------------ time travel ------------------------------

drop table json_weather_data;

select * from json_weather_data limit 10;

undrop table json_weather_data;

use role sysadmin;
use warehouse compute_wh;
use database citibike;
use schema public;

update trips set start_station_name = 'oops';

select
start_station_name as "station",
count(*) as "rides"
from trips
group by 1
order by 2 desc
limit 20;

set query_id =
(select query_id from
table(information_schema.query_history_by_session (result_limit=>5))
where query_text like 'update%' order by start_time limit 1);

create or replace table trips as
(select * from trips before (statement => $query_id));

select
start_station_name as "station",
count(*) as "rides"
from trips
group by 1
order by 2 desc
limit 20;
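
-- Alternative time-travel forms (a sketch, not part of the original script):
-- the restore above keys off a statement id; AT/BEFORE also accept a relative
-- offset in seconds or an absolute timestamp (the timestamp here is made up).
select count(*) from trips at(offset => -60*5);
select count(*) from trips before(timestamp => '2019-09-17 12:00:00'::timestamp_ltz);
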
Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
SELECT
$1
,$2
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/20190916.export.csv ;

CREATE DATABASE GDELT;

CREATE SCHEMA EVENTS;

CREATE TABLE IF NOT EXISTS GDELT.EVENTS.EVENTS_FULL (
GLOBALEVENTID INT
,SQLDATE varchar
,MonthYear varchar
,Year varchar
,FractionDate varchar
,Actor1Code varchar
,Actor1Name varchar
,Actor1CountryCode varchar
,Actor1KnownGroupCode varchar
,Actor1EthnicCode varchar
,Actor1Religion1Code varchar
,Actor1Religion2Code varchar
,Actor1Type1Code varchar
,Actor1Type2Code varchar
,Actor1Type3Code varchar
,Actor2Code varchar
,Actor2Name varchar
,Actor2CountryCode varchar
,Actor2KnownGroupCode varchar
,Actor2EthnicCode varchar
,Actor2Religion1Code varchar
,Actor2Religion2Code varchar
,Actor2Type1Code varchar
,Actor2Type2Code varchar
,Actor2Type3Code varchar
,IsRootEvent varchar
,EventCode varchar
,EventBaseCode varchar
,EventRootCode varchar
,QuadClass varchar
,GoldsteinScale varchar
,NumMentions varchar
,NumSources varchar
,NumArticles varchar
,AvgTone varchar
,Actor1Geo_Type varchar
,Actor1Geo_FullName varchar
,Actor1Geo_CountryCode varchar
,Actor1Geo_ADM1Code varchar
,Actor1Geo_Lat varchar
,Actor1Geo_Long varchar
,Actor1Geo_FeatureID varchar
,Actor2Geo_Type varchar
,Actor2Geo_FullName varchar
,Actor2Geo_CountryCode varchar
,Actor2Geo_ADM1Code varchar
,Actor2Geo_Lat varchar
,Actor2Geo_Long varchar
,Actor2Geo_FeatureID varchar
,ActionGeo_Type varchar
,ActionGeo_FullName varchar
,ActionGeo_CountryCode varchar
,ActionGeo_ADM1Code varchar
,ActionGeo_Lat varchar
,ActionGeo_Long varchar
,ActionGeo_FeatureID varchar
,DATEADDED varchar
,SOURCEURL varchar
);

COPY INTO GDELT.EVENTS.EVENTS_FULL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events
file_format = ( type = 'csv' field_delimiter = '\t')
pattern = '.*2019091.*'
;

select * from GDELT.EVENTS.EVENTS_FULL;
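
-- A pre-flight sketch (not in the original file): VALIDATION_MODE dry-runs the
-- COPY and returns parse errors without loading anything.
COPY INTO GDELT.EVENTS.EVENTS_FULL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events
file_format = ( type = 'csv' field_delimiter = '\t')
pattern = '.*2019091.*'
VALIDATION_MODE = RETURN_ERRORS;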
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
CREATE OR REPLACE SCHEMA FILE_FORMATS;

CREATE OR REPLACE FILE FORMAT MANAGE_DB.FILE_FORMATS.CSV_TAB_FMT
FIELD_DELIMITER = '\t'
TYPE = CSV
--SKIP_HEADER=1
;

DESC FILE FORMAT FILE_FORMATS.CSV_TAB_FMT;

LIST @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/ ;

COPY INTO GDELT.EVENTS.EVENTS_FULL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/20130922.export.csv
file_format = MANAGE_DB.FILE_FORMATS.CSV_TAB_FMT
;
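
-- A variant sketch (not in the original file): format options can also be
-- spelled out inline per COPY, e.g. to skip a header row for one load only.
COPY INTO GDELT.EVENTS.EVENTS_FULL
FROM @MANAGE_DB.EXTERNAL_STAGES.GDELT_EVENTS/events/20130922.export.csv
file_format = ( type = 'csv' field_delimiter = '\t' skip_header = 1 );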
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
SELECT DISTINCT
RAW_FILE:id id
,array_size(RAW_FILE:multiMedia) size
from medias.youtube.statistics_raw ;

SELECT distinct
RAW_FILE:id::int id
,RAW_FILE:createdAt createdAt
,RAW_FILE:description::string description
,RAW_FILE:likeDislike.dislikes dislikes
,RAW_FILE:likeDislike.likes likes
,RAW_FILE:likeDislike.userAction user_action
,f.value:id multimedia_id
FROM MEDIAS.YOUTUBE.STATISTICS_RAW ,
table(flatten(RAW_FILE:multiMedia)) f
--where RAW_FILE:id::int = 2114
;
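
-- A sketch (not in the original file): flatten drops rows whose multiMedia
-- array is missing or empty; OUTER => TRUE keeps them with NULL values.
SELECT distinct
RAW_FILE:id::int id
,f.value:id multimedia_id
FROM MEDIAS.YOUTUBE.STATISTICS_RAW,
table(flatten(input => RAW_FILE:multiMedia, outer => true)) f;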
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
CREATE OR REPLACE TABLE MEDIAS.YOUTUBE.STATISTICS AS
SELECT distinct
RAW_FILE:id::int id
,TO_TIMESTAMP(RAW_FILE:createdAt) createdAt
,RAW_FILE:description::string description
,RAW_FILE:likeDislike.dislikes::INT dislikes
,RAW_FILE:likeDislike.likes::INT likes
,RAW_FILE:likeDislike.userAction::INT user_action
,f.value:id::INT multimedia_id
FROM MEDIAS.YOUTUBE.STATISTICS_RAW ,
table(flatten(RAW_FILE:multiMedia)) f
;

SELECT COUNT(*) FROM MEDIAS.YOUTUBE.STATISTICS;

INSERT INTO MEDIAS.YOUTUBE.STATISTICS
SELECT distinct
RAW_FILE:id::int id
,TO_TIMESTAMP(RAW_FILE:createdAt) createdAt
,RAW_FILE:description::string description
,RAW_FILE:likeDislike.dislikes::INT dislikes
,RAW_FILE:likeDislike.likes::INT likes
,RAW_FILE:likeDislike.userAction::INT user_action
,f.value:id::INT multimedia_id
FROM MEDIAS.YOUTUBE.STATISTICS_RAW ,
table(flatten(RAW_FILE:multiMedia)) f ;
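
-- A check sketch (not in the original file): the INSERT above re-adds the same
-- rows, so the total count doubles while the distinct count stays put.
SELECT COUNT(*) AS total_rows,
COUNT(DISTINCT id, multimedia_id) AS distinct_rows
FROM MEDIAS.YOUTUBE.STATISTICS;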
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
-- CREATE STAGE > LOAD RAW DATA > ANALYZE AND PARSE > BUILD THE COPY COMMAND

CREATE OR REPLACE STAGE MANAGE_DB.EXTERNAL_STAGES.SNOWFLAKE_SERIES
URL= 's3://snowflake-series/'
STORAGE_INTEGRATION = S3_INT;

LIST @MANAGE_DB.EXTERNAL_STAGES.SNOWFLAKE_SERIES;

CREATE OR REPLACE FILE FORMAT MANAGE_DB.FILE_FORMATS.JSON_FMT
TYPE = JSON;

CREATE DATABASE MEDIAS;

CREATE SCHEMA MEDIAS.YOUTUBE;

CREATE OR REPLACE table MEDIAS.YOUTUBE.STATISTICS_RAW (
raw_file variant);

COPY INTO MEDIAS.YOUTUBE.STATISTICS_RAW
FROM @MANAGE_DB.EXTERNAL_STAGES.SNOWFLAKE_SERIES
file_format= MANAGE_DB.FILE_FORMATS.JSON_FMT
files = ('youtube_data.json');

SELECT * FROM MEDIAS.YOUTUBE.STATISTICS_RAW ;
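
-- A first-pass exploration sketch (not in the original file): list the
-- top-level keys of the raw JSON and their types before writing the parse query.
SELECT f.key, typeof(f.value) AS value_type, count(*) AS n
FROM MEDIAS.YOUTUBE.STATISTICS_RAW,
table(flatten(raw_file)) f
GROUP BY 1, 2
ORDER BY 1;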
