
Commit ba22fb9

added gzip support

1 parent 986db12

2 files changed: 18 additions & 1 deletion


README.md

Lines changed: 5 additions & 0 deletions

@@ -182,3 +182,8 @@ psql> select * from animals;
  tortoise | 205
 (4 rows)
 ```
+
+## Support for gzip files
+
+If the file has the metadata `Content-Encoding=gzip` in S3, it will be automatically unzipped before being copied to the table.
+One can update the metadata in S3 by following the instructions described [here](https://docs.aws.amazon.com/AmazonS3/latest/user-guide/add-object-metadata.html).
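
As a programmatic companion to the console instructions linked in the README addition, here is a minimal boto3 sketch (not part of this commit; the bucket and key names are hypothetical) of the two usual ways to get `Content-Encoding=gzip` onto an object: setting it at upload time, or adding it to an existing object by copying the object over itself with replaced metadata.

```python
import gzip
import shutil

import boto3

s3 = boto3.client('s3')

# Compress a local CSV, then upload it with the metadata already set.
with open('animals.csv', 'rb') as src, gzip.open('animals.csv.gz', 'wb') as dst:
    shutil.copyfileobj(src, dst)
s3.upload_file(
    'animals.csv.gz', 'my-bucket', 'animals.csv.gz',  # hypothetical bucket/key
    ExtraArgs={'ContentEncoding': 'gzip'},
)

# Or: add the metadata to an object already in S3 by copying it onto
# itself with MetadataDirective='REPLACE'.
s3.copy_object(
    Bucket='my-bucket',
    Key='animals.csv.gz',
    CopySource={'Bucket': 'my-bucket', 'Key': 'animals.csv.gz'},
    ContentEncoding='gzip',
    MetadataDirective='REPLACE',
)
```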

aws_s3--0.0.1.sql

Lines changed: 13 additions & 1 deletion

@@ -65,6 +65,8 @@ AS $$
 
     boto3 = cache_import('boto3')
    tempfile = cache_import('tempfile')
+    gzip = cache_import('gzip')
+    shutil = cache_import('shutil')
 
    plan = plpy.prepare('select current_setting($1, true)::int', ['TEXT'])
 
@@ -76,8 +78,18 @@ AS $$
         region_name=region
     )
 
+    response = s3.head_object(Bucket=bucket, Key=file_path)
+    content_encoding = response.get('ContentEncoding')
+
     with tempfile.NamedTemporaryFile() as fd:
-        s3.download_fileobj(bucket, file_path, fd)
+        if content_encoding and content_encoding.lower() == 'gzip':
+            with tempfile.NamedTemporaryFile() as gzfd:
+                s3.download_fileobj(bucket, file_path, gzfd)
+                gzfd.flush()
+                gzfd.seek(0)
+                shutil.copyfileobj(gzip.GzipFile(fileobj=gzfd, mode='rb'), fd)
+        else:
+            s3.download_fileobj(bucket, file_path, fd)
         fd.flush()
         res = plpy.execute("COPY {table_name} {column_list} FROM {filename} {options};".format(
             table_name=table_name,
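
To make the control flow above easier to follow outside of plpython, here is a self-contained sketch of the same detect-then-decompress pattern the commit adds; plain imports stand in for the extension's `cache_import` helper, and the bucket/key in the usage comment are hypothetical.

```python
import gzip
import shutil
import tempfile

import boto3


def download_for_copy(bucket, file_path):
    """Fetch an S3 object into a local temp file, gunzipping it when the
    object carries Content-Encoding=gzip, and return the local path."""
    s3 = boto3.client('s3')

    # A HEAD request is enough to read the Content-Encoding metadata.
    head = s3.head_object(Bucket=bucket, Key=file_path)
    content_encoding = head.get('ContentEncoding')

    # delete=False so the file survives this function for the caller
    # (the extension instead keeps fd open while COPY reads it).
    fd = tempfile.NamedTemporaryFile(delete=False)
    if content_encoding and content_encoding.lower() == 'gzip':
        # Stage the compressed bytes, flush and rewind, then stream the
        # decompressed output into fd -- download_fileobj writes
        # sequentially, so GzipFile cannot read until we seek back to 0.
        with tempfile.NamedTemporaryFile() as gzfd:
            s3.download_fileobj(bucket, file_path, gzfd)
            gzfd.flush()
            gzfd.seek(0)
            shutil.copyfileobj(gzip.GzipFile(fileobj=gzfd, mode='rb'), fd)
    else:
        s3.download_fileobj(bucket, file_path, fd)
    fd.flush()
    fd.close()
    return fd.name  # e.g. handed to COPY ... FROM '<path>'


# Hypothetical usage; the caller is responsible for deleting the file.
# path = download_for_copy('my-bucket', 'animals.csv.gz')
```

The intermediate temp file mirrors the diff's approach: staging the compressed bytes first keeps the rest of the path identical for gzipped and plain objects, at the cost of one extra local copy.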
