Skip to content

Commit e716a8b

Browse files
authored
Add support for specifying custom patterns (#10)
1 parent 21c7310 commit e716a8b

File tree

16 files changed

+9464
-4296
lines changed

16 files changed

+9464
-4296
lines changed

README.md

Lines changed: 81 additions & 36 deletions
Large diffs are not rendered by default.

apps/csv2sql/README.md

Lines changed: 0 additions & 288 deletions
This file was deleted.

apps/csv2sql/lib/csv2sql.ex

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ defmodule Csv2sql do
7676
connect_timeout: :integer,
7777
pool_size: :integer,
7878
queue_target: :integer,
79-
queue_interval: :integer
79+
queue_interval: :integer,
80+
custom_date_patterns: :string,
81+
custom_datetime_patterns: :string
8082
]
8183
)
8284

@@ -115,6 +117,8 @@ defmodule Csv2sql do
115117
pool_size = opts[:pool_size] || 20
116118
queue_target = opts[:queue_target] || 5000
117119
queue_interval = opts[:queue_interval] || 1000
120+
custom_date_patterns = ["{YYYY}-{0M}-{0D}" | String.split((opts[:custom_date_patterns] || ""), ";")]
121+
custom_datetime_patterns = ["{YYYY}-{0M}-{0D} {0h24}:{0m}:{0s}" | String.split((opts[:custom_datetime_patterns] || ""), ";")]
118122

119123
repo_config = [
120124
username: username,
@@ -148,7 +152,9 @@ defmodule Csv2sql do
148152
[
149153
varchar_limit: varchar_limit,
150154
schema_file_path: schema_file_path,
151-
schema_infer_chunk_size: schema_infer_chunk_size
155+
schema_infer_chunk_size: schema_infer_chunk_size,
156+
custom_date_patterns: custom_date_patterns,
157+
custom_datetime_patterns: custom_datetime_patterns
152158
]},
153159
{Csv2sql.MainServer,
154160
[

apps/csv2sql/lib/csv2sql/database.ex

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,14 @@ defmodule Csv2sql.Database do
114114
{val, ""} = Float.parse(val)
115115
val
116116

117+
# MYSQL
118+
"DATE" ->
119+
format_datetime(val, true)
120+
121+
# MYSQL
122+
"DATETIME" ->
123+
format_datetime(val, false)
124+
117125
_ ->
118126
val
119127
end
@@ -157,4 +165,35 @@ defmodule Csv2sql.Database do
157165

158166
Helpers.print_msg("Create Schema for: #{table_name}")
159167
end
168+
169+
# Warning: Timezone information, if any, will be ignored while parsing datetime
170+
# Converts a raw date/datetime string into the textual form handed to the database.
#
# `is_date` selects which configured pattern list is tried (`:custom_date_patterns`
# when truthy, `:custom_datetime_patterns` otherwise) and which output shape
# `to_date_or_datetime_string/2` produces. Patterns are tried in order and the
# first one Timex can parse wins; `nil` is returned when none of them match.
defp format_datetime(datetime, is_date) do
  pattern_key = if is_date, do: :custom_date_patterns, else: :custom_datetime_patterns
  patterns = Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[pattern_key]

  Enum.find_value(patterns, fn pattern ->
    case Timex.parse(datetime, pattern) do
      {:ok, %DateTime{} = parsed} ->
        to_date_or_datetime_string(parsed, is_date)

      {:ok, %NaiveDateTime{} = naive} ->
        # A naive result carries no zone; it is tagged as UTC so that both
        # parse results go through the same formatting helper.
        utc = DateTime.from_naive!(naive, "Etc/UTC")
        to_date_or_datetime_string(utc, is_date)

      {:error, _} ->
        # Falsy, so find_value/2 moves on to the next pattern.
        false
    end
  end)
end
193+
194+
# Renders a parsed `DateTime` as the plain string stored in the database.
#
# The second argument selects the output shape:
#   * `true`  - date only, ISO 8601 (`YYYY-MM-DD`)
#   * `false` - date and time (`YYYY-MM-DD HH:MM:SS`; fractional seconds are
#     kept when the value carries them)
#
# The wall-clock value is kept and the zone is dropped. Going through
# `DateTime.to_naive/1` removes the trailing "Z" of UTC values as well as the
# "+01:00 CET Europe/Warsaw"-style suffix that `DateTime.to_string/1` emits for
# non-UTC values, which trimming a trailing "Z" alone would leave in place.
defp to_date_or_datetime_string(datetime, true),
  do: datetime |> DateTime.to_date() |> Date.to_string()

defp to_date_or_datetime_string(datetime, false),
  do: datetime |> DateTime.to_naive() |> NaiveDateTime.to_string()
160199
end

apps/csv2sql/lib/csv2sql/schema_maker.ex

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ defmodule Csv2sql.SchemaMaker do
3636
Map.put(type, :is_empty, type.is_empty && empty)
3737
else
3838
is_date = type.is_date && is_date?(item)
39-
is_timestamp = type.is_timestamp && is_timestamp?(item)
39+
is_datetime = type.is_datetime && is_datetime?(item)
4040
is_integer = type.is_integer && is_integer?(item)
4141
is_float = type.is_float && is_float?(item)
4242
is_boolean = type.is_boolean && is_boolean?(item)
@@ -45,7 +45,7 @@ defmodule Csv2sql.SchemaMaker do
4545
%{
4646
is_empty: type.is_empty && empty,
4747
is_date: is_date,
48-
is_timestamp: is_timestamp,
48+
is_datetime: is_datetime,
4949
is_boolean: is_boolean,
5050
is_integer: is_integer,
5151
is_float: is_float,
@@ -99,16 +99,20 @@ defmodule Csv2sql.SchemaMaker do
9999
varchar_limit = Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:varchar_limit]
100100
headers_type_list = List.duplicate(get_type_map(), Enum.count(headers))
101101

102+
schema_infer_chunk_size =
103+
Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:schema_infer_chunk_size]
104+
102105
db_type = Csv2sql.get_db_type()
103106

104107
types =
105108
path
106109
|> File.stream!()
107110
|> CSV.parse_stream()
108-
|> Stream.chunk_every(
109-
Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:schema_infer_chunk_size]
111+
|> Stream.chunk_every(schema_infer_chunk_size)
112+
|> Task.async_stream(__MODULE__, :infer_type, [headers_type_list],
113+
timeout: :infinity,
114+
ordered: false
110115
)
111-
|> Task.async_stream(__MODULE__, :infer_type, [headers_type_list], timeout: :infinity)
112116
|> Enum.reduce(headers_type_list, fn {:ok, result}, acc ->
113117
# Here we get a list of type maps for each chunk of data
114118
# We need to merge these type maps obtained from each chunk
@@ -117,7 +121,7 @@ defmodule Csv2sql.SchemaMaker do
117121
%{
118122
is_empty: acc_map.is_empty && result_map.is_empty,
119123
is_date: acc_map.is_date && result_map.is_date,
120-
is_timestamp: acc_map.is_timestamp && result_map.is_timestamp,
124+
is_datetime: acc_map.is_datetime && result_map.is_datetime,
121125
is_boolean: acc_map.is_boolean && result_map.is_boolean,
122126
is_integer: acc_map.is_integer && result_map.is_integer,
123127
is_float: acc_map.is_float && result_map.is_float,
@@ -154,8 +158,8 @@ defmodule Csv2sql.SchemaMaker do
154158
defp get_column_types(:mysql, varchar_limit, type) do
155159
cond do
156160
type[:is_empty] -> "VARCHAR(#{varchar_limit})"
157-
type[:is_timestamp] -> "TIMESTAMP"
158161
type[:is_date] -> "DATE"
162+
type[:is_datetime] -> "DATETIME"
159163
type[:is_boolean] -> "BIT"
160164
type[:is_integer] -> "INT"
161165
type[:is_float] -> "DOUBLE"
@@ -179,7 +183,7 @@ defmodule Csv2sql.SchemaMaker do
179183
%{
180184
is_empty: true,
181185
is_date: true,
182-
is_timestamp: true,
186+
is_datetime: true,
183187
is_boolean: true,
184188
is_integer: true,
185189
is_float: true,
@@ -198,11 +202,23 @@ defmodule Csv2sql.SchemaMaker do
198202
end
199203

200204
# Returns true when `item` can be parsed by Timex with at least one of the
# patterns configured under `:custom_date_patterns`.
defp is_date?(item) do
  date_patterns =
    Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:custom_date_patterns]

  Enum.any?(date_patterns, fn date_pattern ->
    case Timex.parse(item, date_pattern) do
      {:error, _} -> false
      {:ok, _} -> true
    end
  end)
end
203213

204-
defp is_timestamp?(item) do
205-
Regex.match?(~r/\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d/, item)
214+
# Returns true when `item` can be parsed by Timex with at least one of the
# patterns configured under `:custom_datetime_patterns`.
defp is_datetime?(item) do
  datetime_patterns =
    Application.get_env(:csv2sql, Csv2sql.SchemaMaker)[:custom_datetime_patterns]

  Enum.any?(datetime_patterns, fn datetime_pattern ->
    case Timex.parse(item, datetime_pattern) do
      {:error, _} -> false
      {:ok, _} -> true
    end
  end)
end
207223

208224
defp is_boolean?(item) do

apps/csv2sql/mix.exs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ defmodule Csv2sql.MixProject do
4040
{:ecto, "~> 3.4"},
4141
{:ecto_sql, "~> 3.1"},
4242
{:cli_spinners, "~> 0.1.0"},
43-
{:sizeable, "~> 1.0"}
43+
{:sizeable, "~> 1.0"},
44+
{:timex, "~> 3.7"}
4445
]
4546
end
4647
end

apps/csv2sql/mix.lock

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,27 @@
11
%{
2+
"certifi": {:hex, :certifi, "2.8.0", "d4fb0a6bb20b7c9c3643e22507e42f356ac090a1dcea9ab99e27e0376d695eba", [:rebar3], [], "hexpm", "6ac7efc1c6f8600b08d625292d4bbf584e14847ce1b6b5c44d983d273e1097ea"},
23
"cli_spinners": {:hex, :cli_spinners, "0.1.0", "08d89c6f1840a8927daed48c675ecb2f20c05c855dc2b4b58c2933d393d5e0c9", [:mix], [], "hexpm", "3b8ccad722e518309d8b92230960ca4775a761164514f78a89c4f04d5a25c97f"},
4+
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"},
35
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm", "4a0850c9be22a43af9920a71ab17c051f5f7d45c209e40269a1938832510e4d9"},
46
"db_connection": {:hex, :db_connection, "2.2.1", "caee17725495f5129cb7faebde001dc4406796f12a62b8949f4ac69315080566", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm", "2b02ece62d9f983fcd40954e443b7d9e6589664380e5546b2b9b523cd0fb59e1"},
57
"decimal": {:hex, :decimal, "1.8.1", "a4ef3f5f3428bdbc0d35374029ffcf4ede8533536fa79896dd450168d9acdf3c", [:mix], [], "hexpm", "3cb154b00225ac687f6cbd4acc4b7960027c757a5152b369923ead9ddbca7aec"},
68
"dir_walker": {:hex, :dir_walker, "0.0.8", "5332225074e4887e6e60ca0242af490215f296511ded4df18d554ae25394f727", [:mix], [], "hexpm", "2f4fb16e6427523700df9eb12eece5679ad4459aaefb1ca3cb580184bfc8d173"},
79
"ecto": {:hex, :ecto, "3.4.0", "a7a83ab8359bf816ce729e5e65981ce25b9fc5adfc89c2ea3980f4fed0bfd7c1", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "5eed18252f5b5bbadec56a24112b531343507dbe046273133176b12190ce19cc"},
810
"ecto_sql": {:hex, :ecto_sql, "3.4.1", "3c9136ba138f9b74d31286c73c61232a92bd19385f7c5607bdeb3a4587ef91f5", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0 or ~> 0.4.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.0", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9b4be0bffe7b0bdf5393defcae52712f248e70cc2bc0e8ab6ddb03be66371516"},
11+
"gettext": {:hex, :gettext, "0.18.2", "7df3ea191bb56c0309c00a783334b288d08a879f53a7014341284635850a6e55", [:mix], [], "hexpm", "f9f537b13d4fdd30f3039d33cb80144c3aa1f8d9698e47d7bcbcc8df93b1f5c5"},
12+
"hackney": {:hex, :hackney, "1.18.0", "c4443d960bb9fba6d01161d01cd81173089686717d9490e5d3606644c48d121f", [:rebar3], [{:certifi, "~>2.8.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "9afcda620704d720db8c6a3123e9848d09c87586dc1c10479c42627b905b5c5e"},
13+
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
14+
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
15+
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
916
"myxql": {:hex, :myxql, "0.3.4", "41163cfc97b899db0fd9ebb6f38a8dc841298a7fa1c1e84a93b9369e2dbb1815", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:geo, "~> 3.3", [hex: :geo, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "fc8d49ba141a46a174b410e7c86906d4d655ff8a1e1608c31d5d1777d106d1dd"},
1017
"nimble_csv": {:hex, :nimble_csv, "0.7.0", "52f23ce46eee304d063d1716e19e45ea544bd751536bc53e5d41cb7fc0ca9405", [:mix], [], "hexpm", "e7051e7a95b5c4f26512af5805c320ee9185e752d949f048bf318fedef86cccc"},
18+
"parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"},
1119
"postgrex": {:hex, :postgrex, "0.15.6", "a464c72010a56e3214fe2b99c1a76faab4c2bb0255cabdef30dea763a3569aa2", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "f99268325ac8f66ffd6c4964faab9e70fbf721234ab2ad238c00f9530b8cdd55"},
1220
"sizeable": {:hex, :sizeable, "1.0.2", "625fe06a5dad188b52121a140286f1a6ae1adf350a942cf419499ecd8a11ee29", [:mix], [], "hexpm", "4bab548e6dfba777b400ca50830a9e3a4128e73df77ab1582540cf5860601762"},
21+
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
1322
"stream_split": {:hex, :stream_split, "0.1.4", "ea8073779725101127123bd4176500227e958113090741e38b23ee4efd4452f6", [:mix], [], "hexpm", "fd1a8ea510b237c2006bbd33ab0b7ccc6e3831e16b1d7f142dff8434acdbf15e"},
1423
"telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"},
24+
"timex": {:hex, :timex, "3.7.6", "502d2347ec550e77fdf419bc12d15bdccd31266bb7d925b30bf478268098282f", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "a296327f79cb1ec795b896698c56e662ed7210cc9eb31f0ab365eb3a62e2c589"},
25+
"tzdata": {:hex, :tzdata, "1.1.1", "20c8043476dfda8504952d00adac41c6eda23912278add38edc140ae0c5bcc46", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a69cec8352eafcd2e198dea28a34113b60fdc6cb57eb5ad65c10292a6ba89787"},
26+
"unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"},
1527
}

0 commit comments

Comments
 (0)