Skip to content

Commit f124a7f

Browse files
Update config_lib.py
Update weight decay.
1 parent 0f1c3ac commit f124a7f

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

hero/config_lib.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ class Flops2e17TFM41MC4L2048BS128(BaseExperimentConfig):
257 257   batch_size: int = 128
258 258   num_train_steps: int = 2588
259 259   lr_schedule_name: str = 'cosine_decay'
260     - weight_decay: float = 3e-4
    260 + weight_decay: float = 1e-1
261 261   lr_schedule_config: Tuple[Tuple[str, Any], ...] = (
262 262   ('lr', 3e-3), ('steps_after_decay', 0),
263 263   ('end_decay', 0.1))
@@ -294,7 +294,7 @@ class Flops1e18TFM111MC4L2048BS256(Flops2e17TFM41MC4L2048BS128):
294 294   batch_size: int = 256
295 295   num_train_steps: int = 3626
296 296   lr_schedule_name: str = 'cosine_decay'
297     - weight_decay: float = 3e-4
    297 + weight_decay: float = 1e-1
298 298   lr_schedule_config: Tuple[Tuple[str, Any], ...] = (
299 299   ('lr', 3e-3), ('steps_after_decay', 0),
300 300   ('end_decay', 0.1))
@@ -329,7 +329,7 @@ class Flops1e19TFM338MC4L2048BS512(Flops2e17TFM41MC4L2048BS128):
329 329   batch_size: int = 512
330 330   num_train_steps: int = 5800
331 331   lr_schedule_name: str = 'cosine_decay'
332     - weight_decay: float = 3e-4
    332 + weight_decay: float = 1e-1
333 333   lr_schedule_config: Tuple[Tuple[str, Any], ...] = (
334 334   ('lr', 3e-3),
335 335   ('steps_after_decay', 0),

0 commit comments

Comments
 (0)