@@ -159,12 +159,16 @@ let vocab = state_dict["token_embedding.weight"]
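// Copy the token and positional embedding tables from the PyTorch state_dict
// into the NNC model's embedding parameters.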
let pos = state_dict["positional_embedding"]
tokenEmbed.parameters.copy(from: try! Tensor<Float>(numpy: vocab.numpy()))
positionEmbed.parameters.copy(from: try! Tensor<Float>(numpy: pos.numpy()))
+print("\"token_embedding.weight\", \"\(tokenEmbed.parameters.name)\"")
+print("\"positional_embedding\", \"\(positionEmbed.parameters.name)\"")

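// Each of the 23 transformer residual blocks is loaded the same way; the added
// print statements emit every PyTorch state_dict key next to the NNC parameter
// name it was copied into, one "key", "name" pair per line.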
for i in 0..<23 {
  let layer_norm_1_weight = state_dict["transformer.resblocks.\(i).ln_1.weight"].numpy()
  let layer_norm_1_bias = state_dict["transformer.resblocks.\(i).ln_1.bias"].numpy()
  layerNorm1s[i].parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: layer_norm_1_weight))
  layerNorm1s[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: layer_norm_1_bias))
+  print("\"transformer.resblocks.\(i).ln_1.weight\", \"\(layerNorm1s[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).ln_1.bias\", \"\(layerNorm1s[i].parameters(for: .bias).name)\"")

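  // PyTorch packs the query, key, and value projections into a single in_proj
  // tensor, stacked along the first dimension: rows 0..<1024 are the query
  // projection, 1024..<2048 the keys, and 2048 onward the values.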
  let in_proj_weight = state_dict["transformer.resblocks.\(i).attn.in_proj_weight"].type(
    torch.float
@@ -174,42 +178,58 @@ for i in 0..<23 {
  toqueries[i].parameters(for: .weight).copy(
    from: try! Tensor<Float>(numpy: in_proj_weight[..<(1024), ...]))
  toqueries[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: in_proj_bias[..<(1024)]))
+  print("\"transformer.resblocks.\(i).attn.in_proj_weight\", \"\(toqueries[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).attn.in_proj_bias\", \"\(toqueries[i].parameters(for: .bias).name)\"")
  tokeys[i].parameters(for: .weight).copy(
    from: try! Tensor<Float>(numpy: in_proj_weight[(1024)..<(2 * 1024), ...]))
  tokeys[i].parameters(for: .bias).copy(
    from: try! Tensor<Float>(numpy: in_proj_bias[(1024)..<(2 * 1024)]))
+  print("\"transformer.resblocks.\(i).attn.in_proj_weight\", \"\(tokeys[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).attn.in_proj_bias\", \"\(tokeys[i].parameters(for: .bias).name)\"")
  tovalues[i].parameters(for: .weight).copy(
    from: try! Tensor<Float>(numpy: in_proj_weight[(2 * 1024)..., ...]))
  tovalues[i].parameters(for: .bias).copy(
    from: try! Tensor<Float>(numpy: in_proj_bias[(2 * 1024)...]))
+  print("\"transformer.resblocks.\(i).attn.in_proj_weight\", \"\(tovalues[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).attn.in_proj_bias\", \"\(tovalues[i].parameters(for: .bias).name)\"")

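  // out_proj is the attention output projection, held here by unifyheads.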
  let out_proj_weight = state_dict["transformer.resblocks.\(i).attn.out_proj.weight"]
    .numpy()
  let out_proj_bias = state_dict["transformer.resblocks.\(i).attn.out_proj.bias"].numpy()
  unifyheads[i].parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: out_proj_weight))
  unifyheads[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: out_proj_bias))
+  print("\"transformer.resblocks.\(i).attn.out_proj.weight\", \"\(unifyheads[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).attn.out_proj.bias\", \"\(unifyheads[i].parameters(for: .bias).name)\"")

  let layer_norm_2_weight = state_dict["transformer.resblocks.\(i).ln_2.weight"].numpy()
  let layer_norm_2_bias = state_dict["transformer.resblocks.\(i).ln_2.bias"].numpy()
  layerNorm2s[i].parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: layer_norm_2_weight))
  layerNorm2s[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: layer_norm_2_bias))
+  print("\"transformer.resblocks.\(i).ln_2.weight\", \"\(layerNorm2s[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).ln_2.bias\", \"\(layerNorm2s[i].parameters(for: .bias).name)\"")

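  // The MLP's two linear layers: c_fc (the expansion, fc1s) and c_proj (the
  // projection back to the model width, fc2s).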
  let fc1_weight = state_dict["transformer.resblocks.\(i).mlp.c_fc.weight"].numpy()
  let fc1_bias = state_dict["transformer.resblocks.\(i).mlp.c_fc.bias"].numpy()
  fc1s[i].parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: fc1_weight))
  fc1s[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: fc1_bias))
+  print("\"transformer.resblocks.\(i).mlp.c_fc.weight\", \"\(fc1s[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).mlp.c_fc.bias\", \"\(fc1s[i].parameters(for: .bias).name)\"")

  let fc2_weight = state_dict["transformer.resblocks.\(i).mlp.c_proj.weight"].numpy()
  let fc2_bias = state_dict["transformer.resblocks.\(i).mlp.c_proj.bias"].numpy()
  fc2s[i].parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: fc2_weight))
  fc2s[i].parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: fc2_bias))
+  print("\"transformer.resblocks.\(i).mlp.c_proj.weight\", \"\(fc2s[i].parameters(for: .weight).name)\"")
+  print("\"transformer.resblocks.\(i).mlp.c_proj.bias\", \"\(fc2s[i].parameters(for: .bias).name)\"")
}

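// The final layer norm, applied after the last residual block.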
let final_layer_norm_weight = state_dict["ln_final.weight"].numpy()
let final_layer_norm_bias = state_dict["ln_final.bias"].numpy()
finalLayerNorm.parameters(for: .weight).copy(
  from: try! Tensor<Float>(numpy: final_layer_norm_weight))
finalLayerNorm.parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: final_layer_norm_bias))
+print("\"ln_final.weight\", \"\(finalLayerNorm.parameters(for: .weight).name)\"")
+print("\"ln_final.bias\", \"\(finalLayerNorm.parameters(for: .bias).name)\"")

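// Run the text model forward on the token and position tensors with the causal
// attention mask, reading the first output back as Float.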
let c = textModel(inputs: tokensTensor, positionTensor, casualAttentionMask)[0].as(of: Float.self)
for i in 0..<6 {