@@ -38,9 +38,7 @@ func ResnetBlock(outChannels: Int, inConv: Bool) -> (
   )
 }
 
-func Adapter(
-  channels: [Int], numRepeat: Int
-) -> ((PythonObject) -> Void, Model) {
+func Adapter(channels: [Int], numRepeat: Int) -> ((PythonObject) -> Void, Model) {
   let x = Input()
   let convIn = Convolution(
     groups: 1, filters: channels[0], filterSize: [3, 3],
@@ -90,25 +88,108 @@ func Adapter(
   return (reader, Model([x], outs))
 }
 
+func ResnetBlockLight(outChannels: Int) -> (
+  Model, Model, Model
+) {
+  let x = Input()
+  let inLayerConv2d = Convolution(
+    groups: 1, filters: outChannels, filterSize: [3, 3],
+    hint: Hint(stride: [1, 1], border: Hint.Border(begin: [1, 1], end: [1, 1])))
+  var out = inLayerConv2d(x)
+  out = ReLU()(out)
+  // Dropout if needed in the future (for training).
+  let outLayerConv2d = Convolution(
+    groups: 1, filters: outChannels, filterSize: [3, 3],
+    hint: Hint(stride: [1, 1], border: Hint.Border(begin: [1, 1], end: [1, 1])))
+  out = outLayerConv2d(out) + x
+  return (
+    inLayerConv2d, outLayerConv2d, Model([x], [out])
+  )
+}
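+// ResnetBlockLight mirrors T2I-Adapter's ResnetBlock_light: 3x3 conv -> ReLU -> 3x3 conv with an
+// identity skip connection. The two convolutions are returned alongside the Model so a reader
+// closure can copy the block1/block2 weights from the PyTorch state_dict.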
+
+func Extractor(prefix: String, channel: Int, innerChannel: Int, numRepeat: Int, downsample: Bool) -> ((PythonObject) -> Void, Model) {
+  let x = Input()
+  let inConv = Convolution(groups: 1, filters: innerChannel, filterSize: [1, 1], hint: Hint(stride: [1, 1]))
+  var out = inConv(x)
+  var readers = [(PythonObject) -> Void]()
+  for i in 0..<numRepeat {
+    let (inLayerConv2d, outLayerConv2d, resnetBlock) = ResnetBlockLight(outChannels: innerChannel)
+    out = resnetBlock(out)
+    let reader: (PythonObject) -> Void = { state_dict in
+      let block1_weight = state_dict["body.\(prefix).body.\(i).block1.weight"].numpy()
+      let block1_bias = state_dict["body.\(prefix).body.\(i).block1.bias"].numpy()
+      inLayerConv2d.parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: block1_weight))
+      inLayerConv2d.parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: block1_bias))
+      let block2_weight = state_dict["body.\(prefix).body.\(i).block2.weight"].numpy()
+      let block2_bias = state_dict["body.\(prefix).body.\(i).block2.bias"].numpy()
+      outLayerConv2d.parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: block2_weight))
+      outLayerConv2d.parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: block2_bias))
+    }
+    readers.append(reader)
+  }
+  let outConv = Convolution(groups: 1, filters: channel, filterSize: [1, 1], hint: Hint(stride: [1, 1]))
+  out = outConv(out)
+  if downsample {
+    let downsample = AveragePool(filterSize: [2, 2], hint: Hint(stride: [2, 2]))
+    out = downsample(out)
+  }
+  let reader: (PythonObject) -> Void = { state_dict in
+    let in_conv_weight = state_dict["body.\(prefix).in_conv.weight"].numpy()
+    let in_conv_bias = state_dict["body.\(prefix).in_conv.bias"].numpy()
+    inConv.parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: in_conv_weight))
+    inConv.parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: in_conv_bias))
+    let out_conv_weight = state_dict["body.\(prefix).out_conv.weight"].numpy()
+    let out_conv_bias = state_dict["body.\(prefix).out_conv.bias"].numpy()
+    outConv.parameters(for: .weight).copy(from: try! Tensor<Float>(numpy: out_conv_weight))
+    outConv.parameters(for: .bias).copy(from: try! Tensor<Float>(numpy: out_conv_bias))
+    for reader in readers {
+      reader(state_dict)
+    }
+  }
+  return (reader, Model([x], [out]))
+}
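+// Extractor builds one `body.<prefix>` stage of Adapter_light: a 1x1 projection down to
+// innerChannel, numRepeat lightweight residual blocks, a 1x1 expansion back to channel, and,
+// for every stage but the first, a 2x2 average pool that halves the spatial resolution.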
+
+func AdapterLight(channels: [Int], numRepeat: Int) -> ((PythonObject) -> Void, Model) {
+  var readers = [(PythonObject) -> Void]()
+  let x = Input()
+  var out: Model.IO = x
+  var outs = [Model.IO]()
+  for (i, channel) in channels.enumerated() {
+    let (reader, extractor) = Extractor(prefix: "\(i)", channel: channel, innerChannel: channel / 4, numRepeat: numRepeat, downsample: i != 0)
+    out = extractor(out)
+    outs.append(out)
+    readers.append(reader)
+  }
+  let reader: (PythonObject) -> Void = { state_dict in
+    for reader in readers {
+      reader(state_dict)
+    }
+  }
+  return (reader, Model([x], outs))
+}
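+// Minimal usage sketch, mirroring the driver code below (shapes assume the [2, 64 * 3, 64, 64]
+// pixel-unshuffled hint):
+//   let (reader, adapternet) = AdapterLight(channels: [320, 640, 1280, 1280], numRepeat: 4)
+//   var controls = adapternet(inputs: hintIn).map { $0.as(of: Float.self) }  // first run materializes parameters
+//   reader(state_dict)  // copy Adapter_light weights from the PyTorch state_dict
+//   controls = adapternet(inputs: hintIn).map { $0.as(of: Float.self) }
+// The four outputs are [2, 320, 64, 64], [2, 640, 32, 32], [2, 1280, 16, 16] and [2, 1280, 8, 8],
+// matching the feature resolutions of the SD v1 UNet blocks they condition.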
+
 random.seed(42)
 numpy.random.seed(42)
 torch.manual_seed(42)
 torch.cuda.manual_seed_all(42)
 
-let hint = torch.randn([2, 1, 512, 512])
+let hint = torch.randn([2, 3, 512, 512])
 
-let adapter = ldm_modules_encoders_adapter.Adapter(cin: 64, channels: [320, 640, 1280, 1280], nums_rb: 2, ksize: 1, sk: true, use_conv: false).to(torch.device("cpu"))
-adapter.load_state_dict(torch.load("/home/liu/workspace/T2I-Adapter/models/t2iadapter_canny_sd14v1.pth"))
-let state_dict = adapter.state_dict()
-let ret = adapter(hint)
+// let adapter = ldm_modules_encoders_adapter.Adapter(cin: 64, channels: [320, 640, 1280, 1280], nums_rb: 2, ksize: 1, sk: true, use_conv: false).to(torch.device("cpu"))
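+// cin is 64 * 3 = 192: Adapter_light pixel-unshuffles the 3-channel, 512x512 hint by a factor
+// of 8 before its first extractor, so it sees 3 * 8 * 8 channels at 64x64 resolution.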
+let adapterLight = ldm_modules_encoders_adapter.Adapter_light(cin: 64 * 3, channels: [320, 640, 1280, 1280], nums_rb: 4).to(torch.device("cpu"))
+adapterLight.load_state_dict(torch.load("/home/liu/workspace/T2I-Adapter/models/t2iadapter_color_sd14v1.pth"))
+let state_dict = adapterLight.state_dict()
+let ret = adapterLight(hint)
+print(adapterLight)
 print(ret[0])
 
 let graph = DynamicGraph()
 let hintTensor = graph.variable(try! Tensor<Float>(numpy: hint.numpy())).toGPU(0)
-let (reader, adapternet) = Adapter(channels: [320, 640, 1280, 1280], numRepeat: 2)
+// let (reader, adapternet) = Adapter(channels: [320, 640, 1280, 1280], numRepeat: 2)
+let (reader, adapternet) = AdapterLight(channels: [320, 640, 1280, 1280], numRepeat: 4)
 graph.workspaceSize = 1_024 * 1_024 * 1_024
 graph.withNoGrad {
-  let hintIn = hintTensor.reshaped(format: .NCHW, shape: [2, 1, 64, 8, 64, 8]).permuted(0, 1, 3, 5, 2, 4).copied().reshaped(.NCHW(2, 64, 64, 64))
+  let hintIn = hintTensor.reshaped(format: .NCHW, shape: [2, 3, 64, 8, 64, 8]).permuted(0, 1, 3, 5, 2, 4).copied().reshaped(.NCHW(2, 64 * 3, 64, 64))
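+  // This reshape/permute/reshape reproduces PixelUnshuffle(8): every 8x8 patch of the
+  // [2, 3, 512, 512] hint becomes 64 extra channels, yielding [2, 192, 64, 64] in the same
+  // channel order (c * 64 + i * 8 + j) as the PyTorch reference.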
   var controls = adapternet(inputs: hintIn).map { $0.as(of: Float.self) }
   reader(state_dict)
   controls = adapternet(inputs: hintIn).map { $0.as(of: Float.self) }