Skip to content

Commit 0f80472

Browse files
committed
Fixed bf16 issues.
1 parent 336c77a commit 0f80472

File tree

2 files changed

+34
-17
lines changed

2 files changed

+34
-17
lines changed

examples/lora/main.swift

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ public struct Storage {
99
var name: String
1010
var size: Int
1111
var dataType: DataType
12+
var BF16: Bool
1213
}
1314

1415
public struct TensorDescriptor {
@@ -46,7 +47,7 @@ public final class SafeTensors {
4647
guard !(shape.contains { $0 <= 0 }) else { continue }
4748
guard
4849
dtype == "f32" || dtype == "f16" || dtype == "float16" || dtype == "float32"
49-
|| dtype == "float" || dtype == "half"
50+
|| dtype == "float" || dtype == "half" || dtype == "bf16"
5051
else { continue }
5152
let dataType: DataType =
5253
dtype == "f32" || dtype == "float32" || dtype == "float" ? .Float32 : .Float16
@@ -58,7 +59,7 @@ public final class SafeTensors {
5859
}
5960
strides.reverse()
6061
let tensorDescriptor = TensorDescriptor(
61-
storage: Storage(name: key, size: offsetEnd - offsetStart, dataType: dataType),
62+
storage: Storage(name: key, size: offsetEnd - offsetStart, dataType: dataType, BF16: dtype == "bf16"),
6263
storageOffset: offsetStart, shape: shape, strides: strides)
6364
states[key] = tensorDescriptor
6465
}
@@ -75,12 +76,27 @@ public final class SafeTensors {
7576
guard let address = $0.baseAddress else { fatalError() }
7677
let tensor: AnyTensor
7778
if tensorDescriptor.storage.dataType == .Float16 {
78-
tensor = Tensor<Float16>(
79-
.CPU, format: .NCHW, shape: TensorShape(tensorDescriptor.shape),
80-
unsafeMutablePointer: (address + bufferStart + tensorDescriptor.storageOffset)
81-
.assumingMemoryBound(
82-
to: Float16.self), bindLifetimeOf: self
83-
)
79+
if tensorDescriptor.storage.BF16 {
80+
let count = tensorDescriptor.strides[0] * tensorDescriptor.shape[0]
81+
let u16 = UnsafeMutablePointer<UInt16>.allocate(capacity: count * 2)
82+
let bf16 = (address + bufferStart + tensorDescriptor.storageOffset).assumingMemoryBound(to: UInt16.self)
83+
for i in 0..<count {
84+
u16[i * 2] = 0
85+
u16[i * 2 + 1] = bf16[i]
86+
}
87+
tensor = Tensor<Float>(
88+
.CPU, format: .NCHW, shape: TensorShape(tensorDescriptor.shape),
89+
unsafeMutablePointer: UnsafeMutableRawPointer(u16).assumingMemoryBound(to: Float.self), bindLifetimeOf: self
90+
).copied()
91+
u16.deallocate()
92+
} else {
93+
tensor = Tensor<Float16>(
94+
.CPU, format: .NCHW, shape: TensorShape(tensorDescriptor.shape),
95+
unsafeMutablePointer: (address + bufferStart + tensorDescriptor.storageOffset)
96+
.assumingMemoryBound(
97+
to: Float16.self), bindLifetimeOf: self
98+
)
99+
}
84100
} else {
85101
tensor = Tensor<Float>(
86102
.CPU, format: .NCHW, shape: TensorShape(tensorDescriptor.shape),
@@ -94,7 +110,7 @@ public final class SafeTensors {
94110
}
95111
}
96112

97-
let filename = "/home/liu/workspace/swift-diffusion/lucyCyberpunk_35Epochs.safetensors"
113+
let filename = "/home/liu/workspace/swift-diffusion/openjourneyLora_v1.safetensors"
98114
/*
99115
let archive = Archive(url: URL(fileURLWithPath: filename), accessMode: .read)!
100116
let entry = archive["archive/data.pkl"]!
@@ -174,6 +190,7 @@ for key in keys {
174190
keysSet.remove(key)
175191
}
176192
}
193+
print(keysSet)
177194
var unetMapCount = [String: Int]()
178195
for i in stride(from: 0, to: unetMap.count, by: 2) {
179196
unetMapCount[unetMap[i]] = unetMapCount[unetMap[i], default: 0] + 1

examples/txt2img/main.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,16 +160,16 @@ graph.withNoGrad {
160160
let positionTensorGPU = positionTensor.toGPU(0)
161161
let casualAttentionMaskGPU = casualAttentionMask.toGPU(0)
162162
textModel.compile(inputs: tokensTensorGPU, positionTensorGPU, casualAttentionMaskGPU)
163-
graph.openStore(workDir + "/lora.ckpt") { lora in
163+
graph.openStore(workDir + "/moxin_v1.0_lora_f16.ckpt") { lora in
164164
let keys = Set(lora.keys)
165-
graph.openStore(workDir + "/sd-v1.4.ckpt") { store in
165+
graph.openStore(workDir + "/sd-v1.5.ckpt") { store in
166166
store.read("text_model", model: textModel) { name, _, _, _ in
167167
if keys.contains(name + "__up__") {
168168
let original = graph.variable(Tensor<UseFloatingPoint>(from: store.read(name)!)).toGPU(0)
169169
let up = graph.variable(Tensor<UseFloatingPoint>(lora.read(name + "__up__")!)).toGPU(0)
170170
let down = graph.variable(Tensor<UseFloatingPoint>(lora.read(name + "__down__")!)).toGPU(0)
171-
let final = original + 0.6 * (up * down)
172-
return .final(final.rawValue)
171+
let final = original + 0.8 * (up * down)
172+
return .final(final.rawValue.toCPU())
173173
}
174174
return .continue(name)
175175
}
@@ -186,9 +186,9 @@ graph.withNoGrad {
186186
let ts = timeEmbedding(timestep: 0, batchSize: 2, embeddingSize: 320, maxPeriod: 10_000).toGPU(0)
187187
unet.compile(inputs: xIn, graph.variable(Tensor<UseFloatingPoint>(from: ts)), c)
188188
decoder.compile(inputs: x)
189-
graph.openStore(workDir + "/lora.ckpt") { lora in
189+
graph.openStore(workDir + "/moxin_v1.0_lora_f16.ckpt") { lora in
190190
let keys = Set(lora.keys)
191-
graph.openStore(workDir + "/sd-v1.4.ckpt") { store in
191+
graph.openStore(workDir + "/sd-v1.5.ckpt") { store in
192192
store.read("unet", model: unet) { name, _, _, _ in
193193
if keys.contains(name + "__up__") {
194194
let original = graph.variable(Tensor<UseFloatingPoint>(from: store.read(name)!)).toGPU(0)
@@ -200,11 +200,11 @@ graph.withNoGrad {
200200
up = graph.variable(loraUp.reshaped(.NC(loraUp.shape[0], loraUp.shape[1] * loraUp.shape[2] * loraUp.shape[3]))).toGPU(0)
201201
let loraDown = Tensor<UseFloatingPoint>(lora.read(name + "__down__")!)
202202
down = graph.variable(loraDown.reshaped(.NC(loraDown.shape[0], loraDown.shape[1] * loraDown.shape[2] * loraDown.shape[3]))).toGPU(0)
203-
result = original + 0.6 * (up * down).reshaped(format: .NCHW, shape: original.shape)
203+
result = original + 0.8 * (up * down).reshaped(format: .NCHW, shape: original.shape)
204204
} else {
205205
up = graph.variable(Tensor<UseFloatingPoint>(lora.read(name + "__up__")!)).toGPU(0)
206206
down = graph.variable(Tensor<UseFloatingPoint>(lora.read(name + "__down__")!)).toGPU(0)
207-
result = original + 0.6 * (up * down)
207+
result = original + 0.8 * (up * down)
208208
}
209209
return .final(result.rawValue)
210210
}

0 commit comments

Comments (0)