@@ -6,7 +6,7 @@ use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::{AddressSpace, Align, HasDataLayout, Size, TargetDataLayout, WrappingRange};
 use rustc_codegen_ssa::MemFlags;
-use rustc_codegen_ssa::common::{IntPredicate, RealPredicate, TypeKind, AtomicRmwBinOp};
+use rustc_codegen_ssa::common::{AtomicRmwBinOp, IntPredicate, RealPredicate, TypeKind};
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
@@ -1114,46 +1114,54 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         weak: bool,
     ) -> (&'ll Value, &'ll Value) {
         // LLVM verifier rejects cases where the `failure_order` is stronger than `order`
-        match (order, failure_order) {
-            (AtomicOrdering::SeqCst, _) =>(),
-            (_, AtomicOrdering::Relaxed) =>(),
-            (AtomicOrdering::Release, AtomicOrdering::Release) | (AtomicOrdering::Release, AtomicOrdering::Acquire) | (AtomicOrdering::Acquire, AtomicOrdering::Acquire) =>(),
-            (AtomicOrdering::AcqRel, AtomicOrdering::Acquire) => (),
-            (AtomicOrdering::Relaxed, _) | (_, AtomicOrdering::Release | AtomicOrdering::AcqRel | AtomicOrdering::SeqCst) =>{
+        match (order, failure_order) {
+            (AtomicOrdering::SeqCst, _) => (),
+            (_, AtomicOrdering::Relaxed) => (),
+            (AtomicOrdering::Release, AtomicOrdering::Release)
+            | (AtomicOrdering::Release, AtomicOrdering::Acquire)
+            | (AtomicOrdering::Acquire, AtomicOrdering::Acquire) => (),
+            (AtomicOrdering::AcqRel, AtomicOrdering::Acquire) => (),
+            (AtomicOrdering::Relaxed, _)
+            | (_, AtomicOrdering::Release | AtomicOrdering::AcqRel | AtomicOrdering::SeqCst) => {
                 // Invalid cmpxchg - `failure_order` is stronger than `order`! So, we abort.
                 self.abort();
-                return (self.const_undef(self.val_ty(cmp)), self.const_undef(self.type_i1()));
+                return (
+                    self.const_undef(self.val_ty(cmp)),
+                    self.const_undef(self.type_i1()),
+                );
             }
         };
         let res = self.atomic_op(
             dst,
-            |builder, dst| {
+            |builder, dst| {
                 // We are in a supported address space - just use ordinary atomics
                 unsafe {
                     llvm::LLVMRustBuildAtomicCmpXchg(
                         builder.llbuilder,
                         dst,
                         cmp,
                         src,
-                        crate::llvm::AtomicOrdering::from_generic(order),
+                        crate::llvm::AtomicOrdering::from_generic(order),
                         crate::llvm::AtomicOrdering::from_generic(failure_order),
                         weak as u32,
                     )
                 }
             },
-            |builder, dst| {
+            |builder, dst| {
                 // Local space is only accessible to the current thread.
-                // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store.
-                let load: &'ll Value = unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) };
+                // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store.
+                let load: &'ll Value =
+                    unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) };
                 let compare = builder.icmp(IntPredicate::IntEQ, load, cmp);
                 // We can do something smart & branchless here:
-                // We select either the current value(if the comparison fails), or a new value.
+                // We select either the current value(if the comparison fails), or a new value.
                 // We then *unconditionally* write that back to local memory(which is very, very cheap).
                 // TODO: measure if this has a positive impact, or if we should just use more blocks, and conditional writes.
                 let value = builder.select(compare, src, load);
-                unsafe { llvm::LLVMBuildStore(builder.llbuilder, value, dst) };
-                let res_type = builder.type_struct(&[builder.val_ty(cmp), builder.type_ix(1)], false);
-                // We pack the result, to match the behaviour of proper atomics / emulated thread-local atomics.
+                unsafe { llvm::LLVMBuildStore(builder.llbuilder, value, dst) };
+                let res_type =
+                    builder.type_struct(&[builder.val_ty(cmp), builder.type_ix(1)], false);
+                // We pack the result, to match the behaviour of proper atomics / emulated thread-local atomics.
                 let res = builder.const_undef(res_type);
                 let res = builder.insert_value(res, load, 0);
                 let res = builder.insert_value(res, compare, 1);
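
The guard at the top of this hunk enforces the LLVM verifier rule quoted in the comment: a cmpxchg failure ordering must not be stronger than its success ordering (and, per the last arm, never Release or AcqRel), so the invalid combinations abort and return undef. The local-space closure then lowers cmpxchg to a straight-line load / icmp / select / store sequence. As a minimal sketch, the plain-Rust model below computes the same thing for a u32 slot; the function name and element type are illustrative, not part of the patch:

    // Plain-Rust model of the local-space cmpxchg lowering above (illustrative only).
    unsafe fn emulated_cmpxchg_u32(dst: *mut u32, cmp: u32, src: u32) -> (u32, bool) {
        let load = *dst;                              // LLVMBuildLoad
        let success = load == cmp;                    // icmp IntEQ
        let value = if success { src } else { load }; // select: new value on success, old otherwise
        *dst = value;                                 // unconditional LLVMBuildStore (branchless)
        (load, success)                               // packed { old value, i1 success }
    }

    fn main() {
        let mut slot = 5u32;
        let (old, ok) = unsafe { emulated_cmpxchg_u32(&mut slot, 5, 9) };
        assert!(ok && old == 5 && slot == 9);
    }

Because local memory is visible only to the issuing thread, the unconditional store cannot race with anything, which is what makes the branchless select-then-store formulation sound.
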
@@ -1172,57 +1180,58 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         src: &'ll Value,
         order: AtomicOrdering,
     ) -> &'ll Value {
-        if matches!(op, AtomicRmwBinOp::AtomicNand) {
+        if matches!(op, AtomicRmwBinOp::AtomicNand) {
             self.fatal("Atomic NAND not supported yet!")
         }
         self.atomic_op(
             dst,
-            |builder, dst| {
+            |builder, dst| {
                 // We are in a supported address space - just use ordinary atomics
                 unsafe {
                     llvm::LLVMBuildAtomicRMW(
                         builder.llbuilder,
                         op,
                         dst,
                         src,
-                        crate::llvm::AtomicOrdering::from_generic(order),
+                        crate::llvm::AtomicOrdering::from_generic(order),
                         0,
                     )
                 }
             },
-            |builder, dst| {
+            |builder, dst| {
                 // Local space is only accessible to the current thread.
-                // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store.
-                let load: &'ll Value = unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) };
-                let next_val = match op{
+                // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store.
+                let load: &'ll Value =
+                    unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) };
+                let next_val = match op {
                     AtomicRmwBinOp::AtomicXchg => src,
                     AtomicRmwBinOp::AtomicAdd => builder.add(load, src),
                     AtomicRmwBinOp::AtomicSub => builder.sub(load, src),
                     AtomicRmwBinOp::AtomicAnd => builder.and(load, src),
                     AtomicRmwBinOp::AtomicNand => {
                         let and = builder.and(load, src);
                         builder.not(and)
-                    },
+                    }
                     AtomicRmwBinOp::AtomicOr => builder.or(load, src),
                     AtomicRmwBinOp::AtomicXor => builder.xor(load, src),
                     AtomicRmwBinOp::AtomicMax => {
                         let is_src_bigger = builder.icmp(IntPredicate::IntSGT, src, load);
-                        builder.select(is_src_bigger, src, load)
+                        builder.select(is_src_bigger, src, load)
                     }
                     AtomicRmwBinOp::AtomicMin => {
                         let is_src_smaller = builder.icmp(IntPredicate::IntSLT, src, load);
-                        builder.select(is_src_smaller, src, load)
+                        builder.select(is_src_smaller, src, load)
                     }
-                    AtomicRmwBinOp::AtomicUMax => {
+                    AtomicRmwBinOp::AtomicUMax => {
                         let is_src_bigger = builder.icmp(IntPredicate::IntUGT, src, load);
-                        builder.select(is_src_bigger, src, load)
-                    },
+                        builder.select(is_src_bigger, src, load)
+                    }
                     AtomicRmwBinOp::AtomicUMin => {
                         let is_src_smaller = builder.icmp(IntPredicate::IntULT, src, load);
-                        builder.select(is_src_smaller, src, load)
+                        builder.select(is_src_smaller, src, load)
                     }
                 };
-                unsafe { llvm::LLVMBuildStore(builder.llbuilder, next_val, dst) };
+                unsafe { llvm::LLVMBuildStore(builder.llbuilder, next_val, dst) };
                 load
             },
         )
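
The RMW fallback in this hunk follows the same pattern: plain load, compute the new value per op, unconditional store, and return the old value, exactly as an atomicrmw would. A plain-Rust sketch of those semantics for the unsigned u32 case (the signed min/max arms on IntSGT/IntSLT are analogous and omitted; all names here are illustrative, not from the patch):

    // Illustrative model of the local-space RMW fallback; not the patch itself.
    #[derive(Clone, Copy)]
    enum RmwOp { Xchg, Add, Sub, And, Nand, Or, Xor, UMax, UMin }

    unsafe fn emulated_rmw_u32(dst: *mut u32, src: u32, op: RmwOp) -> u32 {
        let load = *dst; // plain load: local memory is thread-private
        let next_val = match op {
            RmwOp::Xchg => src,
            RmwOp::Add => load.wrapping_add(src), // LLVM add wraps
            RmwOp::Sub => load.wrapping_sub(src),
            RmwOp::And => load & src,
            RmwOp::Nand => !(load & src),         // and + not, as in the match above
            RmwOp::Or => load | src,
            RmwOp::Xor => load ^ src,
            RmwOp::UMax => load.max(src),         // icmp IntUGT + select
            RmwOp::UMin => load.min(src),         // icmp IntULT + select
        };
        *dst = next_val;
        load // RMW returns the previous value
    }

Note that the early fatal() on AtomicNand makes that arm unreachable in practice; it is still spelled out because the match over AtomicRmwBinOp must be exhaustive.
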
@@ -1687,8 +1696,8 @@ impl<'ll, 'tcx, 'a> Builder<'a, 'll, 'tcx> {
     fn atomic_op(
         &mut self,
         dst: &'ll Value,
-        atomic_supported: impl FnOnce(&mut Builder<'a, 'll, 'tcx>, &'ll Value) -> &'ll Value,
-        emulate_local: impl FnOnce(&mut Builder<'a, 'll, 'tcx>, &'ll Value) -> &'ll Value,
+        atomic_supported: impl FnOnce(&mut Builder<'a, 'll, 'tcx>, &'ll Value) -> &'ll Value,
+        emulate_local: impl FnOnce(&mut Builder<'a, 'll, 'tcx>, &'ll Value) -> &'ll Value,
     ) -> &'ll Value {
         // (FractalFir) Atomics in CUDA have some limitations, and we have to work around them.
         // For example, they are restricted in what address space they operate on.
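
atomic_op is the dispatch point both operations above funnel through: one closure emits a real atomic for address spaces CUDA supports, the other emits the thread-local emulation. Its body lies outside this diff, so the standalone toy below only illustrates the two-closure shape; the AddressSpace variants, the Builder stub, and the Local check are hypothetical stand-ins, not the real implementation:

    // Toy illustration of the two-closure dispatch used by atomic_op (hypothetical).
    #[derive(PartialEq)]
    enum AddressSpace { Global, Shared, Local }

    struct Builder;

    impl Builder {
        fn atomic_op<T>(
            &mut self,
            dst_space: AddressSpace,
            atomic_supported: impl FnOnce(&mut Self) -> T,
            emulate_local: impl FnOnce(&mut Self) -> T,
        ) -> T {
            // Local memory is private to one thread, so no real atomic is needed there.
            if dst_space == AddressSpace::Local {
                emulate_local(self)
            } else {
                atomic_supported(self)
            }
        }
    }

    fn main() {
        let mut b = Builder;
        let r = b.atomic_op(AddressSpace::Local, |_| "real atomic", |_| "emulated");
        assert_eq!(r, "emulated");
    }

Keeping the two strategies as closures lets each caller supply the exact instruction sequence for its operation while the address-space policy lives in one place.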