@@ -152,14 +152,14 @@ bool PhaseCFG::is_CFG(Node* n) {
152152 return n->is_block_proj () || n->is_block_start () || is_control_proj_or_safepoint (n);
153153}
154154
// Reports whether n is one of two kinds of node: a Mach node whose ideal
// opcode is Op_SafePoint, or a Proj node whose bottom type is Type::CONTROL.
// NOTE(review): this span is a diff hunk. The "-" line is the old signature
// and the "+" line is the new one; they differ only by the added const
// qualifier on the member function.
155- bool PhaseCFG::is_control_proj_or_safepoint (Node* n) {
155+ bool PhaseCFG::is_control_proj_or_safepoint (Node* n) const {
156156 bool result = (n->is_Mach () && n->as_Mach ()->ideal_Opcode () == Op_SafePoint) || (n->is_Proj () && n->as_Proj ()->bottom_type () == Type::CONTROL);
// Sanity check: whenever result is true and n is not a Mach safepoint, n must
// be a Proj whose _con field is 0.
157157 assert (!result || (n->is_Mach () && n->as_Mach ()->ideal_Opcode () == Op_SafePoint)
158158 || (n->is_Proj () && n->as_Proj ()->_con == 0 ), " If control projection, it must be projection 0" );
159159 return result;
160160}
161161
162- Block* PhaseCFG::find_block_for_node (Node* n) {
162+ Block* PhaseCFG::find_block_for_node (Node* n) const {
163163 if (n->is_block_start () || n->is_block_proj ()) {
164164 return get_block_for_node (n);
165165 } else {
@@ -1274,6 +1274,46 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) {
12741274 default :
12751275 break ;
12761276 }
1277+ if (C->has_irreducible_loop () && self->bottom_type ()->has_memory ()) {
1278+ // If the CFG is irreducible, keep memory-writing nodes as close as
1279+ // possible to their original block (given by the control input). This
1280+ // prevents PhaseCFG::hoist_to_cheaper_block() from placing such nodes
1281+ // into descendants of their original loop, as in the following example:
1282+ //
1283+ // Original placement of store in B1 (loop L1):
1284+ //
1285+ // B1 (L1):
1286+ // m1 <- ..
1287+ // m2 <- store m1, ..
1288+ // B2 (L2):
1289+ // jump B2
1290+ // B3 (L1):
1291+ // .. <- .. m2, ..
1292+ //
1293+ // Wrong "hoisting" of store to B2 (in loop L2, child of L1):
1294+ //
1295+ // B1 (L1):
1296+ // m1 <- ..
1297+ // B2 (L2):
1298+ // m2 <- store m1, ..
1299+ // # Wrong: m1 and m2 interfere at this point.
1300+ // jump B2
1301+ // B3 (L1):
1302+ // .. <- .. m2, ..
1303+ //
1304+ // This "hoist inversion" can happen due to CFGLoop::compute_freq()'s
1305+ // inaccurate estimation of frequencies for irreducible CFGs, which can
1306+ // lead, for example, to assigning B1 and B3 a higher frequency than B2.
1307+ #ifndef PRODUCT
1308+ if (trace_opto_pipelining ()) {
1309+ tty->print_cr (" # Irreducible loops: schedule in earliest block B%d:" ,
1310+ early->_pre_order );
1311+ self->dump ();
1312+ }
1313+ #endif
1314+ schedule_node_into_block (self, early);
1315+ continue ;
1316+ }
12771317 }
12781318
12791319 // Gather LCA of all uses
0 commit comments