60
60
import org .elasticsearch .xpack .esql .action .EsqlQueryAction ;
61
61
import org .elasticsearch .xpack .esql .action .EsqlSearchShardsAction ;
62
62
import org .elasticsearch .xpack .esql .core .expression .Attribute ;
63
- import org .elasticsearch .xpack .esql .core .util .Holder ;
64
63
import org .elasticsearch .xpack .esql .enrich .EnrichLookupService ;
65
64
import org .elasticsearch .xpack .esql .enrich .LookupFromIndexService ;
66
65
import org .elasticsearch .xpack .esql .plan .physical .ExchangeSinkExec ;
67
66
import org .elasticsearch .xpack .esql .plan .physical .ExchangeSourceExec ;
68
- import org .elasticsearch .xpack .esql .plan .physical .FragmentExec ;
69
67
import org .elasticsearch .xpack .esql .plan .physical .OutputExec ;
70
68
import org .elasticsearch .xpack .esql .plan .physical .PhysicalPlan ;
71
69
import org .elasticsearch .xpack .esql .planner .EsPhysicalOperationProviders ;
@@ -780,35 +778,24 @@ private void runComputeOnDataNode(
780
778
}
781
779
}
782
780
781
+ private static PhysicalPlan reductionPlan (ExchangeSinkExec plan , boolean enable ) {
782
+ PhysicalPlan reducePlan = new ExchangeSourceExec (plan .source (), plan .output (), plan .isIntermediateAgg ());
783
+ if (enable ) {
784
+ PhysicalPlan p = PlannerUtils .reductionPlan (plan );
785
+ if (p != null ) {
786
+ reducePlan = p .replaceChildren (List .of (reducePlan ));
787
+ }
788
+ }
789
+ return new ExchangeSinkExec (plan .source (), plan .output (), plan .isIntermediateAgg (), reducePlan );
790
+ }
791
+
783
792
private class DataNodeRequestHandler implements TransportRequestHandler <DataNodeRequest > {
784
793
@ Override
785
794
public void messageReceived (DataNodeRequest request , TransportChannel channel , Task task ) {
786
795
final ActionListener <ComputeResponse > listener = new ChannelActionListener <>(channel );
787
- final ExchangeSinkExec reducePlan ;
796
+ final PhysicalPlan reductionPlan ;
788
797
if (request .plan () instanceof ExchangeSinkExec plan ) {
789
- var fragments = plan .collectFirstChildren (FragmentExec .class ::isInstance );
790
- if (fragments .isEmpty ()) {
791
- listener .onFailure (new IllegalStateException ("expected a fragment plan for a remote compute; got " + request .plan ()));
792
- return ;
793
- }
794
- var localExchangeSource = new ExchangeSourceExec (plan .source (), plan .output (), plan .isIntermediateAgg ());
795
- Holder <PhysicalPlan > reducePlanHolder = new Holder <>();
796
- if (request .pragmas ().nodeLevelReduction ()) {
797
- PhysicalPlan dataNodePlan = request .plan ();
798
- request .plan ()
799
- .forEachUp (
800
- FragmentExec .class ,
801
- f -> { reducePlanHolder .set (PlannerUtils .dataNodeReductionPlan (f .fragment (), dataNodePlan )); }
802
- );
803
- }
804
- reducePlan = new ExchangeSinkExec (
805
- plan .source (),
806
- plan .output (),
807
- plan .isIntermediateAgg (),
808
- reducePlanHolder .get () != null
809
- ? reducePlanHolder .get ().replaceChildren (List .of (localExchangeSource ))
810
- : localExchangeSource
811
- );
798
+ reductionPlan = reductionPlan (plan , request .pragmas ().nodeLevelReduction ());
812
799
} else {
813
800
listener .onFailure (new IllegalStateException ("expected exchange sink for a remote compute; got " + request .plan ()));
814
801
return ;
@@ -825,7 +812,7 @@ public void messageReceived(DataNodeRequest request, TransportChannel channel, T
825
812
request .indicesOptions ()
826
813
);
827
814
try (var computeListener = ComputeListener .create (transportService , (CancellableTask ) task , listener )) {
828
- runComputeOnDataNode ((CancellableTask ) task , sessionId , reducePlan , request , computeListener );
815
+ runComputeOnDataNode ((CancellableTask ) task , sessionId , reductionPlan , request , computeListener );
829
816
}
830
817
}
831
818
}
@@ -871,10 +858,10 @@ public void messageReceived(ClusterComputeRequest request, TransportChannel chan
871
858
* Performs a compute on a remote cluster. The output pages are placed in an exchange sink specified by
872
859
* {@code globalSessionId}. The coordinator on the main cluster will poll pages from there.
873
860
* <p>
874
- * Currently, the coordinator on the remote cluster simply collects pages from data nodes in the remote cluster
875
- * and places them in the exchange sink. We can achieve this by using a single exchange buffer to minimize overhead.
876
- * However, here we use two exchange buffers so that we can run an actual plan on this coordinator to perform partial
877
- * reduce operations, such as limit, topN, and partial-to-partial aggregation in the future .
861
+ * Currently, the coordinator on the remote cluster polls pages from data nodes within the remote cluster
862
+ * and performs cluster-level reduction before sending pages to the querying cluster. This reduction aims
863
+ * to minimize data transfers across clusters but may require additional CPU resources for operations like
864
+ * aggregations .
878
865
*/
879
866
void runComputeOnRemoteCluster (
880
867
String clusterAlias ,
@@ -892,19 +879,14 @@ void runComputeOnRemoteCluster(
892
879
() -> exchangeService .finishSinkHandler (globalSessionId , new TaskCancelledException (parentTask .getReasonCancelled ()))
893
880
);
894
881
final String localSessionId = clusterAlias + ":" + globalSessionId ;
882
+ final PhysicalPlan coordinatorPlan = reductionPlan (plan , true );
895
883
var exchangeSource = new ExchangeSourceHandler (
896
884
configuration .pragmas ().exchangeBufferSize (),
897
885
transportService .getThreadPool ().executor (ThreadPool .Names .SEARCH ),
898
886
computeListener .acquireAvoid ()
899
887
);
900
888
try (Releasable ignored = exchangeSource .addEmptySink ()) {
901
889
exchangeSink .addCompletionListener (computeListener .acquireAvoid ());
902
- PhysicalPlan coordinatorPlan = new ExchangeSinkExec (
903
- plan .source (),
904
- plan .output (),
905
- plan .isIntermediateAgg (),
906
- new ExchangeSourceExec (plan .source (), plan .output (), plan .isIntermediateAgg ())
907
- );
908
890
runCompute (
909
891
parentTask ,
910
892
new ComputeContext (localSessionId , clusterAlias , List .of (), configuration , exchangeSource , exchangeSink ),
0 commit comments