@@ -12,6 +12,7 @@ import (
1212"github.com/vishvananda/netlink"
1313"k8s.io/apimachinery/pkg/api/errors"
1414"k8s.io/apimachinery/pkg/types"
15+ "k8s.io/apimachinery/pkg/util/sets"
1516"k8s.io/utils/ptr"
1617ctrl "sigs.k8s.io/controller-runtime"
1718"sigs.k8s.io/controller-runtime/pkg/client"
@@ -31,6 +32,8 @@ const (
3132defaultGenevePort = 6081
3233defaultGeneveVNI = 100
3334defaultGeneveMTU = 1400
35+
36+ rtProtocol = 0x61
3437)
3538
3639var _ reconcile.Reconciler = & TunnelNodeReconciler {}
@@ -138,7 +141,7 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
138141log .Error (err , "Failed to parse address" , "address" , rs .Address )
139142return ctrl.Result {}, err
140143}
141- ula , err := tunnet .ULAFromAddr (ctx , addr )
144+ ula , err := tunnet .ULAFromPrefix (ctx , addr )
142145if err != nil {
143146log .Error (err , "Failed to parse address" , "address" , rs .Address )
144147return ctrl.Result {}, err
@@ -153,12 +156,12 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
153156return ctrl.Result {}, err
154157}
155158
156- currentRoutes , err := r .routeList (ctx )
159+ curRoutes , err := r .routeList (ctx )
157160if err != nil {
158161log .Error (err , "Failed to get current routes" )
159162return ctrl.Result {}, err
160163}
161- desiredRoutes := make ( map [netip.Prefix ]netip. Addr )
164+ updRoutes := sets . New [netip.Prefix ]( )
162165
163166for _ , agent := range tunnelNode .Status .Agents {
164167if agent .PrivateAddress == "" || agent .AgentAddress == "" {
@@ -173,46 +176,46 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
173176continue
174177}
175178
176- remoteULA , err := netip .ParsePrefix (agent .AgentAddress )
179+ agentAddr , err := netip .ParsePrefix (agent .AgentAddress )
177180if err != nil {
178181log .Error (err , "Failed to parse agent address" ,
179182"agent" , agent .Name , "agentAddress" , agent .AgentAddress )
180183continue
181184}
182- if ! remoteULA .Addr ().Is6 () || ! remoteULA .Addr ().IsGlobalUnicast () {
185+ if ! agentAddr .Addr ().Is6 () || ! agentAddr .Addr ().IsGlobalUnicast () {
183186log .Error (goerrors .New ("overlay address must be global unicase IPv6" ),
184187"Invalid overlay address" ,
185188"agent" , agent .Name , "agentAddress" , agent .AgentAddress )
186189continue
187190}
188- if remoteULA .Bits () != 96 {
191+ if agentAddr .Bits () != 96 {
189192log .Error (goerrors .New ("overlay address must be /96" ),
190193"Invalid overlay address" ,
191194"agent" , agent .Name , "agentAddress" , agent .AgentAddress )
192195continue
193196}
197+ agentULA , err := tunnet .ULAFromPrefix (ctx , agentAddr )
198+ if err != nil {
199+ log .Error (err , "Failed to generate ULA" ,
200+ "agent" , agent .Name , "agentAddress" , agent .AgentAddress )
201+ continue
202+ }
194203
195- desiredRoutes [remoteULA ] = nve
196- if err := r .routeAdd (ctx , remoteULA , nve ); err != nil {
204+ if err := r .routeAdd (ctx , agentULA , nve ); err != nil {
197205log .Error (err , "Failed to ensure route" ,
198- "agent" , agent .Name , "overlay " , remoteULA .String (), "VNE " , nve )
206+ "agent" , agent .Name , "ula " , agentAddr .String (), "vne " , nve )
199207continue
200208}
201209
202- log .Info ("Successfully configured Geneve tunnel route for agent" ,
203- "agent" , agent .Name ,
204- "ULA" , remoteULA ,
205- "VNE" , nve )
210+ updRoutes .Insert (agentAddr )
206211}
207212
208- for route , nexthop := range currentRoutes {
209- if _ , exists := desiredRoutes [route ]; ! exists {
210- if err := r .deleteRoute (ctx , route , nexthop ); err != nil {
211- log .Error (err , "Failed to delete stale route" , "route" , route , "nexthop" , nexthop .String ())
212- } else {
213- log .Info ("Deleted stale route" , "route" , route , "nexthop" , nexthop .String ())
214- }
213+ for _ , dst := range curRoutes .Difference (updRoutes ).UnsortedList () {
214+ if err := r .deleteRoute (ctx , dst ); err != nil {
215+ log .Error (err , "Failed to delete stale route" , "dst" , dst )
216+ continue
215217}
218+ log .Info ("Deleted stale route" , "dst" , dst )
216219}
217220
218221return ctrl.Result {}, nil
@@ -315,7 +318,9 @@ func (r *TunnelNodeReconciler) cleanupGeneve(ctx context.Context) error {
315318}
316319
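317320// routeAdd adds an encapsulated route to the overlay ULA prefix via the NVE, installs a permanent neighbor entry for the ULA address, and then sets that address as the route's gateway.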
318- func (r * TunnelNodeReconciler ) routeAdd (ctx context.Context , ula netip.Prefix , nve netip.Addr ) error {
321+ func (r * TunnelNodeReconciler ) routeAdd (ctx context.Context , ula * tunnet.NetULA , nve netip.Addr ) error {
322+ log := clog .FromContext (ctx )
323+
319324link , err := netlink .LinkByName (r .gnvDev )
320325if err != nil {
321326return fmt .Errorf ("failed to get Geneve interface: %w" , err )
@@ -325,26 +330,24 @@ func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula netip.Prefix, n
325330// "to reach this overlay IP, encapsulate and send to this VTEP"
326331// This is equivalent to the following iproute2 command:
327332// ip route add <overlayIP>/96 encap ip id <gnvVNI> dst <nve> dev <gnvDev>
333+ ulaAddr := ula .FullPrefix ().Addr ()
328334af , mask := netlink .FAMILY_V6 , 128
329- if ula . Addr () .Is4 () {
335+ if ulaAddr .Is4 () {
330336af , mask = netlink .FAMILY_V4 , 32
331337}
332- _ = af
333338route := & netlink.Route {
334339LinkIndex : link .Attrs ().Index ,
340+ Family : af ,
335341Dst : & net.IPNet {
336- IP : ula . Addr () .AsSlice (),
337- Mask : net .CIDRMask (ula .Bits (), mask ),
342+ IP : ulaAddr .AsSlice (),
343+ Mask : net .CIDRMask (ula .FullPrefix (). Bits (), mask ),
338344},
339- //Via: &netlink.Via{
340- // AddrFamily: af,
341- // Addr: ula.Addr().AsSlice(),
342- //},
343345Encap : & IPEncap {
344346ID : r .gnvVNI ,
345347Remote : nve .AsSlice (),
346348},
347- Scope : netlink .SCOPE_UNIVERSE ,
349+ Scope : netlink .SCOPE_UNIVERSE ,
350+ Protocol : rtProtocol ,
348351}
349352
350353if err := netlink .RouteAdd (route ); err != nil {
@@ -357,10 +360,36 @@ func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula netip.Prefix, n
357360}
358361}
359362
363+ log .Info ("Configured route" , "af" , af , "dst" , ula , "encap_id" ,
364+ r .gnvVNI , "encap_remote" , nve .String ())
365+
366+ hwAddr := r .hwAddr (ula )
367+ if err := netlink .NeighSet (& netlink.Neigh {
368+ LinkIndex : link .Attrs ().Index ,
369+ State : netlink .NUD_PERMANENT ,
370+ IP : ulaAddr .AsSlice (),
371+ HardwareAddr : hwAddr ,
372+ }); err != nil {
373+ return fmt .Errorf ("failed to add neighbor entry: %w" , err )
374+ }
375+
376+ log .Info ("Neighbor entry set" , "remote" , ulaAddr , "hwAddr" , hwAddr )
377+
378+ // A gateway (via) is needed so that the kernel can use the same destination hwaddr for the entire ULA prefix.
379+ // It can't be set during route creation because the route to the gateway does not exist yet.
380+ route .Gw = ulaAddr .AsSlice ()
381+ if err := netlink .RouteChange (route ); err != nil {
382+ return fmt .Errorf ("failed to change route with gw %v: %w" , route .Gw , err )
383+ }
384+
385+ log .Info ("Configured route gw" ,
386+ "af" , af , "dst" , ula , "gw" , ulaAddr ,
387+ "encap_id" , r .gnvVNI , "encap_remote" , nve )
388+
360389return nil
361390}
362391
363- func (r * TunnelNodeReconciler ) deleteRoute (ctx context.Context , dst netip.Prefix , nve netip. Addr ) error {
392+ func (r * TunnelNodeReconciler ) deleteRoute (ctx context.Context , dst netip.Prefix ) error {
364393link , err := netlink .LinkByName (r .gnvDev )
365394if err != nil {
366395return fmt .Errorf ("failed to get Geneve interface: %w" , err )
@@ -376,11 +405,6 @@ func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix
376405IP : dst .Addr ().AsSlice (),
377406Mask : net .CIDRMask (dst .Bits (), mask ),
378407},
379- //Encap: &GeneveEncap{
380- // ID: r.gnvVNI,
381- // Remote: nve.AsSlice(),
382- // Port: r.gnvPort,
383- //},
384408}
385409
386410if err := netlink .RouteDel (route ); err != nil {
@@ -391,29 +415,40 @@ func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix
391415}
392416
393417// routeList returns the current routes for the Geneve interface.
394- func (r * TunnelNodeReconciler ) routeList (ctx context.Context ) (map [netip.Prefix ]netip. Addr , error ) {
418+ func (r * TunnelNodeReconciler ) routeList (ctx context.Context ) (sets. Set [netip.Prefix ], error ) {
395419log := log .FromContext (ctx )
396420
397421link , err := netlink .LinkByName (r .gnvDev )
398422if err != nil {
399- return nil , nil
423+ return nil , fmt . Errorf ( "failed to get Geneve link: %w" , err )
400424}
401425
402- routes , err := netlink .RouteList (link , 0 )
426+ routes , err := netlink .RouteListFiltered (
427+ netlink .FAMILY_ALL ,
428+ & netlink.Route {
429+ Protocol : rtProtocol ,
430+ },
431+ netlink .RT_FILTER_PROTOCOL ,
432+ )
403433if err != nil {
404434return nil , fmt .Errorf ("failed to list routes: %w" , err )
405435}
406436
407- out := make ( map [netip.Prefix ]netip. Addr )
437+ out := sets . New [netip.Prefix ]( )
408438for _ , route := range routes {
409- if route .Encap == nil || route .Dst == nil {
410- log .V (1 ).Info ("Skipping route with nil Encap or Dst" )
411- continue
412- }
413-
414- encap , ok := route .Encap .(* IPEncap )
415- if ! ok {
416- log .V (1 ).Info ("Skipping route with non-Geneve Encap" )
439+ // TODO(dilyevsky): netlink doesn't currently support deserialization of encap type ip.
440+ //if route.Encap == nil || route.Encap.Type() != nl.LWTUNNEL_ENCAP_IP {
441+ // log.Info("Skipping route with no/mismatching encap", "dst", route.Dst, "encap", route.Encap)
442+ // continue
443+ //}
444+
445+ //encap, ok := route.Encap.(*IPEncap)
446+ //if !ok {
447+ // log.Info("Skipping route with non-Geneve Encap", "dst", route.Dst)
448+ // continue
449+ //}
450+ if link .Attrs ().Index != route .LinkIndex {
451+ log .V (1 ).Info ("Skipping route with mismatching link index" , "dst" , route .Dst , "linkIndex" , link .Attrs ().Index , "routeLinkIndex" , route .LinkIndex )
417452continue
418453}
419454
@@ -431,13 +466,9 @@ func (r *TunnelNodeReconciler) routeList(ctx context.Context) (map[netip.Prefix]
431466)
432467}
433468
434- log .Info ("Route found" , "dst" , dst , "remote" )
469+ log .Info ("Route found" , "dst" , dst )
435470
436- out [dst ], ok = netip .AddrFromSlice (encap .Remote )
437- if ! ok {
438- log .Info ("Could not parse remote address" , "remote" , encap .Remote )
439- continue
440- }
471+ out .Insert (dst )
441472}
442473
443474return out , nil
0 commit comments