Skip to content

Commit 93bd1a9

Browse files
committed
[tunnel] Add neighbor discovery and fix bugs
1 parent 6d3a551 commit 93bd1a9

File tree

4 files changed

+637
-61
lines changed

4 files changed

+637
-61
lines changed

pkg/backplane/controllers/encap.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ func (e *IPEncap) Decode(buf []byte) error {
5050
for _, attr := range attrs {
5151
switch attr.Attr.Type {
5252
case LWTUNNEL_IP_ID:
53-
if len(attr.Value) < 4 {
53+
if len(attr.Value) < 8 {
5454
return fmt.Errorf("geneve: invalid VNI length")
5555
}
56-
e.ID = native.Uint32(attr.Value[0:4])
56+
e.ID = uint32(native.Uint64(attr.Value[0:8]))
5757
case LWTUNNEL_IP_DST:
5858
if len(attr.Value) == 4 {
5959
e.Remote = net.IP(attr.Value[0:4])
@@ -62,6 +62,11 @@ func (e *IPEncap) Decode(buf []byte) error {
6262
} else {
6363
return fmt.Errorf("geneve: invalid remote address length")
6464
}
65+
case LWTUNNEL_IP_TTL:
66+
if len(attr.Value) != 1 {
67+
return fmt.Errorf("geneve: invalid TTL length")
68+
}
69+
e.TTL = attr.Value[0]
6570
}
6671
}
6772

pkg/backplane/controllers/tunnelnode.go

Lines changed: 85 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/vishvananda/netlink"
1313
"k8s.io/apimachinery/pkg/api/errors"
1414
"k8s.io/apimachinery/pkg/types"
15+
"k8s.io/apimachinery/pkg/util/sets"
1516
"k8s.io/utils/ptr"
1617
ctrl "sigs.k8s.io/controller-runtime"
1718
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -31,6 +32,8 @@ const (
3132
defaultGenevePort = 6081
3233
defaultGeneveVNI = 100
3334
defaultGeneveMTU = 1400
35+
36+
rtProtocol = 0x61
3437
)
3538

3639
var _ reconcile.Reconciler = &TunnelNodeReconciler{}
@@ -138,7 +141,7 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
138141
log.Error(err, "Failed to parse address", "address", rs.Address)
139142
return ctrl.Result{}, err
140143
}
141-
ula, err := tunnet.ULAFromAddr(ctx, addr)
144+
ula, err := tunnet.ULAFromPrefix(ctx, addr)
142145
if err != nil {
143146
log.Error(err, "Failed to parse address", "address", rs.Address)
144147
return ctrl.Result{}, err
@@ -153,12 +156,12 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
153156
return ctrl.Result{}, err
154157
}
155158

156-
currentRoutes, err := r.routeList(ctx)
159+
curRoutes, err := r.routeList(ctx)
157160
if err != nil {
158161
log.Error(err, "Failed to get current routes")
159162
return ctrl.Result{}, err
160163
}
161-
desiredRoutes := make(map[netip.Prefix]netip.Addr)
164+
updRoutes := sets.New[netip.Prefix]()
162165

163166
for _, agent := range tunnelNode.Status.Agents {
164167
if agent.PrivateAddress == "" || agent.AgentAddress == "" {
@@ -173,46 +176,46 @@ func (r *TunnelNodeReconciler) Reconcile(ctx context.Context, request reconcile.
173176
continue
174177
}
175178

176-
remoteULA, err := netip.ParsePrefix(agent.AgentAddress)
179+
agentAddr, err := netip.ParsePrefix(agent.AgentAddress)
177180
if err != nil {
178181
log.Error(err, "Failed to parse agent address",
179182
"agent", agent.Name, "agentAddress", agent.AgentAddress)
180183
continue
181184
}
182-
if !remoteULA.Addr().Is6() || !remoteULA.Addr().IsGlobalUnicast() {
185+
if !agentAddr.Addr().Is6() || !agentAddr.Addr().IsGlobalUnicast() {
183186
log.Error(goerrors.New("overlay address must be global unicase IPv6"),
184187
"Invalid overlay address",
185188
"agent", agent.Name, "agentAddress", agent.AgentAddress)
186189
continue
187190
}
188-
if remoteULA.Bits() != 96 {
191+
if agentAddr.Bits() != 96 {
189192
log.Error(goerrors.New("overlay address must be /96"),
190193
"Invalid overlay address",
191194
"agent", agent.Name, "agentAddress", agent.AgentAddress)
192195
continue
193196
}
197+
agentULA, err := tunnet.ULAFromPrefix(ctx, agentAddr)
198+
if err != nil {
199+
log.Error(err, "Failed to generate ULA",
200+
"agent", agent.Name, "agentAddress", agent.AgentAddress)
201+
continue
202+
}
194203

195-
desiredRoutes[remoteULA] = nve
196-
if err := r.routeAdd(ctx, remoteULA, nve); err != nil {
204+
if err := r.routeAdd(ctx, agentULA, nve); err != nil {
197205
log.Error(err, "Failed to ensure route",
198-
"agent", agent.Name, "overlay", remoteULA.String(), "VNE", nve)
206+
"agent", agent.Name, "ula", agentAddr.String(), "vne", nve)
199207
continue
200208
}
201209

202-
log.Info("Successfully configured Geneve tunnel route for agent",
203-
"agent", agent.Name,
204-
"ULA", remoteULA,
205-
"VNE", nve)
210+
updRoutes.Insert(agentAddr)
206211
}
207212

208-
for route, nexthop := range currentRoutes {
209-
if _, exists := desiredRoutes[route]; !exists {
210-
if err := r.deleteRoute(ctx, route, nexthop); err != nil {
211-
log.Error(err, "Failed to delete stale route", "route", route, "nexthop", nexthop.String())
212-
} else {
213-
log.Info("Deleted stale route", "route", route, "nexthop", nexthop.String())
214-
}
213+
for _, dst := range curRoutes.Difference(updRoutes).UnsortedList() {
214+
if err := r.deleteRoute(ctx, dst); err != nil {
215+
log.Error(err, "Failed to delete stale route", "dst", dst)
216+
continue
215217
}
218+
log.Info("Deleted stale route", "dst", dst)
216219
}
217220

218221
return ctrl.Result{}, nil
@@ -315,7 +318,9 @@ func (r *TunnelNodeReconciler) cleanupGeneve(ctx context.Context) error {
315318
}
316319

317320
// routeAdd adds a route to the overlay addr via NVE.
318-
func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula netip.Prefix, nve netip.Addr) error {
321+
func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula *tunnet.NetULA, nve netip.Addr) error {
322+
log := clog.FromContext(ctx)
323+
319324
link, err := netlink.LinkByName(r.gnvDev)
320325
if err != nil {
321326
return fmt.Errorf("failed to get Geneve interface: %w", err)
@@ -325,26 +330,24 @@ func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula netip.Prefix, n
325330
// "to reach this overlay IP, encapsulate and send to this VTEP"
326331
// This is equivalent to the following iproute2 command:
327332
// ip route add <overlayIP>/96 encap ip id <gnvVNI> dst <nve> dev <gnvDev>
333+
ulaAddr := ula.FullPrefix().Addr()
328334
af, mask := netlink.FAMILY_V6, 128
329-
if ula.Addr().Is4() {
335+
if ulaAddr.Is4() {
330336
af, mask = netlink.FAMILY_V4, 32
331337
}
332-
_ = af
333338
route := &netlink.Route{
334339
LinkIndex: link.Attrs().Index,
340+
Family: af,
335341
Dst: &net.IPNet{
336-
IP: ula.Addr().AsSlice(),
337-
Mask: net.CIDRMask(ula.Bits(), mask),
342+
IP: ulaAddr.AsSlice(),
343+
Mask: net.CIDRMask(ula.FullPrefix().Bits(), mask),
338344
},
339-
//Via: &netlink.Via{
340-
// AddrFamily: af,
341-
// Addr: ula.Addr().AsSlice(),
342-
//},
343345
Encap: &IPEncap{
344346
ID: r.gnvVNI,
345347
Remote: nve.AsSlice(),
346348
},
347-
Scope: netlink.SCOPE_UNIVERSE,
349+
Scope: netlink.SCOPE_UNIVERSE,
350+
Protocol: rtProtocol,
348351
}
349352

350353
if err := netlink.RouteAdd(route); err != nil {
@@ -357,10 +360,36 @@ func (r *TunnelNodeReconciler) routeAdd(ctx context.Context, ula netip.Prefix, n
357360
}
358361
}
359362

363+
log.Info("Configured route", "af", af, "dst", ula, "encap_id",
364+
r.gnvVNI, "encap_remote", nve.String())
365+
366+
hwAddr := r.hwAddr(ula)
367+
if err := netlink.NeighSet(&netlink.Neigh{
368+
LinkIndex: link.Attrs().Index,
369+
State: netlink.NUD_PERMANENT,
370+
IP: ulaAddr.AsSlice(),
371+
HardwareAddr: hwAddr,
372+
}); err != nil {
373+
return fmt.Errorf("failed to add neighbor entry: %w", err)
374+
}
375+
376+
log.Info("Neighbor entry set", "remote", ulaAddr, "hwAddr", hwAddr)
377+
378+
// Via is needed so that the kernel can use the same dst hwaddr for the entire ULA prefix.
379+
// Via can't be set during route creation because the route to the gw does not exist yet.
380+
route.Gw = ulaAddr.AsSlice()
381+
if err := netlink.RouteChange(route); err != nil {
382+
return fmt.Errorf("failed to change route with gw %v: %w", route.Gw, err)
383+
}
384+
385+
log.Info("Configured route gw",
386+
"af", af, "dst", ula, "gw", ulaAddr,
387+
"encap_id", r.gnvVNI, "encap_remote", nve)
388+
360389
return nil
361390
}
362391

363-
func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix, nve netip.Addr) error {
392+
func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix) error {
364393
link, err := netlink.LinkByName(r.gnvDev)
365394
if err != nil {
366395
return fmt.Errorf("failed to get Geneve interface: %w", err)
@@ -376,11 +405,6 @@ func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix
376405
IP: dst.Addr().AsSlice(),
377406
Mask: net.CIDRMask(dst.Bits(), mask),
378407
},
379-
//Encap: &GeneveEncap{
380-
// ID: r.gnvVNI,
381-
// Remote: nve.AsSlice(),
382-
// Port: r.gnvPort,
383-
//},
384408
}
385409

386410
if err := netlink.RouteDel(route); err != nil {
@@ -391,29 +415,40 @@ func (r *TunnelNodeReconciler) deleteRoute(ctx context.Context, dst netip.Prefix
391415
}
392416

393417
// routeList returns the current routes for the Geneve interface.
394-
func (r *TunnelNodeReconciler) routeList(ctx context.Context) (map[netip.Prefix]netip.Addr, error) {
418+
func (r *TunnelNodeReconciler) routeList(ctx context.Context) (sets.Set[netip.Prefix], error) {
395419
log := log.FromContext(ctx)
396420

397421
link, err := netlink.LinkByName(r.gnvDev)
398422
if err != nil {
399-
return nil, nil
423+
return nil, fmt.Errorf("failed to get Geneve link: %w", err)
400424
}
401425

402-
routes, err := netlink.RouteList(link, 0)
426+
routes, err := netlink.RouteListFiltered(
427+
netlink.FAMILY_ALL,
428+
&netlink.Route{
429+
Protocol: rtProtocol,
430+
},
431+
netlink.RT_FILTER_PROTOCOL,
432+
)
403433
if err != nil {
404434
return nil, fmt.Errorf("failed to list routes: %w", err)
405435
}
406436

407-
out := make(map[netip.Prefix]netip.Addr)
437+
out := sets.New[netip.Prefix]()
408438
for _, route := range routes {
409-
if route.Encap == nil || route.Dst == nil {
410-
log.V(1).Info("Skipping route with nil Encap or Dst")
411-
continue
412-
}
413-
414-
encap, ok := route.Encap.(*IPEncap)
415-
if !ok {
416-
log.V(1).Info("Skipping route with non-Geneve Encap")
439+
// TODO(dilyevsky): netlink doesn't currently support deserialization of encap type ip.
440+
//if route.Encap == nil || route.Encap.Type() != nl.LWTUNNEL_ENCAP_IP {
441+
// log.Info("Skipping route with no/mismatching encap", "dst", route.Dst, "encap", route.Encap)
442+
// continue
443+
//}
444+
445+
//encap, ok := route.Encap.(*IPEncap)
446+
//if !ok {
447+
// log.Info("Skipping route with non-Geneve Encap", "dst", route.Dst)
448+
// continue
449+
//}
450+
if link.Attrs().Index != route.LinkIndex {
451+
log.V(1).Info("Skipping route with mismatching link index", "dst", route.Dst, "linkIndex", link.Attrs().Index, "routeLinkIndex", route.LinkIndex)
417452
continue
418453
}
419454

@@ -431,13 +466,9 @@ func (r *TunnelNodeReconciler) routeList(ctx context.Context) (map[netip.Prefix]
431466
)
432467
}
433468

434-
log.Info("Route found", "dst", dst, "remote")
469+
log.Info("Route found", "dst", dst)
435470

436-
out[dst], ok = netip.AddrFromSlice(encap.Remote)
437-
if !ok {
438-
log.Info("Could not parse remote address", "remote", encap.Remote)
439-
continue
440-
}
471+
out.Insert(dst)
441472
}
442473

443474
return out, nil

0 commit comments

Comments
 (0)