Skip to content

Commit 4106639

Browse files
QxBytesCopilot
andauthored
fix: remove veth pair in vm ns if previously leaked and fix validation (#3940)
* proactively clean up leaked vnet and container veths in vm ns before creating new veth pair * fix veth move validation logic * address linter * fix naming and add fields for debugging * adjust log message * consolidate logic and improve uts * Update network/transparent_vlan_endpointclient_linux.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Alexander <39818795+QxBytes@users.noreply.github.com> --------- Signed-off-by: Alexander <39818795+QxBytes@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 78cda85 commit 4106639

File tree

3 files changed

+86
-56
lines changed

3 files changed

+86
-56
lines changed

network/networkutils/networkutils_linux.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,14 @@ func (nu NetworkUtils) DisableRAForInterface(ifName string) error {
256256
raFilePath := fmt.Sprintf(acceptRAV6File, ifName)
257257
exist, err := platform.CheckIfFileExists(raFilePath)
258258
if !exist {
259-
logger.Error("accept_ra file doesn't exist with", zap.Error(err))
259+
logger.Error("accept_ra file doesn't exist with", zap.Error(err), zap.String("ifName", ifName))
260260
return nil
261261
}
262262

263263
cmd := fmt.Sprintf(disableRACmd, ifName)
264264
out, err := nu.plClient.ExecuteRawCommand(cmd)
265265
if err != nil {
266-
logger.Error("Diabling ra failed with", zap.Error(err), zap.Any("out", out))
266+
logger.Error("Diabling ra failed with", zap.Error(err), zap.Any("out", out), zap.String("ifName", ifName))
267267
}
268268

269269
return err

network/transparent_vlan_endpointclient_linux.go

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,24 @@ func NewTransparentVlanEndpointClient(
110110
return client
111111
}
112112

113+
// cleanupInterfaceIfExists checks if an interface exists and deletes it if found
114+
// Returns an error if the deletion fails
115+
func (client *TransparentVlanEndpointClient) cleanupInterfaceIfExists(interfaceName string) error {
116+
_, ifExists := client.netioshim.GetNetworkInterfaceByName(interfaceName)
117+
if ifExists == nil {
118+
logger.Info("Interface exists, deleting", zap.String("interfaceName", interfaceName))
119+
if err := client.netlink.DeleteLink(interfaceName); err != nil {
120+
return errors.Wrapf(err, "failed to clean up %s", interfaceName)
121+
}
122+
}
123+
return nil
124+
}
125+
113126
// Adds interfaces to the vnet (created if not existing) and vm namespace
114127
func (client *TransparentVlanEndpointClient) AddEndpoints(epInfo *EndpointInfo) error {
115128
// VM Namespace
116129
if err := client.ensureCleanPopulateVM(); err != nil {
117-
return errors.Wrap(err, "failed to ensure both network namespace and vlan veth were present or both absent")
130+
return errors.Wrap(err, "failed to ensure both network namespace and vlan interface were present or both absent")
118131
}
119132
if err := client.PopulateVM(epInfo); err != nil {
120133
return err
@@ -131,7 +144,7 @@ func (client *TransparentVlanEndpointClient) AddEndpoints(epInfo *EndpointInfo)
131144
// Called from AddEndpoints, Namespace: VM and Vnet
132145
func (client *TransparentVlanEndpointClient) ensureCleanPopulateVM() error {
133146
// Clean up vlan interface in the VM namespace and ensure the network namespace (if it exists) has a vlan interface
134-
logger.Info("Checking if NS and vlan veth exists...")
147+
logger.Info("Checking if NS and vlan interface exists...")
135148
var existingErr error
136149
client.vnetNSFileDescriptor, existingErr = client.netnsClient.GetFromName(client.vnetNSName)
137150
if existingErr == nil {
@@ -146,18 +159,14 @@ func (client *TransparentVlanEndpointClient) ensureCleanPopulateVM() error {
146159
logger.Info("vlan interface doesn't exist even though network namespace exists, deleting network namespace...", zap.String("message", vlanIfErr.Error()))
147160
delErr := client.netnsClient.DeleteNamed(client.vnetNSName)
148161
if delErr != nil {
149-
return errors.Wrap(delErr, "failed to cleanup/delete ns after noticing vlan veth does not exist")
162+
return errors.Wrap(delErr, "failed to cleanup/delete ns after noticing vlan interface does not exist")
150163
}
151164
}
152165
}
153166
// Delete the vlan interface in the VM namespace if it exists
154-
_, vlanIfInVMErr := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName)
155-
if vlanIfInVMErr == nil {
156-
// The vlan interface exists in the VM ns because it failed to move into the network ns previously and needs to be cleaned up
157-
logger.Info("vlan interface exists on the VM namespace, deleting", zap.String("vlanIfName", client.vlanIfName))
158-
if delErr := client.netlink.DeleteLink(client.vlanIfName); delErr != nil {
159-
return errors.Wrap(delErr, "failed to clean up vlan interface")
160-
}
167+
// The vlan interface exists in the VM ns because it failed to move into the network ns previously and needs to be cleaned up
168+
if delErr := client.cleanupInterfaceIfExists(client.vlanIfName); delErr != nil {
169+
return errors.Wrap(delErr, "failed to clean up vlan interface")
161170
}
162171
return nil
163172
}
@@ -183,24 +192,24 @@ func (client *TransparentVlanEndpointClient) createNetworkNamespace(vmNS int) er
183192
return errors.Wrap(errNamespaceCreation, "vnet and vm namespace are the same")
184193
}
185194

186-
// Called from PopulateVM, Namespace: VM and namespace represented by fd
187-
func (client *TransparentVlanEndpointClient) setLinkNetNSAndConfirm(name string, fd uintptr) error {
195+
// Called from PopulateVM, Namespace: VM and namespace represented by fd (which is named nsName)
196+
func (client *TransparentVlanEndpointClient) setLinkNetNSAndConfirm(name string, fd uintptr, nsName string) error {
188197
logger.Info("Move link to NS", zap.String("ifName", name), zap.Any("NSFileDescriptor", fd))
189198
err := client.netlink.SetLinkNetNs(name, fd)
190199
if err != nil {
191-
return errors.Wrapf(err, "failed to set %v inside namespace %v", name, fd)
200+
return errors.Wrapf(err, "failed to set %v inside namespace %v (%s)", name, fd, nsName)
192201
}
193202

194203
// confirm veth was moved successfully
195204
err = RunWithRetries(func() error {
196205
// retry checking in the namespace if the interface is not detected
197-
return ExecuteInNS(client.nsClient, client.vnetNSName, func() error {
198-
_, ifDetectedErr := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName)
199-
return errors.Wrap(ifDetectedErr, "failed to get vlan veth in namespace")
206+
return ExecuteInNS(client.nsClient, nsName, func() error {
207+
_, ifDetectedErr := client.netioshim.GetNetworkInterfaceByName(name)
208+
return errors.Wrap(ifDetectedErr, "failed to confirm interface moved in namespace")
200209
})
201210
}, numRetries, sleepInMs)
202211
if err != nil {
203-
return errors.Wrapf(err, "failed to detect %v inside namespace %v", name, fd)
212+
return errors.Wrapf(err, "failed to detect %v inside namespace %v (%s)", name, fd, nsName)
204213
}
205214
return nil
206215
}
@@ -215,7 +224,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
215224
var existingErr error
216225
client.vnetNSFileDescriptor, existingErr = client.netnsClient.GetFromName(client.vnetNSName)
217226
// If the ns does not exist, the below code will trigger to create it
218-
// This will also (we assume) mean the vlan veth does not exist
227+
// This will also (we assume) mean the vlan interface does not exist
219228
if existingErr != nil {
220229
// We assume the only possible error is that the namespace doesn't exist
221230
logger.Info("No existing NS detected. Creating the vnet namespace and switching to it", zap.String("message", existingErr.Error()))
@@ -234,15 +243,15 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
234243
logger.Info("[transparent vlan] removing vnet ns due to failure...")
235244
err = client.netnsClient.DeleteNamed(client.vnetNSName)
236245
if err != nil {
237-
logger.Error("failed to cleanup/delete ns after failing to create vlan veth")
246+
logger.Error("failed to cleanup/delete ns after failing to create vlan interface")
238247
}
239248
}
240249
}()
241250
if deleteNSIfNotNilErr != nil {
242251
return errors.Wrap(deleteNSIfNotNilErr, "failed to set current ns to vm")
243252
}
244253

245-
// Now create vlan veth
254+
// Now create vlan interface
246255
logger.Info("Create the host vlan link after getting eth0", zap.String("primaryHostIfName", client.primaryHostIfName))
247256
// Get parent interface index. Index is consistent across libraries.
248257
eth0, deleteNSIfNotNilErr := client.netioshim.GetNetworkInterfaceByName(client.primaryHostIfName)
@@ -258,7 +267,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
258267
VlanId: client.vlanID,
259268
}
260269
logger.Info("Attempting to create link in VM NS", zap.String("vlanIfName", client.vlanIfName))
261-
// Create vlan veth
270+
// Create vlan interface
262271
deleteNSIfNotNilErr = vishnetlink.LinkAdd(link)
263272
if deleteNSIfNotNilErr != nil {
264273
// ignore link already exists error
@@ -271,35 +280,35 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
271280
}
272281
defer func() {
273282
if deleteNSIfNotNilErr != nil {
274-
logger.Info("removing vlan veth due to failure...")
283+
logger.Info("removing vlan interface due to failure...")
275284
if delErr := client.netlink.DeleteLink(client.vlanIfName); delErr != nil {
276-
logger.Error("deleting vlan veth failed on addendpoint failure with", zap.Any("error:", delErr.Error()))
285+
logger.Error("deleting vlan interface failed on addendpoint failure with", zap.Any("error:", delErr.Error()))
277286
}
278287
}
279288
}()
280289

281290
// sometimes there is slight delay in interface creation. check if it exists
282291
err = RunWithRetries(func() error {
283292
_, err = client.netioshim.GetNetworkInterfaceByName(client.vlanIfName)
284-
return errors.Wrap(err, "failed to get vlan veth")
293+
return errors.Wrap(err, "failed to get vlan interface")
285294
}, numRetries, sleepInMs)
286295

287296
if err != nil {
288-
deleteNSIfNotNilErr = errors.Wrapf(err, "failed to get vlan veth interface:%s", client.vlanIfName)
297+
deleteNSIfNotNilErr = errors.Wrapf(err, "failed to get vlan interface: %s", client.vlanIfName)
289298
return deleteNSIfNotNilErr
290299
}
291300

292301
deleteNSIfNotNilErr = client.netUtilsClient.DisableRAForInterface(client.vlanIfName)
293302
if deleteNSIfNotNilErr != nil {
294303
return errors.Wrap(deleteNSIfNotNilErr, "failed to disable router advertisements for vlan vnet link")
295304
}
296-
// vlan veth was created successfully, so move the vlan veth you created
305+
// vlan interface was created successfully, so move the vlan interface you created
297306
logger.Info("Move vlan link to vnet NS", zap.String("vlanIfName", client.vlanIfName), zap.Any("vnetNSFileDescriptor", uintptr(client.vnetNSFileDescriptor)))
298-
deleteNSIfNotNilErr = client.setLinkNetNSAndConfirm(client.vlanIfName, uintptr(client.vnetNSFileDescriptor))
307+
deleteNSIfNotNilErr = client.setLinkNetNSAndConfirm(client.vlanIfName, uintptr(client.vnetNSFileDescriptor), client.vnetNSName)
299308
if deleteNSIfNotNilErr != nil {
300-
return errors.Wrap(deleteNSIfNotNilErr, "failed to move or detect vlan veth inside vnet namespace")
309+
return errors.Wrap(deleteNSIfNotNilErr, "failed to move or detect vlan interface inside vnet namespace")
301310
}
302-
logger.Info("Moving vlan veth into namespace confirmed")
311+
logger.Info("Moving vlan interface into namespace confirmed")
303312
} else {
304313
logger.Info("Existing NS detected. vlan interface should exist or namespace would've been deleted.", zap.String("vnetNSName", client.vnetNSName), zap.String("vlanIfName", client.vlanIfName))
305314
}
@@ -310,6 +319,14 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
310319
logger.Info("Failed to parse the mac address", zap.String("defaultHostVethHwAddr", defaultHostVethHwAddr))
311320
}
312321

322+
// Proactively clean up any leftover veth interfaces before creating new ones
323+
if vnetDelErr := client.cleanupInterfaceIfExists(client.vnetVethName); vnetDelErr != nil {
324+
logger.Info("Could not proactively clean up vnet veth", zap.String("vnetVethName", client.vnetVethName), zap.Error(vnetDelErr))
325+
}
326+
if containerDelErr := client.cleanupInterfaceIfExists(client.containerVethName); containerDelErr != nil {
327+
logger.Info("Could not proactively clean up container veth", zap.String("containerVethName", client.containerVethName), zap.Error(containerDelErr))
328+
}
329+
313330
// Create veth pair
314331
if err = client.netUtilsClient.CreateEndpoint(client.vnetVethName, client.containerVethName, mac); err != nil {
315332
return errors.Wrap(err, "failed to create veth pair")
@@ -349,7 +366,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
349366
return errors.Wrap(err, "failed to disable RA on container veth, deleting")
350367
}
351368

352-
if err = client.setLinkNetNSAndConfirm(client.vnetVethName, uintptr(client.vnetNSFileDescriptor)); err != nil {
369+
if err = client.setLinkNetNSAndConfirm(client.vnetVethName, uintptr(client.vnetNSFileDescriptor), client.vnetNSName); err != nil {
353370
if delErr := client.netlink.DeleteLink(client.vnetVethName); delErr != nil {
354371
logger.Error("Deleting vnet veth failed on addendpoint failure with", zap.Error(delErr))
355372
}
@@ -363,7 +380,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
363380
func (client *TransparentVlanEndpointClient) PopulateVnet(epInfo *EndpointInfo) error {
364381
_, err := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName)
365382
if err != nil {
366-
return errors.Wrap(err, "vlan veth doesn't exist")
383+
return errors.Wrap(err, "vlan interface doesn't exist")
367384
}
368385
vnetVethIf, err := client.netioshim.GetNetworkInterfaceByName(client.vnetVethName)
369386
if err != nil {
@@ -659,7 +676,7 @@ func (client *TransparentVlanEndpointClient) DeleteEndpointsImpl(ep *endpoint, _
659676
// TODO: revist if this require in future.
660677
//nolint gocritic
661678
/* if routesLeft <= numDefaultRoutes {
662-
// Deletes default arp, default routes, vlan veth; there are two default routes
679+
// Deletes default arp, default routes, vlan interface; there are two default routes
663680
// so when we have <= numDefaultRoutes routes left, no containers use this namespace
664681
log.Printf("[transparent vlan] Deleting namespace %s as no containers occupy it", client.vnetNSName)
665682
delErr := client.netnsClient.DeleteNamed(client.vnetNSName)

network/transparent_vlan_endpointclient_linux_test.go

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,14 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
121121
nl := netlink.NewMockNetlink(false, "")
122122
plc := platform.NewMockExecClient(false)
123123

124-
tests := []struct {
125-
name string
126-
client *TransparentVlanEndpointClient
127-
epInfo *EndpointInfo
128-
wantErr bool
129-
wantErrMsg string
124+
setLinkNetNSTests := []struct {
125+
name string
126+
client *TransparentVlanEndpointClient
127+
epInfo *EndpointInfo
128+
moveInterface string
129+
moveNS string
130+
wantErr bool
131+
wantErrMsg string
130132
}{
131133
// Set the link network namespace and confirm that it was moved inside
132134
{
@@ -143,8 +145,10 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
143145
netioshim: netio.NewMockNetIO(false, 0),
144146
nsClient: NewMockNamespaceClient(),
145147
},
146-
epInfo: &EndpointInfo{},
147-
wantErr: false,
148+
moveInterface: "eth0.1",
149+
moveNS: "az_ns_1",
150+
epInfo: &EndpointInfo{},
151+
wantErr: false,
148152
},
149153
{
150154
name: "Set link netns fail to set",
@@ -160,9 +164,11 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
160164
netioshim: netio.NewMockNetIO(false, 0),
161165
nsClient: NewMockNamespaceClient(),
162166
},
163-
epInfo: &EndpointInfo{},
164-
wantErr: true,
165-
wantErrMsg: "failed to set eth0.1",
167+
moveInterface: "A1veth0",
168+
moveNS: "az_ns_2",
169+
epInfo: &EndpointInfo{},
170+
wantErr: true,
171+
wantErrMsg: "failed to set A1veth0 inside namespace 1 (az_ns_2)",
166172
},
167173
{
168174
name: "Set link netns fail to detect",
@@ -181,15 +187,17 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
181187
},
182188
nsClient: NewMockNamespaceClient(),
183189
},
184-
epInfo: &EndpointInfo{},
185-
wantErr: true,
186-
wantErrMsg: "failed to detect eth0.1",
190+
moveInterface: "eth0.1",
191+
moveNS: "az_ns_1",
192+
epInfo: &EndpointInfo{},
193+
wantErr: true,
194+
wantErrMsg: "failed to detect eth0.1 inside namespace 1 (az_ns_1)",
187195
},
188196
}
189-
for _, tt := range tests {
197+
for _, tt := range setLinkNetNSTests {
190198
tt := tt
191199
t.Run(tt.name, func(t *testing.T) {
192-
err := tt.client.setLinkNetNSAndConfirm(tt.client.vlanIfName, 1)
200+
err := tt.client.setLinkNetNSAndConfirm(tt.moveInterface, 1, tt.moveNS)
193201
if tt.wantErr {
194202
require.Error(t, err)
195203
require.Contains(t, err.Error(), tt.wantErrMsg, "Expected:%v actual:%v", tt.wantErrMsg, err.Error())
@@ -199,7 +207,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
199207
})
200208
}
201209

202-
tests = []struct {
210+
tests := []struct {
203211
name string
204212
client *TransparentVlanEndpointClient
205213
epInfo *EndpointInfo
@@ -256,7 +264,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
256264
},
257265
epInfo: &EndpointInfo{},
258266
wantErr: true,
259-
wantErrMsg: "failed to cleanup/delete ns after noticing vlan veth does not exist: netns failure: " + errNetnsMock.Error(),
267+
wantErrMsg: "failed to cleanup/delete ns after noticing vlan interface does not exist: netns failure: " + errNetnsMock.Error(),
260268
},
261269
{
262270
name: "Ensure clean populate VM cleanup straggling vlan if in vm ns",
@@ -344,7 +352,12 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
344352
set: defaultSet,
345353
deleteNamed: defaultDeleteNamed,
346354
},
347-
netlink: netlink.NewMockNetlink(false, ""),
355+
netlink: &netlink.MockNetlink{
356+
DeleteLinkFn: func(_ string) error {
357+
// should still succeed
358+
return netlink.ErrorMockNetlink
359+
},
360+
},
348361
plClient: platform.NewMockExecClient(false),
349362
netUtilsClient: networkutils.NewNetworkUtils(nl, plc),
350363
netioshim: netio.NewMockNetIO(false, 0),
@@ -375,7 +388,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
375388
},
376389
epInfo: &EndpointInfo{},
377390
wantErr: true,
378-
wantErrMsg: "failed to move or detect vnetVethName in vnet ns, deleting: failed to set A1veth0 inside namespace 1: " + netlink.ErrorMockNetlink.Error() + " : netlink fail",
391+
wantErrMsg: "failed to move or detect vnetVethName in vnet ns, deleting: failed to set A1veth0 inside namespace 1 (az_ns_1): " + netlink.ErrorMockNetlink.Error() + " : netlink fail",
379392
},
380393
{
381394
name: "Add endpoints get interface fail for primary interface (eth0)",
@@ -523,7 +536,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
523536
wantErr: false,
524537
},
525538
{
526-
name: "Add endpoints fail check vlan veth exists",
539+
name: "Add endpoints fail check vlan interface exists",
527540
client: &TransparentVlanEndpointClient{
528541
primaryHostIfName: "eth0",
529542
vlanIfName: "eth0.1",
@@ -537,7 +550,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) {
537550
},
538551
epInfo: &EndpointInfo{},
539552
wantErr: true,
540-
wantErrMsg: "vlan veth doesn't exist: " + netio.ErrMockNetIOFail.Error() + ":eth0.1",
553+
wantErrMsg: "vlan interface doesn't exist: " + netio.ErrMockNetIOFail.Error() + ":eth0.1",
541554
},
542555
{
543556
name: "Add endpoints fail check vnet veth exists",

0 commit comments

Comments
 (0)