From 9c147e8341e287366790e60658f646cdcc59bef2 Mon Sep 17 00:00:00 2001
From: James Falcon <james.falcon@canonical.com>
Date: Thu, 7 Oct 2021 11:27:36 -0500
Subject: [PATCH] Allow disabling of network activation (SC-307) (#1048)

In #919 (81299de), we refactored some of the code used to bring up
networks across distros. Previously, the call to bring up network
interfaces during 'init' stage unintentionally resulted in a no-op
such that network interfaces were NEVER brought up by cloud-init, even
if new network interfaces were found after crawling the metadata.

The code was altered to bring up these discovered network interfaces.
On ubuntu, this results in a 'netplan apply' call during 'init' stage
for any ubuntu-based distro on a datasource that has a NETWORK
dependency. On GCE, this additional 'netplan apply' conflicts with the
google-guest-agent service, resulting in an instance that can no
longer be connected to.

This commit adds a 'disable_network_activation' option that can be
enabled in /etc/cloud/cloud.cfg to disable the activation of network
interfaces in 'init' stage.

LP: #1938299
---
 cloudinit/cmd/main.py                         | 11 ++++-
 cloudinit/cmd/tests/test_main.py              | 23 ++++++++++
 cloudinit/distros/__init__.py                 |  3 ++
 doc/rtd/topics/network-config.rst             | 11 +++++
 .../datasources/test_network_dependency.py    | 43 +++++++++++++++++++
 5 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration_tests/datasources/test_network_dependency.py

--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -239,6 +239,12 @@ def purge_cache_on_python_version_change
         util.write_file(python_version_path, current_python_version)
 
 
+def _should_bring_up_interfaces(init, args):
+    """Return False if activation is disabled in cfg or args.local is set."""
+    disabled = util.get_cfg_option_bool(init.cfg, 'disable_network_activation')
+    return not disabled and not args.local
+
+
 def main_init(name, args):
     deps = [sources.DEP_FILESYSTEM, sources.DEP_NETWORK]
     if args.local:
@@ -348,6 +354,7 @@ def main_init(name, args):
         util.del_file(os.path.join(path_helper.get_cpath("data"), "no-net"))
 
     # Stage 5
+    bring_up_interfaces = _should_bring_up_interfaces(init, args)
     try:
         init.fetch(existing=existing)
         # if in network mode, and the datasource is local
@@ -367,7 +374,7 @@ def main_init(name, args):
             util.logexc(LOG, ("No instance datasource found!"
                               " Likely bad things to come!"))
         if not args.force:
-            init.apply_network_config(bring_up=not args.local)
+            init.apply_network_config(bring_up=bring_up_interfaces)
             LOG.debug("[%s] Exiting without datasource", mode)
             if mode == sources.DSMODE_LOCAL:
                 return (None, [])
@@ -388,7 +395,7 @@ def main_init(name, args):
         # dhcp clients to advertize this hostname to any DDNS services
         # LP: #1746455.
         _maybe_set_hostname(init, stage='local', retry_stage='network')
-    init.apply_network_config(bring_up=bool(mode != sources.DSMODE_LOCAL))
+    init.apply_network_config(bring_up=bring_up_interfaces)
 
     if mode == sources.DSMODE_LOCAL:
         if init.datasource.dsmode != mode:
--- a/cloudinit/cmd/tests/test_main.py
+++ b/cloudinit/cmd/tests/test_main.py
@@ -4,6 +4,9 @@ from collections import namedtuple
 import copy
 import os
 from io import StringIO
+from unittest import mock
+
+import pytest
 
 from cloudinit.cmd import main
 from cloudinit import safeyaml
@@ -162,4 +165,24 @@ class TestMain(FilesystemMockingTestCase
         for log in expected_logs:
             self.assertIn(log, self.stderr.getvalue())
 
+
+class TestShouldBringUpInterfaces:
+    """Exercise main._should_bring_up_interfaces for each cfg/args pairing."""
+
+    @pytest.mark.parametrize('cfg_disable,args_local,expected', [
+        (True, True, False),
+        (True, False, False),
+        (False, True, False),
+        (False, False, True),
+    ])
+    def test_should_bring_up_interfaces(
+        self, cfg_disable, args_local, expected
+    ):
+        # Only the attributes the helper reads are populated on the mocks.
+        fake_init = mock.Mock(cfg={'disable_network_activation': cfg_disable})
+        fake_args = mock.Mock(local=args_local)
+
+        outcome = main._should_bring_up_interfaces(fake_init, fake_args)
+        assert outcome == expected
+
 # vi: ts=4 expandtab
--- a/cloudinit/distros/__init__.py
+++ b/cloudinit/distros/__init__.py
@@ -227,8 +227,11 @@ class Distro(persistence.CloudInitPickle
 
         # Now try to bring them up
         if bring_up:
+            LOG.debug('Bringing up newly configured network interfaces')
             network_activator = activators.select_activator()
             network_activator.bring_up_all_interfaces(network_state)
+        else:
+            LOG.debug("Not bringing up newly configured network interfaces")
         return False
 
     def apply_network_config_names(self, netconfig):
--- a/doc/rtd/topics/network-config.rst
+++ b/doc/rtd/topics/network-config.rst
@@ -75,6 +75,17 @@ If `Cloud-init`_ 's networking config ha
 no other network information is found, then it will proceed
 to generate a fallback networking configuration.
 
+Disabling Network Activation
+----------------------------
+
+Some datasources may not be initialized until after the network has been
+brought up. In this case, cloud-init will attempt to bring up the interfaces
+specified by the datasource metadata.
+
+This behavior can be disabled in the cloud-init configuration dictionary,
+merged from ``/etc/cloud/cloud.cfg`` and ``/etc/cloud/cloud.cfg.d/*``::
+
+  disable_network_activation: true
 
 Fallback Network Configuration
 ==============================
--- /dev/null
+++ b/tests/integration_tests/datasources/test_network_dependency.py
@@ -0,0 +1,43 @@
+import pytest
+
+from tests.integration_tests.clouds import IntegrationCloud
+from tests.integration_tests.conftest import get_validated_source
+
+
+def _setup_custom_image(session_cloud: IntegrationCloud):
+    """Like `setup_image` in conftest.py, but with customized content.
+
+    A config file setting ``disable_network_activation: true`` is written
+    to the launched instance before the new cloud-init is installed.
+    """
+    cloud_init_source = get_validated_source(session_cloud)
+    if not cloud_init_source.installs_new_version():
+        return
+    cfg_path = '/etc/cloud/cloud.cfg.d/99-disable-network-activation.cfg'
+
+    builder = session_cloud.launch()
+    builder.write_to_file(cfg_path, 'disable_network_activation: true\n')
+    builder.install_new_cloud_init(cloud_init_source)
+    # This instance was only needed for image creation, so it is
+    # destroyed even when instances are otherwise being kept
+    builder.destroy()
+
+
+# This test should be able to work on any cloud whose datasource specifies
+# a NETWORK dependency
+@pytest.mark.gce
+@pytest.mark.ubuntu  # Because netplan
+def test_network_activation_disabled(session_cloud: IntegrationCloud):
+    """Test that the network is not activated during init mode."""
+    _setup_custom_image(session_cloud)
+    with session_cloud.launch() as client:
+        result = client.execute('systemctl status google-guest-agent.service')
+        if not result.ok:
+            raise AssertionError(
+                'google-guest-agent is not active:\n%s' % result.stdout)
+        log = client.read_from_file('/var/log/cloud-init.log')
+
+    assert "Running command ['netplan', 'apply']" not in log
+    # Substring checks are case-sensitive: "Not bringing" won't match below
+    assert 'Not bringing up newly configured network interfaces' in log
+    assert 'Bringing up newly configured network interfaces' not in log
