<!-- /usr/share/doc/ganeti/html/design-os.html is in ganeti-doc 2.16.0~rc2-1build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below. -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Ganeti OS installation redesign — Ganeti 2.16.0~rc2 documentation</title>
<link rel="stylesheet" href="_static/style.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: './',
VERSION: '2.16.0~rc2',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt'
};
</script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Ganeti Instance Import/Export using Open Virtualization Format" href="design-ovf-support.html" />
<link rel="prev" title="Filtering of jobs for the Ganeti job queue" href="design-optables.html" />
</head>
<body>
<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
<li class="right" style="margin-right: 10px">
<a href="design-ovf-support.html" title="Ganeti Instance Import/Export using Open Virtualization Format"
accesskey="N">next</a></li>
<li class="right" >
<a href="design-optables.html" title="Filtering of jobs for the Ganeti job queue"
accesskey="P">previous</a> |</li>
<li class="nav-item nav-item-0"><a href="index.html">Ganeti 2.16.0~rc2 documentation</a> »</li>
</ul>
</div>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<div class="section" id="ganeti-os-installation-redesign">
<h1><a class="toc-backref" href="#id1">Ganeti OS installation redesign</a><a class="headerlink" href="#ganeti-os-installation-redesign" title="Permalink to this headline">¶</a></h1>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Created:</th><td class="field-body">2013-Dec-12</td>
</tr>
<tr class="field-even field"><th class="field-name">Status:</th><td class="field-body">Partially Implemented</td>
</tr>
<tr class="field-odd field"><th class="field-name">Ganeti-Version:</th><td class="field-body">2.12.0, 2.13.0</td>
</tr>
</tbody>
</table>
<div class="contents topic" id="contents">
<p class="topic-title first">Contents</p>
<ul class="simple">
<li><a class="reference internal" href="#ganeti-os-installation-redesign" id="id1">Ganeti OS installation redesign</a><ul>
<li><a class="reference internal" href="#current-state-and-shortcomings" id="id2">Current state and shortcomings</a></li>
<li><a class="reference internal" href="#proposed-changes" id="id3">Proposed changes</a><ul>
<li><a class="reference internal" href="#os-parameter-categories" id="id4">OS parameter categories</a></li>
<li><a class="reference internal" href="#metadata" id="id5">Metadata</a></li>
<li><a class="reference internal" href="#installation-procedure" id="id6">Installation procedure</a></li>
</ul>
</li>
<li><a class="reference internal" href="#implementation" id="id7">Implementation</a><ul>
<li><a class="reference internal" href="#communication-mechanism" id="id8">Communication mechanism</a></li>
<li><a class="reference internal" href="#dnsmasq" id="id9">dnsmasq</a></li>
<li><a class="reference internal" href="#metadata-service" id="id10">Metadata service</a></li>
<li><a class="reference internal" href="#installation-process-in-a-virtualized-environment" id="id11">Installation process in a virtualized environment</a></li>
</ul>
</li>
<li><a class="reference internal" href="#alternatives-to-design-and-implementation" id="id12">Alternatives to design and implementation</a><ul>
<li><a class="reference internal" href="#port-forwarding-in-kvm" id="id13">Port forwarding in KVM</a></li>
<li><a class="reference internal" href="#alternatives-to-the-dhcp-server" id="id14">Alternatives to the DHCP server</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<p>This is a design document detailing a new OS installation procedure, which is
more secure, able to provide more features and easier to use for many common
tasks w.r.t. the current one.</p>
<div class="section" id="current-state-and-shortcomings">
<h2><a class="toc-backref" href="#id2">Current state and shortcomings</a><a class="headerlink" href="#current-state-and-shortcomings" title="Permalink to this headline">¶</a></h2>
<p>As of Ganeti 2.10, each instance is associated with an OS definition. An OS
definition is a set of scripts (i.e., <code class="docutils literal"><span class="pre">create</span></code>, <code class="docutils literal"><span class="pre">export</span></code>, <code class="docutils literal"><span class="pre">import</span></code>,
<code class="docutils literal"><span class="pre">rename</span></code>) that are executed with root privileges on the primary host of the
instance. These scripts are responsible for performing all the OS-related
tasks, namely, create an instance, setup an operating system on the instance’s
disks, export/import the instance, and rename the instance.</p>
<p>These scripts receive, through environment variables, a fixed set of instance
parameters (such as, the hypervisor, the name of the instance, the number of
disks and their location) and a set of user defined parameters. Both the
instance and user defined parameters are written in the configuration file of
Ganeti, to allow future reinstalls of the instance, and in various log files,
namely:</p>
<ul class="simple">
<li>node daemon log file: contains DEBUG strings of the <code class="docutils literal"><span class="pre">/os_validate</span></code>,
<code class="docutils literal"><span class="pre">/instance_os_add</span></code> and <code class="docutils literal"><span class="pre">/instance_start</span></code> RPC calls.</li>
<li>master daemon log file: DEBUG strings related to the same RPC calls are stored
here as well.</li>
<li>commands log: the CLI commands that create a new instance, including their
parameters, are logged here.</li>
<li>RAPI log: the RAPI commands that create a new instance, including their
parameters, are logged here.</li>
<li>job logs: the job files stored in the job queue, or in its archive, contain
the parameters.</li>
</ul>
<p>The current situation presents a number of shortcomings:</p>
<ul class="simple">
<li>Having the installation scripts run as root on the nodes does not allow
user-defined OS scripts, as they would pose a huge security risk.
Furthermore, even a script without malicious intentions might end up
disrupting a node due to a bug.</li>
<li>Ganeti cannot be used to create instances starting from user provided disk
images: even in the (hypothetical) case in which the scripts are completely
secure and run not by root but by an unprivileged user with only the power to
mount arbitrary files as disk images, this is still a security issue. It has
been proven that a carefully crafted file system might exploit kernel
vulnerabilities to gain control of the system. Therefore, directly mounting
images on the Ganeti nodes is not an option.</li>
<li>There is no way to inject files into an existing disk image. A common use case
is for the system administrator to provide a standard image of the system, to
be later personalized with the network configuration, private keys identifying
the machine, ssh keys of the users, and so on. A possible workaround would be
for the scripts to mount the image (only if this is trusted!) and to receive
the configurations and ssh keys as user defined OS parameters. Unfortunately,
this is also not an option for security sensitive material (such as the ssh
keys) because the OS parameters are stored in many places on the system, as
already described above.</li>
<li>Most other virtualization software allows only instance images, but no
installation scripts. This difference makes the interaction between Ganeti and
other software difficult.</li>
</ul>
</div>
<div class="section" id="proposed-changes">
<h2><a class="toc-backref" href="#id3">Proposed changes</a><a class="headerlink" href="#proposed-changes" title="Permalink to this headline">¶</a></h2>
<p>In order to fix the shortcomings of the current state, we plan to introduce the
following changes.</p>
<div class="section" id="os-parameter-categories">
<h3><a class="toc-backref" href="#id4">OS parameter categories</a><a class="headerlink" href="#os-parameter-categories" title="Permalink to this headline">¶</a></h3>
<p>Change the OS parameters to have three categories:</p>
<ul class="simple">
<li><code class="docutils literal"><span class="pre">public</span></code>: the current behavior. The parameter is logged and stored freely.</li>
<li><code class="docutils literal"><span class="pre">private</span></code>: the parameter is saved inside the Ganeti configuration (to allow
for instance reinstall) but it is not shown in logs, job logs, or passed back
via RAPI.</li>
<li><code class="docutils literal"><span class="pre">secret</span></code>: the parameter is not saved inside the Ganeti configuration.
Reinstalls are impossible unless the data is passed again. The parameter will
not appear in any log file. When a functionality is performed jointly by
multiple daemons (such as MasterD and LuxiD), currently Ganeti sometimes
serializes jobs on disk and later reloads them. Secret parameters will not be
serialized to disk. They will be passed around as part of the LUXI calls
exchanged by the daemons, and only kept in memory, in order to reduce their
accessibility as much as possible. In case of failure of the master node,
these parameters will be lost and cannot be recovered because they are not
serialized. As a result, the job cannot be taken over by the new master. This
is an expected and accepted side effect of jobs with secret parameters: if
they fail, they’ll have to be restarted manually.</li>
</ul>
</div>
<div class="section" id="metadata">
<h3><a class="toc-backref" href="#id5">Metadata</a><a class="headerlink" href="#metadata" title="Permalink to this headline">¶</a></h3>
<p>In order to allow metadata to be sent inside the instance, a communication
mechanism between the instance and the host will be created. This mechanism
will be bidirectional (e.g.: to allow the setup process going on inside the
instance to communicate its progress to the host). Each instance will have
access exclusively to its own metadata, and it will be only able to communicate
with its host over this channel. This is the approach followed by the
<code class="docutils literal"><span class="pre">cloud-init</span></code> tool and more details will be provided in the <a class="reference internal" href="#communication-mechanism">Communication
mechanism</a> and <a class="reference internal" href="#metadata-service">Metadata service</a> sections.</p>
</div>
<div class="section" id="installation-procedure">
<h3><a class="toc-backref" href="#id6">Installation procedure</a><a class="headerlink" href="#installation-procedure" title="Permalink to this headline">¶</a></h3>
<p>A new installation procedure will be introduced. There will be two sets of
parameters, namely, installation parameters, which are used mainly for installs
and reinstalls, and execution parameters, which are used in all the other runs
that are not part of an installation procedure. Also, it will be possible to
use an installation medium and/or run the OS scripts in an optional virtualized
environment, and optionally use a personalization package. This section details
all of these options.</p>
<p>The set of installation parameters will allow, for example, to attach an
installation floppy/cdrom/network, change the boot device order, or specify a
disk image to be used. Through this set of parameters, the administrator will
have to provide the hypervisor a location for an installation medium for the
instance (e.g., a boot disk, a network image, etc). This medium will carry out
the installation of the instance onto the instance’s disks and will then be
responsible for getting the parameters for configuring the instance, such as,
network interfaces, IP address, and hostname. These parameters are taken from
the metadata. The installation parameters will be stored in the configuration
of Ganeti and used in future reinstalls, but not during normal execution.</p>
<p>The instance is reinstalled using the same installation parameters from the
first installation. However, it will be the administrator’s responsibility to
ensure that the installation media is still available at the proper location
when a reinstall occurs.</p>
<p>The parameter <code class="docutils literal"><span class="pre">--os-parameters</span></code> can still be used to specify the OS
parameters. However, without OS scripts, Ganeti cannot do more than a syntactic
check to validate the supplied OS parameter string. As a result, this string
will be passed directly to the instance as part of the metadata. If OS scripts
are used and the installation procedure is running inside a virtualized
environment, Ganeti will take these parameters from the metadata and pass them
to the OS scripts as environment variables.</p>
<p>Ganeti allows the following installation options:</p>
<ul>
<li><p class="first">Use a disk image:</p>
<p>Currently, it is already possible to specify an installation medium, such as,
a cdrom, but not a disk image. Therefore, a new parameter <code class="docutils literal"><span class="pre">--os-image</span></code> will
be used to specify the location of a disk image which will be dumped to the
instance’s first disk before the instance is started. The location of the
image can be a URL and, if this is the case, Ganeti will download this image.</p>
</li>
<li><p class="first">Run OS scripts:</p>
<p>The parameter <code class="docutils literal"><span class="pre">--os-type</span></code> (short version: <code class="docutils literal"><span class="pre">-o</span></code>), is currently used to
specify the OS scripts. This parameter will still be used to specify the OS
scripts with the difference that these scripts may optionally run inside a
virtualized environment for safety reasons, depending on whether they are
trusted or not. For more details on trusted and untrusted OS scripts, refer
to the <a class="reference internal" href="#installation-process-in-a-virtualized-environment">Installation process in a virtualized environment</a> section. Note
that this parameter will become optional thus allowing a user to create an
instance specifying only, for example, a disk image or a cdrom image to boot
from.</p>
</li>
<li><p class="first">Personalization package</p>
<p>As part of the instance creation command, it will be possible to indicate a
URL for a “personalization package”, which is an archive containing a set of
files meant to be overlayed on top of the OS file system at the end of the
setup process and before the VM is started for the first time in normal mode.
Ganeti will provide a mechanism for receiving and unpacking this archive,
independently of whether the installation is being performed inside the
virtualized environment or not.</p>
<p>The archive will be in TAR-GZIP format (with extension <code class="docutils literal"><span class="pre">.tar.gz</span></code> or
<code class="docutils literal"><span class="pre">.tgz</span></code>) and contain the files according to the directory structure that will
be recreated on the installation disk. Files contained in this archive will
overwrite files with the same path created during the installation procedure
(if any). The URL of the “personalization package” will have to specify an
extension to identify the file format (in order to allow for more formats to
be supported in the future). The URL will be stored as part of the
configuration of the instance (therefore, the URL should not contain
confidential information, but the files there available can).</p>
<p>It is up to the system administrator to ensure that a package is actually
available at that URL at install and reinstall time. The contents of the
package are allowed to change. E.g.: a system administrator might create a
package containing the private keys of the instance being created. When the
instance is reinstalled, a new package with new keys can be made available
there, thus allowing instance reinstall without the need to store keys. A
username and a password can be specified together with the URL. If the URL is
an HTTP(S) URL, they will be used as basic access authentication credentials to
access that URL. The username and password will not be saved in the config,
and will have to be provided again in case a reinstall is requested.</p>
<p>The downloaded personalization package will not be stored locally on the node
for longer than it is needed while unpacking it and adding its files to the
instance being created. The personalization package will be overlayed on top
of the instance filesystem after the scripts that created it have been
executed. In order for the files in the package to be automatically overlayed
on top of the instance filesystem, it is required that the appliance is
actually able to mount the instance’s disks. As a result, this will not work
for every filesystem.</p>
</li>
<li><p class="first">Combine a disk image, OS scripts, and a personalization package</p>
<p>It will be possible to combine a disk image, OS scripts, and a personalization
package, either with or without a virtualized environment (see the exception
below). At least, an installation medium or OS scripts should be specified.</p>
<p>The disk image of the actual virtual appliance, which bootstraps the virtual
environment used in the installation procedure, will be read only, so that a
pristine copy of the appliance can be started every time a new instance needs
to be created and to further increase security. The data the instance needs
to write at runtime will only be stored in RAM and disappear as soon as the
instance is stopped.</p>
<p>The parameter <code class="docutils literal"><span class="pre">--enable-safe-install=yes|no</span></code> will be used to give the
administrator control over whether to use a virtualized environment for the
installation procedure. By default, a virtualized environment will be used.
Note that some feature combinations, such as, using untrusted scripts, will
require the virtualized environment. In this case, Ganeti will not allow
disabling the virtualized environment.</p>
</li>
</ul>
</div>
</div>
<div class="section" id="implementation">
<h2><a class="toc-backref" href="#id7">Implementation</a><a class="headerlink" href="#implementation" title="Permalink to this headline">¶</a></h2>
<p>The implementation of this design will happen as an ordered sequence of steps,
of increasing impact on the system and, in some cases, dependent on each other:</p>
<ol class="arabic simple">
<li>Private and secret instance parameters</li>
<li>Communication mechanism between host and instance</li>
<li>Metadata service</li>
<li>Personalization package (inside a virtualization environment)</li>
<li>Instance creation via a disk image</li>
<li>Instance creation inside a virtualized environment</li>
</ol>
<p>Some of these steps need to be more deeply specified w.r.t. what is already
written in the <a class="reference internal" href="#proposed-changes">Proposed changes</a> Section. Extra details will be provided in
the following subsections.</p>
<div class="section" id="communication-mechanism">
<h3><a class="toc-backref" href="#id8">Communication mechanism</a><a class="headerlink" href="#communication-mechanism" title="Permalink to this headline">¶</a></h3>
<p>The communication mechanism will be an exclusive, generic, bidirectional
communication channel between Ganeti hosts and guests.</p>
<dl class="docutils">
<dt>exclusive</dt>
<dd>The communication mechanism allows communication between a guest and its host,
but it does not allow a guest to communicate with other guests or reach the
outside world.</dd>
<dt>generic</dt>
<dd>The communication mechanism allows a guest to reach any service on the host,
not just the metadata service. Examples of valid communication include, but
are not limited to, access to the metadata service, send commands to Ganeti,
request changes to parameters, such as, those related to the distribution
upgrades, and let Ganeti control a helper instance, such as, the one for
performing OS installs inside a safe environment.</dd>
<dt>bidirectional</dt>
<dd>The communication mechanism allows communication to be initiated from either
party, namely, from a host to a guest or guest to host.</dd>
</dl>
<p>Note that Ganeti will allow communication with any service (e.g., daemon) running
on the host and, as a result, Ganeti will not be responsible for ensuring that
only the metadata service is reachable. It is the responsibility of each system
administrator to ensure that the extra firewalling and routing rules specified
on the host provide the necessary protection on a given Ganeti installation and,
at the same time, do not accidentally override the behaviour hereby described
which makes the communication between the host and the guest exclusive, generic,
and bidirectional, unless intended.</p>
<p>The communication mechanism will be enabled automatically during an installation
procedure that requires a virtualized environment, but, for backwards
compatibility, it will be disabled when the instance is running normally, unless
explicitly requested. Specifically, a new parameter <code class="docutils literal"><span class="pre">--communication=yes|no</span></code>
(short version: <code class="docutils literal"><span class="pre">-C</span></code>) will be added to <code class="docutils literal"><span class="pre">gnt-instance</span> <span class="pre">add</span></code> and <code class="docutils literal"><span class="pre">gnt-instance</span>
<span class="pre">modify</span></code>. This parameter will determine whether the communication mechanism is
enabled for a particular instance. The value of this parameter will be saved as
part of the instance’s configuration.</p>
<p>The communication mechanism will be implemented through network interfaces on
the host and the guest, and Ganeti will be responsible for the host side,
namely, creating a TAP interface for each guest and configuring these interfaces
to have name <code class="docutils literal"><span class="pre">gnt.com.%d</span></code>, where <code class="docutils literal"><span class="pre">%d</span></code> is a unique number within the host
(e.g., <code class="docutils literal"><span class="pre">gnt.com.0</span></code> and <code class="docutils literal"><span class="pre">gnt.com.1</span></code>), IP address <code class="docutils literal"><span class="pre">169.254.169.254</span></code>, and
netmask <code class="docutils literal"><span class="pre">255.255.255.255</span></code>. The interface’s name allows DHCP servers to
recognize which interfaces are part of the communication mechanism.</p>
<p>This network interface will be connected to the guest’s last network interface,
which is meant to be used exclusively for the communication mechanism and is
defined after all the user-defined interfaces. The last interface was chosen
(as opposed to the first one, for example) because the first interface is
generally understood as the main gateway out, and also because it minimizes the
impact on existing systems, for example, in a scenario where the system
administrator has a running cluster and wants to enable the communication
mechanism for already existing instances, which might have been created with
older versions of Ganeti. Further, DBus should assist in keeping the guest
network interfaces more stable.</p>
<p>On the guest side, each instance will have its own MAC address and IP address.
Both the guest’s MAC address and IP address must be unique within a single
cluster. An IP is unique within a single cluster, and not within a single host,
in order to minimize disruption of connectivity, for example, during live
migration, in particular since an instance is not aware when it changes host.
Unfortunately, a side-effect of this decision is that a cluster can have at
most as many instances (with communication enabled) as there are addresses in a
<code class="docutils literal"><span class="pre">/16</span></code> network. If
necessary to overcome this limit, it should be possible to allow different
networks to be configured link-local only.</p>
<p>The guest will use the DHCP protocol on its last network interface to contact a
DHCP server running on the host and thus determine its IP address. The DHCP
server is configured, started, and stopped, by Ganeti and it will be listening
exclusively on the TAP network interfaces of the guests in order not to
interfere with a potential DHCP server running on the same host. Furthermore,
the DHCP server will only recognize MAC and IP address pairs that have been
approved by Ganeti.</p>
<p>The TAP network interfaces created for each guest share the same IP address.
Therefore, it will be necessary to extend the routing table with rules specific
to each guest. This can be achieved with the following command, which takes the
guest’s unique IP address and its TAP interface:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">route</span> <span class="n">add</span> <span class="o">-</span><span class="n">host</span> <span class="o">&lt;</span><span class="n">ip</span><span class="o">&gt;</span> <span class="n">dev</span> <span class="o">&lt;</span><span class="n">ifname</span><span class="o">&gt;</span>
</pre></div>
</div>
<p>This rule has the additional advantage of preventing guests from trying to lease
IP addresses from the DHCP server other than the one that has been assigned to
them by Ganeti. The guest could lie about its MAC address to the DHCP server
and try to steal another guest’s IP address, however, this routing rule will
block traffic (i.e., IP packets carrying the wrong IP) from the DHCP server to
the malicious guest. Similarly, the guest could lie about its IP address (i.e.,
simply assign a predefined IP address, perhaps from another guest), however,
replies from the host will not be routed to the malicious guest.</p>
<p>This routing rule ensures that the communication channel is exclusive but, as
mentioned before, it will not prevent guests from accessing any service on the
host. It is the system administrator’s responsibility to employ the necessary
<code class="docutils literal"><span class="pre">iptables</span></code> rules. In order to achieve this, Ganeti will provide <code class="docutils literal"><span class="pre">ifup</span></code>
hooks associated with the guest network interfaces which will give system
administrators the opportunity to customize their own <code class="docutils literal"><span class="pre">iptables</span></code>, if
necessary. Ganeti will also provide examples of such hooks. However, these are
meant to be personalized for each Ganeti installation and not to be taken as
production ready scripts.</p>
<p>For KVM, an instance will be started with a unique MAC address and the file
descriptor for the TAP network interface meant to be used by the communication
mechanism. Ganeti will be responsible for generating a unique MAC address for
the guest, opening the TAP interface, and passing its file descriptor to KVM:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">kvm</span> <span class="o">-</span><span class="n">net</span> <span class="n">nic</span><span class="p">,</span><span class="n">macaddr</span><span class="o">=&lt;</span><span class="n">mac</span><span class="o">&gt;</span> <span class="o">-</span><span class="n">net</span> <span class="n">tap</span><span class="p">,</span><span class="n">fd</span><span class="o">=&lt;</span><span class="n">tap</span><span class="o">-</span><span class="n">fd</span><span class="o">&gt;</span> <span class="o">...</span>
</pre></div>
</div>
<p>For Xen, a network interface will be created on the host (using the <code class="docutils literal"><span class="pre">vif</span></code>
parameter of the Xen configuration file). Each instance will have its
corresponding <code class="docutils literal"><span class="pre">vif</span></code> network interface on the host. The <code class="docutils literal"><span class="pre">vif-route</span></code> script
of Xen might be helpful in implementing this.</p>
</div>
<div class="section" id="dnsmasq">
<h3><a class="toc-backref" href="#id9">dnsmasq</a><a class="headerlink" href="#dnsmasq" title="Permalink to this headline">¶</a></h3>
<p>The previous section describes the communication mechanism and explains the role
of the DHCP server. Note that any DHCP server can be used in the implementation
of the communication mechanism. However, the DHCP server employed should not
violate the properties described in the previous section, which state that the
communication mechanism should be exclusive, generic, and bidirectional, unless
this is intentional.</p>
<p>In our experiments, we have used dnsmasq. In this section, we describe how to
properly configure dnsmasq to work on a given Ganeti installation. This is
particularly important if, in this Ganeti installation, dnsmasq will share the
node with one or more DHCP servers running in parallel.</p>
<p>First, it is important to become familiar with the operational modes of dnsmasq,
which are well explained in the <a class="reference external" href="http://www.thekelleys.org.uk/dnsmasq/docs/FAQ">FAQ</a> under the question <code class="docutils literal"><span class="pre">What</span> <span class="pre">are</span>
<span class="pre">these</span> <span class="pre">strange</span> <span class="pre">"bind-interface"</span> <span class="pre">and</span> <span class="pre">"bind-dynamic"</span> <span class="pre">options?</span></code>. The rest of this
section assumes the reader is familiar with these operational modes.</p>
<dl class="docutils">
<dt>bind-dynamic</dt>
<dd>dnsmasq SHOULD be configured in the <code class="docutils literal"><span class="pre">bind-dynamic</span></code> mode (if supported) in
order to allow other DHCP servers to run on the same node. In this mode,
dnsmasq can listen on the TAP interfaces for the communication mechanism by
listening on the TAP interfaces that match the pattern <code class="docutils literal"><span class="pre">gnt.com.*</span></code> (e.g.,
<code class="docutils literal"><span class="pre">interface=gnt.com.*</span></code>). For extra safety, interfaces matching the pattern
<code class="docutils literal"><span class="pre">eth*</span></code> and the name <code class="docutils literal"><span class="pre">lo</span></code> should be configured such that dnsmasq will
always ignore them (e.g., <code class="docutils literal"><span class="pre">except-interface=eth*</span></code> and
<code class="docutils literal"><span class="pre">except-interface=lo</span></code>).</dd>
<dt>bind-interfaces</dt>
<dd><p class="first">dnsmasq MAY be configured in the <code class="docutils literal"><span class="pre">bind-interfaces</span></code> mode (if supported) in
order to allow other DHCP servers to run on the same node. Unfortunately,
because dnsmasq cannot dynamically adjust to TAP interfaces that are created
and destroyed by the system, dnsmasq must be restarted with a new
configuration file each time an instance is created or destroyed.</p>
<p class="last">Also, the interfaces cannot be patterns, such as, <code class="docutils literal"><span class="pre">gnt.com.*</span></code>. Instead, the
interfaces must be explicitly specified, for example,
<code class="docutils literal"><span class="pre">interface=gnt.com.0,gnt.com.1</span></code>. Moreover, dnsmasq cannot bind to the TAP
interfaces if they all have the same IPv4 address. As a result, it is
necessary to configure these TAP interfaces to enable IPv6 and an IPv6 address
must be assigned to them.</p>
</dd>
<dt>wildcard</dt>
<dd>dnsmasq CANNOT be configured in the <code class="docutils literal"><span class="pre">wildcard</span></code> mode if there is
(at least) another DHCP server running on the same node.</dd>
</dl>
</div>
<div class="section" id="metadata-service">
<h3><a class="toc-backref" href="#id10">Metadata service</a><a class="headerlink" href="#metadata-service" title="Permalink to this headline">¶</a></h3>
<p>An instance will be able to reach the metadata service on <code class="docutils literal"><span class="pre">169.254.169.254:80</span></code> in
order to, for example, retrieve its metadata. This IP address and port were
chosen for compatibility with the OpenStack and Amazon EC2 metadata service.
The metadata service will be provided by a single daemon, which will determine
the source instance for a given request and reply with the metadata pertaining
to that instance.</p>
<p>Where possible, the metadata will be provided in a way compatible with Amazon
EC2, at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">meta</span><span class="o">-</span><span class="n">data</span><span class="o">/*</span>
</pre></div>
</div>
<p>Ganeti-specific metadata, that does not fit this structure, will be provided
at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">ganeti</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">meta_data</span><span class="o">.</span><span class="n">json</span>
</pre></div>
</div>
<p>where <code class="docutils literal"><span class="pre"><version></span></code> is either a date in YYYY-MM-DD format, or <code class="docutils literal"><span class="pre">latest</span></code> to
indicate the most recent available protocol version.</p>
<p>If needed in the future, this structure also allows us to support OpenStack’s
metadata at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">openstack</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">meta_data</span><span class="o">.</span><span class="n">json</span>
</pre></div>
</div>
<p>A bi-directional, pipe-like communication channel will also be provided. The
instance will be able to receive data from the host by a GET request at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">ganeti</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">read</span>
</pre></div>
</div>
<p>and to send data to the host by a POST request at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">ganeti</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">write</span>
</pre></div>
</div>
<p>As in a pipe, once the data are read, they will not be in the buffer anymore, so
subsequent GET requests to <code class="docutils literal"><span class="pre">read</span></code> will not return the same data. However,
unlike a pipe, it will not be possible to perform blocking I/O operations.</p>
<p>The OS parameters will be accessible through a GET request at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">ganeti</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">os</span><span class="o">/</span><span class="n">parameters</span><span class="o">.</span><span class="n">json</span>
</pre></div>
</div>
<p>as a JSON serialized dictionary having the parameter name as the key, and the
pair <code class="docutils literal"><span class="pre">(<value>,</span> <span class="pre"><visibility>)</span></code> as the value, where <code class="docutils literal"><span class="pre"><value></span></code> is the
user-provided value of the parameter, and <code class="docutils literal"><span class="pre"><visibility></span></code> is either <code class="docutils literal"><span class="pre">public</span></code>,
<code class="docutils literal"><span class="pre">private</span></code> or <code class="docutils literal"><span class="pre">secret</span></code>.</p>
<p>The installation scripts to be run inside the virtualized environment will be
available at:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="o">/</span><span class="n">ganeti</span><span class="o">/<</span><span class="n">version</span><span class="o">>/</span><span class="n">os</span><span class="o">/</span><span class="n">scripts</span><span class="o">/<</span><span class="n">script_name</span><span class="o">></span>
</pre></div>
</div>
<p>where <code class="docutils literal"><span class="pre"><script_name></span></code> is the name of the script.</p>
<div class="section" id="rationale">
<h4>Rationale<a class="headerlink" href="#rationale" title="Permalink to this headline">¶</a></h4>
<p>The choice of using a network interface for instance-host communication, as
opposed to VirtIO, XenBus or other methods, is due to the desire to have a
generic, hypervisor-independent way of creating a communication channel, that
doesn’t require unusual (para)virtualization drivers.
At the same time, a network interface was preferred over solutions involving
virtual floppy or USB devices because the latter tend to be detected and
configured by the guest operating systems, sometimes even in prominent positions
in the user interface, whereas it is fairly common to have an unconfigured
network interface in a system, usually without any negative side effects.</p>
</div>
</div>
<div class="section" id="installation-process-in-a-virtualized-environment">
<h3><a class="toc-backref" href="#id11">Installation process in a virtualized environment</a><a class="headerlink" href="#installation-process-in-a-virtualized-environment" title="Permalink to this headline">¶</a></h3>
<p>In the new OS installation scenario, we distinguish between trusted and
untrusted code.</p>
<p>The trusted installation code maintains the behavior of the current one and
requires no modifications, with the scripts running on the node the instance is
being created on. The untrusted code is stored in a subdirectory of the OS
definition called <code class="docutils literal"><span class="pre">untrusted</span></code>. This directory contains scripts that are
equivalent to the already existing ones (<code class="docutils literal"><span class="pre">create</span></code>, <code class="docutils literal"><span class="pre">export</span></code>, <code class="docutils literal"><span class="pre">import</span></code>,
<code class="docutils literal"><span class="pre">rename</span></code>) but that will be run inside a virtualized environment, to protect
the host from malicious tampering.</p>
<p>The <code class="docutils literal"><span class="pre">untrusted</span></code> code is meant to either be untrusted itself, or to be trusted
code running operations that might be dangerous (such as mounting a
user-provided image).</p>
<p>By default, all new OS definitions will have to be explicitly marked as trusted
by the cluster administrator (with a new <code class="docutils literal"><span class="pre">gnt-os</span> <span class="pre">modify</span></code> command) before they
can run code on the host. Otherwise, only the untrusted part of the code will be
allowed to run, inside the virtual appliance. For backwards compatibility
reasons, when upgrading an existing cluster, all the installed OSes will be
marked as trusted, so that they can keep running with no changes.</p>
<p>In order to allow for the highest flexibility, if both a trusted and an
untrusted script are provided for the same operation (e.g. <code class="docutils literal"><span class="pre">create</span></code>), both of
them will be executed at the same time, one on the host, and one inside the
installation appliance. They will be allowed to communicate with each other
through the already described communication mechanism, in order to orchestrate
their execution (e.g.: the untrusted code might execute the installation, while
the trusted one receives status updates from it and delivers them to a user
interface).</p>
<p>The cluster administrator will have an option to completely disable scripts
running on the host, leaving only the ones running in the VM.</p>
<p>Ganeti will provide a script to be run at install time that can be used to
create the virtualized environment that will perform the OS installation of new
instances.
This script will build a debootstrapped basic Debian system including software
that will read the metadata, set up the environment variables and launch the
installation scripts inside the virtualized environment. The script will also
provide hooks for personalization.</p>
<p>It will also be possible to use other self-made virtualized environments, as
long as they connect to Ganeti over the described communication mechanism and
they know how to read and use the provided metadata to create a new instance.</p>
<p>While performing an installation in the virtualized environment, a customizable
timeout will be used to detect possible problems with the installation process,
and to kill the virtualized environment. The timeout will be optional and set on
a cluster basis by the administrator. If set, it will be the total time allowed
to set up an instance inside the appliance. It is mainly meant as a safety
measure to prevent an instance taken over by malicious scripts from being available
for a long time.</p>
</div>
</div>
<div class="section" id="alternatives-to-design-and-implementation">
<h2><a class="toc-backref" href="#id12">Alternatives to design and implementation</a><a class="headerlink" href="#alternatives-to-design-and-implementation" title="Permalink to this headline">¶</a></h2>
<p>This section lists alternatives to design and implementation, which came up
during the development of this design document, that will not be implemented.
Please read carefully through the limitations and security concerns of each of
these alternatives.</p>
<div class="section" id="port-forwarding-in-kvm">
<h3><a class="toc-backref" href="#id13">Port forwarding in KVM</a><a class="headerlink" href="#port-forwarding-in-kvm" title="Permalink to this headline">¶</a></h3>
<p>The communication mechanism could have been implemented in KVM using guest port
forwarding, as opposed to network interfaces. There are two alternatives in
KVM’s guest port forwarding, namely, creating a forwarding device, such as, a
TCP/IP connection, or executing a command. However, we have determined that
both of these options are not viable.</p>
<p>A TCP/IP forwarding device can be created through the following KVM invocation:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">kvm</span> <span class="o">-</span><span class="n">net</span> <span class="n">nic</span> <span class="o">-</span><span class="n">net</span> \
<span class="n">user</span><span class="p">,</span><span class="n">restrict</span><span class="o">=</span><span class="n">on</span><span class="p">,</span><span class="n">net</span><span class="o">=</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">0.0</span><span class="o">/</span><span class="mi">16</span><span class="p">,</span><span class="n">host</span><span class="o">=</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.253</span><span class="p">,</span>
<span class="n">guestfwd</span><span class="o">=</span><span class="n">tcp</span><span class="p">:</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="p">:</span><span class="mi">80</span><span class="o">-</span><span class="n">tcp</span><span class="p">:</span><span class="mf">127.0</span><span class="o">.</span><span class="mf">0.1</span><span class="p">:</span><span class="mi">8080</span> <span class="o">...</span>
</pre></div>
</div>
<p>This invocation even has the advantage that it can block undesired traffic
(i.e., traffic that is not explicitly specified in the arguments) and it can
remap ports, which would have allowed the metadata service daemon to run on port
8080 instead of 80. However, in this scheme, KVM opens the TCP connection only
once, when it is started, and, if the connection breaks, KVM will not
reestablish the connection. Furthermore, opening the TCP connection only once
interferes with the HTTP protocol, which needs to dynamically establish and
close connections.</p>
<p>The alternative to the TCP/IP forwarding device is to execute a command. The
KVM invocation for this is, for example, the following:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">kvm</span> <span class="o">-</span><span class="n">net</span> <span class="n">nic</span> <span class="o">-</span><span class="n">net</span> \
<span class="s2">"user,restrict=on,net=169.254.0.0/16,host=169.254.169.253,</span>
<span class="n">guestfwd</span><span class="o">=</span><span class="n">tcp</span><span class="p">:</span><span class="mf">169.254</span><span class="o">.</span><span class="mf">169.254</span><span class="p">:</span><span class="mi">80</span><span class="o">-</span><span class="n">netcat</span> <span class="mf">127.0</span><span class="o">.</span><span class="mf">0.1</span> <span class="mi">8080</span><span class="s2">" ...</span>
</pre></div>
</div>
<p>The advantage of this approach is that the command is executed each time the
guest initiates a connection. This is the ideal situation; however, it is only
supported in KVM 1.2 and above, and, therefore, not viable because we want to
provide support for at least KVM version 1.0, which is the version provided by
Ubuntu LTS.</p>
</div>
<div class="section" id="alternatives-to-the-dhcp-server">
<h3><a class="toc-backref" href="#id14">Alternatives to the DHCP server</a><a class="headerlink" href="#alternatives-to-the-dhcp-server" title="Permalink to this headline">¶</a></h3>
<p>There are alternatives to using the DHCP server, for example, by assigning a
fixed IP address to guests, such as, the IP address <code class="docutils literal"><span class="pre">169.254.169.253</span></code>.
However, this introduces a routing problem, namely, how to route incoming
packets from the same source IP to the host. This problem can be overcome in a
number of ways.</p>
<p>The first solution is to use NAT to translate the incoming guest IP address, for
example, <code class="docutils literal"><span class="pre">169.254.169.253</span></code>, to a unique IP address, for example,
<code class="docutils literal"><span class="pre">169.254.0.1</span></code>. Given that NAT through <code class="docutils literal"><span class="pre">ip</span> <span class="pre">rule</span></code> is deprecated, users can
resort to <code class="docutils literal"><span class="pre">iptables</span></code>. Note that this has not yet been tested.</p>
<p>Another option, which has been tested, but only in a prototype, is to connect
the TAP network interfaces of the guests to a bridge. The bridge takes the
configuration from the TAP network interfaces, namely, IP address
<code class="docutils literal"><span class="pre">169.254.169.254</span></code> and netmask <code class="docutils literal"><span class="pre">255.255.255.255</span></code>, thus leaving those
interfaces without an IP address. Note that in this setting, guests will be
able to reach each other, therefore, if necessary, additional <code class="docutils literal"><span class="pre">iptables</span></code> rules
can be put in place to prevent it.</p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h3><a href="index.html">Table Of Contents</a></h3>
<ul>
<li><a class="reference internal" href="#">Ganeti OS installation redesign</a><ul>
<li><a class="reference internal" href="#current-state-and-shortcomings">Current state and shortcomings</a></li>
<li><a class="reference internal" href="#proposed-changes">Proposed changes</a><ul>
<li><a class="reference internal" href="#os-parameter-categories">OS parameter categories</a></li>
<li><a class="reference internal" href="#metadata">Metadata</a></li>
<li><a class="reference internal" href="#installation-procedure">Installation procedure</a></li>
</ul>
</li>
<li><a class="reference internal" href="#implementation">Implementation</a><ul>
<li><a class="reference internal" href="#communication-mechanism">Communication mechanism</a></li>
<li><a class="reference internal" href="#dnsmasq">dnsmasq</a></li>
<li><a class="reference internal" href="#metadata-service">Metadata service</a><ul>
<li><a class="reference internal" href="#rationale">Rationale</a></li>
</ul>
</li>
<li><a class="reference internal" href="#installation-process-in-a-virtualized-environment">Installation process in a virtualized environment</a></li>
</ul>
</li>
<li><a class="reference internal" href="#alternatives-to-design-and-implementation">Alternatives to design and implementation</a><ul>
<li><a class="reference internal" href="#port-forwarding-in-kvm">Port forwarding in KVM</a></li>
<li><a class="reference internal" href="#alternatives-to-the-dhcp-server">Alternatives to the DHCP server</a></li>
</ul>
</li>
</ul>
</li>
</ul>
<h4>Previous topic</h4>
<p class="topless"><a href="design-optables.html"
title="previous chapter">Filtering of jobs for the Ganeti job queue</a></p>
<h4>Next topic</h4>
<p class="topless"><a href="design-ovf-support.html"
title="next chapter">Ganeti Instance Import/Export using Open Virtualization Format</a></p>
<div role="note" aria-label="source link">
<h3>This Page</h3>
<ul class="this-page-menu">
<li><a href="_sources/design-os.rst.txt"
rel="nofollow">Show Source</a></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3>Quick search</h3>
<form class="search" action="search.html" method="get">
<div><input type="text" name="q" /></div>
<div><input type="submit" value="Go" /></div>
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
<li class="right" style="margin-right: 10px">
<a href="design-ovf-support.html" title="Ganeti Instance Import/Export using Open Virtualization Format"
>next</a></li>
<li class="right" >
<a href="design-optables.html" title="Filtering of jobs for the Ganeti job queue"
>previous</a> |</li>
<li class="nav-item nav-item-0"><a href="index.html">Ganeti 2.16.0~rc2 documentation</a> »</li>
</ul>
</div>
<div class="footer" role="contentinfo">
© Copyright 2018, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Google Inc.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.7.
</div>
</body>
</html>
|