[pve-devel] [PATCH manager v3 2/5] ceph: add MDS create/delete/list API

Thomas Lamprecht t.lamprecht at proxmox.com
Fri Nov 23 12:01:38 CET 2018


Allow to create, list and destroy a Ceph Metadata Server (MDS) over
the API and the CLI `pveceph` tool.

Besides setting up the local systemd service template and the MDS
data directory we also add a reference to the MDS in the ceph.conf.
We note the backing host (node) from the respective MDS and set up a
'mds standby for name' = 'pve' so that the PVE created ones are a
single group. If we decide to add integration for rank/path specific
MDS (possibly useful for CephFS with quite a bit of load) then this
may help as a starting point.

On create, check early if a reference already exists in ceph.conf and
abort in that case. If we only see existing data directories later
on we abort but do not remove them, they could well be from an older
manual create - where it's possibly dangerous to just remove them. Let
the user handle it themselves in that case.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
Co-authored-by: Alwin Antreich <a.antreich at proxmox.com>
---

changes v2 -> v3:
* always return the MDS list info from `mds stat`, this way people see
  their MDS in the GUI even if it wasn't created through the API,
  else you saw them too, but only after you created at least one
  through our API - which was a bit weird.
* use a postfix `for` instead of an unnecessary map


 PVE/API2/Ceph.pm       |   7 ++
 PVE/API2/Ceph/MDS.pm   | 243 +++++++++++++++++++++++++++++++++++++++++
 PVE/API2/Ceph/Makefile |  15 +++
 PVE/API2/Makefile      |   2 +-
 PVE/CLI/pveceph.pm     |   3 +
 PVE/CephTools.pm       | 140 +++++++++++++++++++++++-
 6 files changed, 408 insertions(+), 2 deletions(-)
 create mode 100644 PVE/API2/Ceph/MDS.pm
 create mode 100644 PVE/API2/Ceph/Makefile

diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
index a6eec24a..d3e8d665 100644
--- a/PVE/API2/Ceph.pm
+++ b/PVE/API2/Ceph.pm
@@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
 use PVE::Storage;
 use PVE::Tools qw(run_command file_get_contents file_set_contents);
 
+use PVE::API2::Ceph::MDS;
 use PVE::API2::Storage::Config;
 
 use base qw(PVE::RESTHandler);
@@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
     path => 'osd',
 });
 
+__PACKAGE__->register_method ({
+    subclass => "PVE::API2::Ceph::MDS",
+    path => 'mds',
+});
+
 __PACKAGE__->register_method ({
     name => 'index',
     path => '',
@@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
 	    { name => 'mon' },
 	    { name => 'osd' },
 	    { name => 'pools' },
+	    { name => 'mds' },
 	    { name => 'stop' },
 	    { name => 'start' },
 	    { name => 'status' },
diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
new file mode 100644
index 00000000..9a2791ae
--- /dev/null
+++ b/PVE/API2/Ceph/MDS.pm
@@ -0,0 +1,243 @@
+package PVE::API2::Ceph::MDS;
+
+use strict;
+use warnings;
+
+use PVE::CephTools;
+use PVE::INotify;
+use PVE::JSONSchema qw(get_standard_option);
+use PVE::RADOS;
+use PVE::RESTHandler;
+use PVE::RPCEnvironment;
+
+use base qw(PVE::RESTHandler);
+
+__PACKAGE__->register_method ({
+    name => 'index',
+    path => '',
+    method => 'GET',
+    description => "MDS directory index.",
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
+    },
+    proxyto => 'node',
+    protected => 1,
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	},
+    },
+    returns => {
+	type => 'array',
+	items => {
+	    type => "object",
+	    properties => {
+		name => {
+		    type => 'string',
+		    description => "The name (ID) for the MDS",
+		},
+		addr => {
+		    type => 'string',
+		    optional => 1,
+		},
+		host => {
+		    type => 'string',
+		    optional => 1,
+		},
+		state => {
+		    type => 'string',
+		    description => 'State of the MDS',
+		},
+		standby_replay => {
+		    type => 'boolean',
+		    optional => 1,
+		    description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
+		},
+		rank => {
+		    type => 'integer',
+		    optional => 1,
+		},
+	    },
+	},
+	links => [ { rel => 'child', href => "{name}" } ],
+    },
+    code => sub {
+	my ($param) = @_;
+
+	# MDS referenced in ceph.conf serve as base, live cluster info is merged on top
+	my $cfg = PVE::CephTools::parse_ceph_config();
+
+	my $mds_hash = {};
+
+	foreach my $section (keys %$cfg) {
+	    my $d = $cfg->{$section};
+
+	    if ($section =~ m/^mds\.(\S+)$/) {
+		my $mds_id = $1;
+		if (defined($d->{host})) {
+		    $mds_hash->{$mds_id} = {
+			name => $mds_id,
+			state => 'unknown',
+			addr => $d->{host},
+			host => $d->{host},
+		    };
+		}
+	    }
+	}
+
+	my $mds_state = PVE::CephTools::get_cluster_mds_state();
+	foreach my $name (keys %$mds_state) {
+	    my $d = $mds_state->{$name};
+	    # just overwrite, this always provides more info
+	    $mds_hash->{$name}->{$_} = $d->{$_} for keys %$d;
+	}
+
+	return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
+    }
+});
+
+__PACKAGE__->register_method ({
+    name => 'createmds',
+    path => '{name}',
+    method => 'POST',
+    description => "Create Ceph Metadata Server (MDS)",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		type => 'string',
+		optional => 1,
+		default => 'nodename',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+		description => "The ID for the mds, when omitted the same as the nodename",
+	    },
+	    hotstandby => {
+		type => 'boolean',
+		optional => 1,
+		default => '0',
+		description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
+		    "Faster switch on MDS failure, but needs more idle resources.",
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	# sanity checks first, the actual work is done in a forked worker task
+	PVE::CephTools::check_ceph_installed('ceph_mds');
+	PVE::CephTools::check_ceph_inited();
+
+	my $rpcenv = PVE::RPCEnvironment::get();
+	my $authuser = $rpcenv->get_user();
+
+	my $nodename = $param->{node};
+	$nodename = PVE::INotify::nodename() if $nodename eq 'localhost';
+
+	my $mds_id = $param->{name} // $nodename;
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    my $section = "mds.$mds_id";
+
+	    if (defined($cfg->{$section})) {
+		die "MDS '$mds_id' already referenced in ceph config, abort!\n"
+	    }
+
+	    if (!defined($cfg->{mds}->{keyring})) {
+		# $id isn't a perl variable but a ceph metavariable
+		my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
+
+		$cfg->{mds}->{keyring} = $keyring;
+	    }
+
+	    $cfg->{$section}->{host} = $nodename;
+	    $cfg->{$section}->{"mds standby for name"} = 'pve';
+
+	    if ($param->{hotstandby}) {
+		$cfg->{$section}->{"mds standby replay"} = 'true';
+	    }
+
+	    PVE::CephTools::write_ceph_config($cfg);
+
+	    eval { PVE::CephTools::create_mds($mds_id, $rados) };
+	    if (my $err = $@) {
+		# we abort early if the section is defined, so we know that we
+		# wrote it at this point. Do not auto remove the service, could
+		# do real harm for previously manual setup MDS
+		warn "Encountered error, remove '$section' from ceph.conf\n";
+		$cfg = PVE::CephTools::parse_ceph_config();
+		delete $cfg->{$section};
+		PVE::CephTools::write_ceph_config($cfg);
+
+		die "$err\n";
+	    }
+	};
+
+	return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+__PACKAGE__->register_method ({
+    name => 'destroymds',
+    path => '{name}',
+    method => 'DELETE',
+    description => "Destroy Ceph Metadata Server",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+	check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	    node => get_standard_option('pve-node'),
+	    name => {
+		description => 'The name (ID) of the mds',
+		type => 'string',
+		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+	    },
+	},
+    },
+    returns => { type => 'string' },
+    code => sub {
+	my ($param) = @_;
+
+	# the removal itself runs in a forked worker task, see $worker below
+	my $rpcenv = PVE::RPCEnvironment::get();
+	my $authuser = $rpcenv->get_user();
+
+	PVE::CephTools::check_ceph_inited();
+
+	my $mds_id = $param->{name};
+
+	my $worker = sub {
+	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+	    my $rados = PVE::RADOS->new(timeout => $timeout);
+	    my $cfg = PVE::CephTools::parse_ceph_config();
+
+	    # drop the reference from ceph.conf first, if there is one
+	    if (defined($cfg->{"mds.$mds_id"})) {
+		delete $cfg->{"mds.$mds_id"};
+		PVE::CephTools::write_ceph_config($cfg);
+	    }
+
+	    PVE::CephTools::destroy_mds($mds_id, $rados);
+	};
+
+	return $rpcenv->fork_worker('cephdestroymds', "mds.$mds_id",  $authuser, $worker);
+    }
+});
+
+1;
diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
new file mode 100644
index 00000000..be4d740c
--- /dev/null
+++ b/PVE/API2/Ceph/Makefile
@@ -0,0 +1,15 @@
+include ../../../defines.mk
+# Perl modules which the install target copies to ${PERLLIBDIR}/PVE/API2/Ceph
+PERLSOURCE= 			\
+	MDS.pm
+
+all:
+
+.PHONY: clean
+clean:
+	rm -rf *~
+
+.PHONY: install
+install: ${PERLSOURCE}
+	install -d ${PERLLIBDIR}/PVE/API2/Ceph
+	install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
index a62bf909..c5868d7f 100644
--- a/PVE/API2/Makefile
+++ b/PVE/API2/Makefile
@@ -1,6 +1,6 @@
 include ../../defines.mk
 
-SUBDIRS=Hardware
+SUBDIRS=Hardware Ceph
 
 PERLSOURCE = 			\
 	Replication.pm		\
diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
index a5a04949..90878d9e 100755
--- a/PVE/CLI/pveceph.pm
+++ b/PVE/CLI/pveceph.pm
@@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
 use PVE::JSONSchema qw(get_standard_option);
 use PVE::CephTools;
 use PVE::API2::Ceph;
+use PVE::API2::Ceph::MDS;
 
 use PVE::CLIHandler;
 
@@ -175,6 +176,8 @@ our $cmddef = {
     destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
     createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
     destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
+    createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
+    destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['name'], { node => $nodename }, $upid_exit],
     start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
     stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
     install => [ __PACKAGE__, 'install', [] ],
diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
index 8a9afa84..cc594f8a 100644
--- a/PVE/CephTools.pm
+++ b/PVE/CephTools.pm
@@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
 my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
 my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
 my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
+my $ceph_mds_data_dir = '/var/lib/ceph/mds';
 
 my $ceph_service = {
     ceph_bin => "/usr/bin/ceph",
     ceph_mon => "/usr/bin/ceph-mon",
     ceph_mgr => "/usr/bin/ceph-mgr",
-    ceph_osd => "/usr/bin/ceph-osd"
+    ceph_osd => "/usr/bin/ceph-osd",
+    ceph_mds => "/usr/bin/ceph-mds",
 };
 
 my $config_hash = {
@@ -33,6 +35,7 @@ my $config_hash = {
     pve_ckeyring_path => $pve_ckeyring_path,
     ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
     ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
+    ceph_mds_data_dir => $ceph_mds_data_dir,
     long_rados_timeout => 60,
 };
 
@@ -297,4 +300,139 @@ sub systemd_managed {
     }
 }
 
+# Return an array ref with the MDS IDs found below the local MDS data directory.
+sub list_local_mds_ids {
+    my @found_ids;
+    my $collect_id = sub { push @found_ids, $_[1] };
+
+    # the callback's 2nd argument is used as MDS ID (presumably the regex capture)
+    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, $collect_id);
+
+    return \@found_ids;
+}
+
+# Ask the monitors ('mds stat') for all standby and active MDS daemons.
+# Returns a hash ref: MDS name => { addr, rank, standby_replay, state }.
+sub get_cluster_mds_state {
+    my ($rados) = @_;
+
+    my $mds_state = {};
+
+    $rados = PVE::RADOS->new() if !defined($rados);
+
+    my $add_state = sub {
+	my ($mds) = @_;
+
+	my $state = {};
+	$state->{addr} = $mds->{addr};
+	$state->{rank} = $mds->{rank};
+	$state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
+	$state->{state} = $mds->{state};
+
+	$mds_state->{$mds->{name}} = $state;
+    };
+
+    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
+    my $fsmap = $mds_dump->{fsmap};
+
+    foreach my $mds (@{$fsmap->{standbys}}) {
+	$add_state->($mds);
+    }
+
+    # a cluster may have more than one file system, so walk all of them
+    # instead of only the first. There's normally only one active MDS per FS,
+    # but we can have multiple active for different ranks (e.g., different
+    # cephfs path hierarchy). So just add all.
+    foreach my $fs_info (@{$fsmap->{filesystems}}) {
+	foreach my $mds (values %{$fs_info->{mdsmap}->{info}}) {
+	    $add_state->($mds);
+	}
+    }
+
+    return $mds_state;
+}
+
+# Create the data directory and keyring for MDS $id, then enable and start its
+# service. Dies on numeric only IDs or if the data directory already exists.
+sub create_mds {
+    my ($id, $rados) = @_;
+
+    # `ceph fs status` fails with numeric only ID.
+    die "ID: $id, numeric only IDs are not supported\n"
+	if $id =~ /^\d+$/;
+
+    $rados = PVE::RADOS->new() if !defined($rados);
+
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+    my $service_keyring = "$service_dir/keyring";
+    my $service_name = "mds.$id";
+
+    die "ceph MDS directory '$service_dir' already exists\n"
+	if -d $service_dir;
+
+    print "creating MDS directory '$service_dir'\n";
+    eval { File::Path::mkpath($service_dir) };
+    # keep the underlying reason, it tells the admin what to actually fix
+    die "creation MDS directory '$service_dir' failed: $@" if $@;
+
+    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
+    my $priv = [
+	mon => 'allow profile mds',
+	osd => 'allow rwx',
+	mds => 'allow *',
+    ];
+
+    print "creating keys for '$service_name'\n";
+    my $output = $rados->mon_command({
+	prefix => 'auth get-or-create',
+	entity => $service_name,
+	caps => $priv,
+	format => 'plain',
+    });
+
+    PVE::Tools::file_set_contents($service_keyring, $output);
+
+    print "setting ceph as owner for service directory\n";
+    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
+
+    print "enabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('enable', $service_name);
+    print "starting service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('start', $service_name);
+
+    return undef;
+}
+
+# Disable and stop the service of MDS $id, remove its data directory (if any)
+# and delete its auth key via the monitors.
+sub destroy_mds {
+    my ($id, $rados) = @_;
+
+    $rados = PVE::RADOS->new() if !defined($rados);
+
+    my $service_name = "mds.$id";
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+
+    print "disabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('disable', $service_name);
+    print "stopping service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('stop', $service_name);
+
+    if (-d $service_dir) {
+	print "removing ceph-mds directory '$service_dir'\n";
+	File::Path::remove_tree($service_dir);
+    } else {
+	warn "cannot cleanup MDS $id directory, '$service_dir' not found\n"
+    }
+
+    print "removing ceph auth for '$service_name'\n";
+    $rados->mon_command({
+	    prefix => 'auth del',
+	    entity => $service_name,
+	    format => 'plain'
+	});
+
+    return undef;
+}
+
 1;
-- 
2.19.1





More information about the pve-devel mailing list