[pve-devel] [PATCH manager v2 2/5] ceph: add MDS create/delete/list API

Dominik Csapak d.csapak at proxmox.com
Fri Nov 23 11:02:00 CET 2018


1 minor thing inline, rest looks good

On 11/22/18 8:34 PM, Thomas Lamprecht wrote:
> Allow to create, list and destroy a Ceph Metadata Server (MDS) over
> the API and the CLI `pveceph` tool.
> 
> Besides setting up the local systemd service template and the MDS
> data directory we also add a reference to the MDS in the ceph.conf.
> We note the backing host (node) from the respective MDS and set up a
> 'mds standby for name' = 'pve' so that the PVE created ones are a
> single group. If we decide to add integration for rank/path specific
> MDS (possibly useful for CephFS with quite a bit of load) then this
> may help as a starting point.
> 
> On create, check early if a reference already exists in ceph.conf and
> abort in that case. If we only see existing data directories later
> on we do not remove them, they could well be from an older manual
> create - where it's possibly dangerous to just remove it. Let the
> user handle it themselves in that case.
> 
> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
> Co-authored-by: Alwin Antreich <a.antreich at proxmox.com>
> ---
> 
> changes v1 -> v2:
> * fix directory index, return more info
> * added get_cluster_mds_state
> 
> 
>   PVE/API2/Ceph.pm       |   7 ++
>   PVE/API2/Ceph/MDS.pm   | 247 +++++++++++++++++++++++++++++++++++++++++
>   PVE/API2/Ceph/Makefile |  15 +++
>   PVE/API2/Makefile      |   2 +-
>   PVE/CLI/pveceph.pm     |   3 +
>   PVE/CephTools.pm       | 140 ++++++++++++++++++++++-
>   6 files changed, 412 insertions(+), 2 deletions(-)
>   create mode 100644 PVE/API2/Ceph/MDS.pm
>   create mode 100644 PVE/API2/Ceph/Makefile
> 
> diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
> index a6eec24a..d3e8d665 100644
> --- a/PVE/API2/Ceph.pm
> +++ b/PVE/API2/Ceph.pm
> @@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
>   use PVE::Storage;
>   use PVE::Tools qw(run_command file_get_contents file_set_contents);
>   
> +use PVE::API2::Ceph::MDS;
>   use PVE::API2::Storage::Config;
>   
>   use base qw(PVE::RESTHandler);
> @@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
>       path => 'osd',
>   });
>   
> +__PACKAGE__->register_method ({
> +    subclass => "PVE::API2::Ceph::MDS",
> +    path => 'mds',
> +});
> +
>   __PACKAGE__->register_method ({
>       name => 'index',
>       path => '',
> @@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
>   	    { name => 'mon' },
>   	    { name => 'osd' },
>   	    { name => 'pools' },
> +	    { name => 'mds' },
>   	    { name => 'stop' },
>   	    { name => 'start' },
>   	    { name => 'status' },
> diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
> new file mode 100644
> index 00000000..30d03285
> --- /dev/null
> +++ b/PVE/API2/Ceph/MDS.pm
> @@ -0,0 +1,247 @@
> +package PVE::API2::Ceph::MDS;
> +
> +use strict;
> +use warnings;
> +
> +use PVE::CephTools;
> +use PVE::INotify;
> +use PVE::JSONSchema qw(get_standard_option);
> +use PVE::RADOS;
> +use PVE::RESTHandler;
> +use PVE::RPCEnvironment;
> +
> +use base qw(PVE::RESTHandler);
> +
> +__PACKAGE__->register_method ({
> +    name => 'index',
> +    path => '',
> +    method => 'GET',
> +    description => "MDS directory index.",
> +    permissions => {
> +	check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
> +    },
> +    proxyto => 'node',
> +    protected => 1,
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    node => get_standard_option('pve-node'),
> +	},
> +    },
> +    returns => {
> +	type => 'array',
> +	items => {
> +	    type => "object",
> +	    properties => {
> +		name => {
> +		    description => "The name (ID) for the MDS",
> +		},
> +		addr => {
> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		host => {
> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		state => {
> +		    type => 'string',
> +		    description => 'State of the MDS',
> +		},
> +		standby_replay => {
> +		    type => 'boolean',
> +		    optional => 1,
> +		    description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
> +		},
> +		rank => {
> +		    type => 'integer',
> +		    optional => 1,
> +		},
> +	    },
> +	},
> +	links => [ { rel => 'child', href => "{name}" } ],
> +    },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $res = [];
> +
> +	my $cfg = PVE::CephTools::parse_ceph_config();
> +
> +	my $mds_hash = {};
> +
> +	foreach my $section (keys %$cfg) {
> +	    my $d = $cfg->{$section};
> +
> +	    if ($section =~ m/^mds\.(\S+)$/) {
> +		my $mds_id = $1;
> +		if (defined($d->{host})) {
> +		    $mds_hash->{$mds_id} = {
> +			name => $mds_id,
> +			state => 'unknown',
> +			addr => $d->{host},
> +			host => $d->{host},
> +		    };
> +		}
> +	    }
> +	}
> +
> +	if (scalar(keys %$mds_hash) > 0) {
> +	    # does not include configured but stopped ones
> +	    my $mds_state = PVE::CephTools::get_cluster_mds_state();
> +
> +	    foreach my $name (keys %$mds_state) {
> +		my $d = $mds_state->{$name};
> +		# just overwrite, this always provides more info
> +		map { $mds_hash->{$name}->{$_} = $d->{$_} } keys %$d;
> +	    }
> +	}
> +
> +	return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
> +    }
> +});
> +
> +__PACKAGE__->register_method ({
> +    name => 'createmds',
> +    path => '{name}',
> +    method => 'POST',
> +    description => "Create Ceph Metadata Server (MDS)",
> +    proxyto => 'node',
> +    protected => 1,
> +    permissions => {
> +	check => ['perm', '/', [ 'Sys.Modify' ]],
> +    },
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    node => get_standard_option('pve-node'),
> +	    name => {
> +		type => 'string',
> +		optional => 1,
> +		default => 'nodename',
> +		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
> +		description => "The ID for the mds, when omitted the same as the nodename",
> +	    },
> +	    hotstandby => {
> +		type => 'boolean',
> +		optional => 1,
> +		default => '0',
> +		description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
> +		    "Faster switch on MDS failure, but needs more idle resources.",
> +	    },
> +	},
> +    },
> +    returns => { type => 'string' },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	PVE::CephTools::check_ceph_installed('ceph_mds');
> +
> +	PVE::CephTools::check_ceph_inited();
> +
> +	my $rpcenv = PVE::RPCEnvironment::get();
> +	my $authuser = $rpcenv->get_user();
> +
> +	my $nodename = $param->{node};
> +	$nodename = INotify::nodename() if $nodename eq 'localhost';
> +
> +	my $mds_id = $param->{name} // $nodename;
> +
> +	my $worker = sub {
> +	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
> +	    my $rados = PVE::RADOS->new(timeout => $timeout);
> +
> +	    my $cfg = PVE::CephTools::parse_ceph_config();
> +
> +	    my $section = "mds.$mds_id";
> +
> +	    if (defined($cfg->{$section})) {
> +		die "MDS '$mds_id' already referenced in ceph config, abort!\n"
> +	    }
> +
> +	    if (!defined($cfg->{mds}->{keyring})) {
> +		# $id isn't a perl variable but a ceph metavariable
> +		my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
> +
> +		$cfg->{mds}->{keyring} = $keyring;
> +	    }
> +
> +	    $cfg->{$section}->{host} = $nodename;
> +	    $cfg->{$section}->{"mds standby for name"} = 'pve';
> +
> +	    if ($param->{hotstandby}) {
> +		$cfg->{$section}->{"mds standby replay"} = 'true';
> +	    }
> +
> +	    PVE::CephTools::write_ceph_config($cfg);
> +
> +	    eval { PVE::CephTools::create_mds($mds_id, $rados) };
> +	    if (my $err = $@) {
> +		# we abort early if the section is defined, so we know that we
> +		# wrote it at this point. Do not auto remove the service, could
> +		# do real harm for previously manual setup MDS
> +		warn "Encountered error, remove '$section' from ceph.conf\n";
> +		$cfg = PVE::CephTools::parse_ceph_config();
> +		delete $cfg->{$section};
> +		PVE::CephTools::write_ceph_config($cfg);
> +
> +		die "$err\n";
> +	    }
> +	};
> +
> +	return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
> +    }
> +});
> +
> +__PACKAGE__->register_method ({
> +    name => 'destroymds',
> +    path => '{name}',
> +    method => 'DELETE',
> +    description => "Destroy Ceph Metadata Server",
> +    proxyto => 'node',
> +    protected => 1,
> +    permissions => {
> +	check => ['perm', '/', [ 'Sys.Modify' ]],
> +    },
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    node => get_standard_option('pve-node'),
> +	    name => {
> +		description => 'The name (ID) of the mds',
> +		type => 'string',
> +		pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
> +	    },
> +	},
> +    },
> +    returns => { type => 'string' },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $rpcenv = PVE::RPCEnvironment::get();
> +
> +	my $authuser = $rpcenv->get_user();
> +
> +	PVE::CephTools::check_ceph_inited();
> +
> +	my $mds_id = $param->{name};
> +
> +	my $worker = sub {
> +	    my $timeout = PVE::CephTools::get_config('long_rados_timeout');
> +	    my $rados = PVE::RADOS->new(timeout => $timeout);
> +
> +	    my $cfg = PVE::CephTools::parse_ceph_config();
> +
> +	    if (defined($cfg->{"mds.$mds_id"})) {
> +		delete $cfg->{"mds.$mds_id"};
> +		PVE::CephTools::write_ceph_config($cfg);
> +	    }
> +
> +	    PVE::CephTools::destroy_mds($mds_id, $rados);
> +	};
> +
> +	return $rpcenv->fork_worker('cephdestroymds', "mds.$mds_id",  $authuser, $worker);
> +    }
> +});
> +
> +1;
> diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
> new file mode 100644
> index 00000000..be4d740c
> --- /dev/null
> +++ b/PVE/API2/Ceph/Makefile
> @@ -0,0 +1,15 @@
> +include ../../../defines.mk
> +
> +PERLSOURCE= 			\
> +	MDS.pm
> +
> +all:
> +
> +.PHONY: clean
> +clean:
> +	rm -rf *~
> +
> +.PHONY: install
> +install: ${PERLSOURCE}
> +	install -d ${PERLLIBDIR}/PVE/API2/Ceph
> +	install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
> diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
> index a62bf909..c5868d7f 100644
> --- a/PVE/API2/Makefile
> +++ b/PVE/API2/Makefile
> @@ -1,6 +1,6 @@
>   include ../../defines.mk
>   
> -SUBDIRS=Hardware
> +SUBDIRS=Hardware Ceph
>   
>   PERLSOURCE = 			\
>   	Replication.pm		\
> diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
> index a5a04949..90878d9e 100755
> --- a/PVE/CLI/pveceph.pm
> +++ b/PVE/CLI/pveceph.pm
> @@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
>   use PVE::JSONSchema qw(get_standard_option);
>   use PVE::CephTools;
>   use PVE::API2::Ceph;
> +use PVE::API2::Ceph::MDS;
>   
>   use PVE::CLIHandler;
>   
> @@ -175,6 +176,8 @@ our $cmddef = {
>       destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
>       createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
>       destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
> +    createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
> +    destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['id'], { node => $nodename }, $upid_exit],
>       start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
>       stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
>       install => [ __PACKAGE__, 'install', [] ],
> diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
> index 8a9afa84..da31ccae 100644
> --- a/PVE/CephTools.pm
> +++ b/PVE/CephTools.pm
> @@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
>   my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
>   my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
>   my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
> +my $ceph_mds_data_dir = '/var/lib/ceph/mds';
>   
>   my $ceph_service = {
>       ceph_bin => "/usr/bin/ceph",
>       ceph_mon => "/usr/bin/ceph-mon",
>       ceph_mgr => "/usr/bin/ceph-mgr",
> -    ceph_osd => "/usr/bin/ceph-osd"
> +    ceph_osd => "/usr/bin/ceph-osd",
> +    ceph_mds => "/usr/bin/ceph-mds",
>   };
>   
>   my $config_hash = {
> @@ -33,6 +35,7 @@ my $config_hash = {
>       pve_ckeyring_path => $pve_ckeyring_path,
>       ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
>       ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
> +    ceph_mds_data_dir => $ceph_mds_data_dir,
>       long_rados_timeout => 60,
>   };
>   
> @@ -297,4 +300,139 @@ sub systemd_managed {
>       }
>   }
>   
> +sub list_local_mds_ids {
> +    my $mds_list = [];
> +
> +    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, sub {
> +	my (undef, $mds_id) = @_;
> +	push @$mds_list, $mds_id;
> +    });
> +
> +    return $mds_list;
> +}

As far as I can see, list_local_mds_ids is not used anywhere? It could be removed in a fixup, of course.

> +
> +sub get_cluster_mds_state {
> +    my ($rados) = @_;
> +
> +    my $mds_state = {};
> +
> +    if (!defined($rados)) {
> +	$rados = PVE::RADOS->new();
> +    }
> +
> +    my $add_state = sub {
> +	my ($mds) = @_;
> +
> +	my $state = {};
> +	$state->{addr} = $mds->{addr};
> +	$state->{rank} = $mds->{rank};
> +	$state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
> +	$state->{state} = $mds->{state};
> +
> +	$mds_state->{$mds->{name}} = $state;
> +    };
> +
> +    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
> +    my $fsmap = $mds_dump->{fsmap};
> +
> +
> +    foreach my $mds (@{$fsmap->{standbys}}) {
> +	$add_state->($mds);
> +    }
> +
> +    my $fs_info = $fsmap->{filesystems}->[0];
> +    my $active_mds = $fs_info->{mdsmap}->{info};
> +
> +    # normally there's only one active MDS, but we can have multiple active for
> +    # different ranks (e.g., different cephs path hierarchy). So just add all.
> +    foreach my $mds (values %$active_mds) {
> +	$add_state->($mds);
> +    }
> +
> +    return $mds_state;
> +}
> +
> +sub create_mds {
> +    my ($id, $rados) = @_;
> +
> +    # `ceph fs status` fails with numeric only ID.
> +    die "ID: $id, numeric only IDs are not supported\n"
> +	if $id =~ /^\d+$/;
> +
> +    if (!defined($rados)) {
> +	$rados = PVE::RADOS->new();
> +    }
> +
> +    my $service_dir = "/var/lib/ceph/mds/$ccname-$id";
> +    my $service_keyring = "$service_dir/keyring";
> +    my $service_name = "mds.$id";
> +
> +    die "ceph MDS directory '$service_dir' already exists\n"
> +	if -d $service_dir;
> +
> +    print "creating MDS directory '$service_dir'\n";
> +    eval { File::Path::mkpath($service_dir) };
> +    my $err = $@;
> +    die "creation MDS directory '$service_dir' failed\n" if $err;
> +
> +    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
> +    my $priv = [
> +	mon => 'allow profile mds',
> +	osd => 'allow rwx',
> +	mds => 'allow *',
> +    ];
> +
> +    print "creating keys for '$service_name'\n";
> +    my $output = $rados->mon_command({
> +	prefix => 'auth get-or-create',
> +	entity => $service_name,
> +	caps => $priv,
> +	format => 'plain',
> +    });
> +
> +    PVE::Tools::file_set_contents($service_keyring, $output);
> +
> +    print "setting ceph as owner for service directory\n";
> +    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
> +
> +    print "enabling service 'ceph-mds\@$id.service'\n";
> +    ceph_service_cmd('enable', $service_name);
> +    print "starting service 'ceph-mds\@$id.service'\n";
> +    ceph_service_cmd('start', $service_name);
> +
> +    return undef;
> +};
> +
> +sub destroy_mds {
> +    my ($id, $rados) = @_;
> +
> +    if (!defined($rados)) {
> +	$rados = PVE::RADOS->new();
> +    }
> +
> +    my $service_name = "mds.$id";
> +    my $service_dir = "/var/lib/ceph/mds/$ccname-$id";
> +
> +    print "disabling service 'ceph-mds\@$id.service'\n";
> +    ceph_service_cmd('disable', $service_name);
> +    print "stopping service 'ceph-mds\@$id.service'\n";
> +    ceph_service_cmd('stop', $service_name);
> +
> +    if (-d $service_dir) {
> +	print "removing ceph-mds directory '$service_dir'\n";
> +	File::Path::remove_tree($service_dir);
> +    } else {
> +	warn "cannot cleanup MDS $id directory, '$service_dir' not found\n"
> +    }
> +
> +    print "removing ceph auth for '$service_name'\n";
> +    $rados->mon_command({
> +	    prefix => 'auth del',
> +	    entity => $service_name,
> +	    format => 'plain'
> +	});
> +
> +    return undef;
> +};
> +
>   1;
> 





More information about the pve-devel mailing list