#!/bin/sh
#
#             OCF Resource Agent for managing CTDB
#
# Copyright (c) 2009-2010 Novell Inc., Tim Serong
#                    All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# TODO:
# - Verify timeouts are sane
# - Should monitor differentiate between error and not running?
# - Do we need to verify globally unique setting?
# - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on
#   current nodes)
# - Be more clever about monitor op:
#   ctdb --socket=/tmp/ctdb.socket status
#	Number of nodes:2
#	pnn:0 192.168.101.14   DISABLED (THIS NODE)
#	pnn:1 192.168.101.15   DISABLED
#	Generation:665993634
#	Size:2
#	hash:0 lmaster:0
#	hash:1 lmaster:1
#	Recovery mode:NORMAL (0)
#	Recovery master:1
#   ^ if this says pnn:0...DISABLED|UNHEALTHY, there is a problem,
#     e.g. ctdb socket not specified in smb.conf.
# - Lots of "No public addresses file found. Nothing to do for
#   10.interfaces" junk in ctdb log file.  Can we fix/suppress this?
# - Look at enabling set_ctdb_variables() if necessary.
# - Probably possible for sysconfig file to not be restored if
#   CTDB dies unexpectedly.
#
#######################################################################
# Initialization:

# Locate and load the OCF shell function library.  An OCF_FUNCTIONS_DIR
# that is already set (even to an empty string) is left untouched;
# otherwise it is derived from OCF_ROOT.
if [ -z "${OCF_FUNCTIONS_DIR+set}" ]; then
	OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat
fi
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs

#######################################################################
# Default parameter values:

# Any parameter that is unset or empty falls back to its stock value.
OCF_RESKEY_ctdb_config_dir=${OCF_RESKEY_ctdb_config_dir:-/etc/ctdb}
OCF_RESKEY_ctdb_binary=${OCF_RESKEY_ctdb_binary:-/usr/bin/ctdb}
OCF_RESKEY_ctdbd_binary=${OCF_RESKEY_ctdbd_binary:-/usr/sbin/ctdbd}
OCF_RESKEY_ctdb_socket=${OCF_RESKEY_ctdb_socket:-/var/lib/ctdb/ctdb.socket}
OCF_RESKEY_ctdb_dbdir=${OCF_RESKEY_ctdb_dbdir:-/var/lib/ctdb}
OCF_RESKEY_ctdb_logfile=${OCF_RESKEY_ctdb_logfile:-/var/log/ctdb/log.ctdb}
OCF_RESKEY_ctdb_debuglevel=${OCF_RESKEY_ctdb_debuglevel:-2}
OCF_RESKEY_smb_conf=${OCF_RESKEY_smb_conf:-/etc/samba/smb.conf}

#######################################################################

# Print the resource agent meta-data (XML) on stdout.
#
# The here-document delimiter is unquoted, so the shell would expand any
# "$" or backquote placed in the text; the text currently contains none.
# The default="..." attributes are written out literally and therefore
# have to be kept in step by hand with the "Default parameter values"
# assignments near the top of this file.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="CTDB" version="1.0">
<version>1.0</version>

<longdesc lang="en">
This resource agent manages CTDB, allowing one to use Clustered Samba
in a Linux-HA/Pacemaker cluster.  You need a shared filesystem
(e.g. OCFS2) on which the CTDB lock will be stored.  Configure shares
in smb.conf on all nodes, and create /etc/ctdb/nodes containing a list
of private IP addresses of each node in the cluster.  Configure this RA
as a clone, and it will take care of the rest.  For more information
see http://linux-ha.org/wiki/CTDB_(resource_agent)
</longdesc>
<shortdesc lang="en">CTDB Resource Agent</shortdesc>

<parameters>

<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
The location of a shared lock file, common across all nodes.
This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="smb_private_dir" unique="1" required="0">
<longdesc lang="en">
The directory for smbd to use for storing such files as
smbpasswd and secrets.tdb.  Old versions of CTDB (prior to 1.0.50)
required this to be on shared storage.  This parameter should not
be set for current versions of CTDB, and only remains in the RA
for backwards compatibility.
</longdesc>
<shortdesc lang="en">Samba private dir (deprecated)</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ctdb_config_dir" unique="0" required="0">
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
in this directory, as is the "events.d" subdirectory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
</parameter>

<parameter name="ctdb_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB binary.
</longdesc>
<shortdesc lang="en">CTDB binary path</shortdesc>
<content type="string" default="/usr/bin/ctdb" />
</parameter>

<parameter name="ctdbd_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB cluster daemon binary.
</longdesc>
<shortdesc lang="en">CTDB Daemon binary path</shortdesc>
<content type="string" default="/usr/sbin/ctdbd" />
</parameter>

<parameter name="ctdb_socket" unique="0" required="0">
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
</longdesc>
<shortdesc lang="en">CTDB socket location</shortdesc>
<content type="string" default="/var/lib/ctdb/ctdb.socket" />
</parameter>

<parameter name="ctdb_dbdir" unique="0" required="0">
<longdesc lang="en">
The directory to put the local CTDB database files in.
Persistent database files will be put in ctdb_dbdir/persistent.
</longdesc>
<shortdesc lang="en">CTDB database directory</shortdesc>
<content type="string" default="/var/lib/ctdb" />
</parameter>

<parameter name="ctdb_logfile" unique="0" required="0">
<longdesc lang="en">
Full path to log file. To log to syslog instead, use the
value "syslog".
</longdesc>
<shortdesc lang="en">CTDB log file location</shortdesc>
<content type="string" default="/var/log/ctdb/log.ctdb" />
</parameter>

<parameter name="ctdb_debuglevel" unique="0" required="0">
<longdesc lang="en">
What debug level to run at (0-10). Higher means more verbose.
</longdesc>
<shortdesc lang="en">CTDB debug level</shortdesc>
<content type="integer" default="2" />
</parameter>

<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file.
</longdesc>
<shortdesc lang="en">Path to smb.conf</shortdesc>
<content type="string" default="/etc/samba/smb.conf" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" />
<action name="meta-data"    timeout="5" />
<action name="validate-all"   timeout="30" />
</actions>
</resource-agent>
END
}

#######################################################################

# Figure out path to /etc/sysconfig/ctdb (same logic as
# loadconfig() from /etc/ctdb/functions): the first candidate below
# that is a regular file wins.  When none of them exists,
# CTDB_SYSCONFIG is not assigned here.
for _ctdb_candidate in \
	/etc/sysconfig/ctdb \
	/etc/default/ctdb \
	"$OCF_RESKEY_ctdb_config_dir/ctdb"
do
	if [ -f "$_ctdb_candidate" ]; then
		CTDB_SYSCONFIG=$_ctdb_candidate
		break
	fi
done
unset _ctdb_candidate

# Backup paths
CTDB_SYSCONFIG_BACKUP=${HA_RSCTMP}/ctdb-${OCF_RESOURCE_INSTANCE}

# Push CTDB tunables to the running daemon: every shell variable named
# CTDB_SET_<name> results in "ctdb setvar <name> <value>".
#
# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
#
# Returns $OCF_SUCCESS when every setvar call succeeded (or there was
# nothing to set) and $OCF_ERR_GENERIC when at least one of them failed.
set_ctdb_variables() {
	local rv
	rv=$OCF_SUCCESS
	# The loop is the last stage of a pipeline and therefore runs in a
	# subshell: assignments made inside it never reach this function.
	# Failures are collected inside the subshell and handed back
	# through its exit status instead.
	set | grep '^CTDB_SET_' | cut -d_ -f3- | {
		failed=0
		while read -r v; do
			varname=${v%%=*}
			# Cut at the first "=" only, so a value that itself
			# contains "=" is passed on complete.
			value=${v#*=}
			$OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" setvar "$varname" "$value" || failed=1
		done
		[ $failed -eq 0 ]
	} || rv=$OCF_ERR_GENERIC
	return $rv
}


# Add necessary settings to /etc/samba/smb.conf.  In a perfect world,
# we'd be able to generate a new, temporary, smb.conf file somewhere,
# something like:
#     include = /etc/samba/smb.conf
#     [global]
#       clustering = yes
#       # ...etc...
# Unfortunately, we can't do this, because there's no way to tell the
# smb init script where the temporary config is, so we just edit
# the default config file.
init_smb_conf() {
	# Rewrites $OCF_RESKEY_smb_conf: lines this RA manages (and any
	# earlier CTDB-RA markers) are filtered out, then a fresh
	# auto-generated section is appended after each [global] header.
	# "private dir" is only emitted when smb_private_dir is set.
	#
	# Returns non-zero, leaving smb.conf untouched, when the new
	# content could not be generated; otherwise the status of the
	# final mv.
	local private_dir tmp_conf
	# Assign explicitly: not every shell clears a variable on "local".
	private_dir=""
	tmp_conf="$OCF_RESKEY_smb_conf.$$"
	if [ -n "$OCF_RESKEY_smb_private_dir" ]; then
		private_dir="\tprivate dir = $OCF_RESKEY_smb_private_dir\n"
	fi
	# NOTE: the \t and \n below are passed to sed as-is and rely on
	# sed expanding escapes in appended text (GNU sed does this).
	grep -Eiv \
		'^[[:space:]]*(# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket)' \
		"$OCF_RESKEY_smb_conf" | sed "/^[[:space:]]*\[global\]/ a\\
\t# CTDB-RA: Begin auto-generated section (do not change below)\n\
\tpassdb backend = tdbsam\n\
\tclustering = yes\n\
\tidmap backend = tdb2\n\
\tctdbd socket = $OCF_RESKEY_ctdb_socket\n$private_dir\
\t# CTDB-RA: End auto-generated section (do not change above)" > "$tmp_conf"
	# An empty result means grep could not read smb.conf (or nothing
	# survived the filter); never replace the real file with that.
	if [ $? -ne 0 ] || [ ! -s "$tmp_conf" ]; then
		rm -f "$tmp_conf"
		return 1
	fi
	mv -f "$tmp_conf" "$OCF_RESKEY_smb_conf"
}


# Get rid of that section we added: every line from a
# "# CTDB-RA: Begin" marker through the next "# CTDB-RA: End" marker is
# removed from $OCF_RESKEY_smb_conf.
#
# Returns non-zero, leaving smb.conf untouched, when sed could not
# produce the cleaned copy; otherwise the status of the final mv.
cleanup_smb_conf() {
	local tmp_conf
	tmp_conf="$OCF_RESKEY_smb_conf.$$"
	if sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' "$OCF_RESKEY_smb_conf" > "$tmp_conf"; then
		mv -f "$tmp_conf" "$OCF_RESKEY_smb_conf"
	else
		# Do not move a truncated or empty temp file over the real
		# configuration.
		rm -f "$tmp_conf"
		return 1
	fi
}


# Keep a copy of the existing CTDB sysconfig file in
# $CTDB_SYSCONFIG_BACKUP, then replace the live file with a minimal
# generated one that is just enough to get Samba running.
save_ctdb_sysconfig() {
	# Our marker in the live file means it was generated by an earlier
	# run - nothing more to do.
	if grep -qa '# CTDB-RA: Auto-generated' $CTDB_SYSCONFIG; then
		return
	fi

	# Without a backup copy the live file is never modified.
	if ! cp -p $CTDB_SYSCONFIG $CTDB_SYSCONFIG_BACKUP; then
		ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP - not making any changes"
		return
	fi

	ocf_log info "Saved $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP, generating new runtime $CTDB_SYSCONFIG"
	printf '%s\n' \
		"# CTDB-RA: Auto-generated by ${0}, backup is at $CTDB_SYSCONFIG_BACKUP" \
		"CTDB_MONITOR_FREE_MEMORY=100" \
		"CTDB_SAMBA_SKIP_SHARE_CHECK=yes" \
		"CTDB_MANAGES_SAMBA=yes" \
		"CTDB_MANAGES_WINBIND=yes" \
		"CTDB_SERVICE_SMB=smb" \
		"CTDB_SERVICE_NMB=nmb" \
		"CTDB_SERVICE_WINBIND=winbind" \
		>$CTDB_SYSCONFIG
}


# Copy the saved sysconfig ($CTDB_SYSCONFIG_BACKUP) back over
# $CTDB_SYSCONFIG and remove the backup afterwards.
#
# Does nothing (status 0) when no backup file exists.  When the copy
# fails a warning is logged, the backup is kept - at that point it is
# the only copy of the original configuration - and 1 is returned.
restore_ctdb_sysconfig() {
	# Quoted on purpose: with an empty variable an unquoted "[ -f ]"
	# would evaluate to true.
	[ -f "$CTDB_SYSCONFIG_BACKUP" ] || return 0

	ocf_log info "Restoring $CTDB_SYSCONFIG_BACKUP to $CTDB_SYSCONFIG"
	# cp handles destination being a symlink (as opposed to mv)
	if ! cp -p "$CTDB_SYSCONFIG_BACKUP" "$CTDB_SYSCONFIG"; then
		ocf_log warn "Unable to restore $CTDB_SYSCONFIG_BACKUP to $CTDB_SYSCONFIG"
		return 1
	fi
	rm "$CTDB_SYSCONFIG_BACKUP"
}


# Print a short synopsis of the supported actions on stdout.
ctdb_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}


# Start ctdbd and wait until this node no longer reports itself as
# UNHEALTHY.
#
# Returns:
#   $OCF_SUCCESS      if ctdb already answers a ping
#   ctdb_validate's status if the configuration is not adequate
#   $OCF_ERR_GENERIC  if a persistent database fails tdbdump, smb.conf
#                     cannot be updated, ctdbd cannot be executed, or
#                     "ctdb status" cannot be invoked afterwards
#   otherwise the status of set_ctdb_variables
#
# The wait loop has no timeout of its own; it keeps polling once a
# second (until start times out if necessary).
ctdb_start() {
	# Do nothing if already running
	ctdb_monitor && return $OCF_SUCCESS

	# Make sure config is adequate
	ctdb_validate
	rv=$?
	[ $rv -ne 0 ] && return $rv

	# Die if databases are corrupted
	persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent"
	mkdir -p "$persistent_db_dir" 2>/dev/null
	# Let the shell expand the pattern rather than parsing ls output.
	for pdbase in "$persistent_db_dir"/*.tdb.[0-9]; do
		# A pattern without matches is handed over literally - skip it
		[ -e "$pdbase" ] || continue
		/usr/bin/tdbdump "$pdbase" >/dev/null 2>/dev/null || {
			ocf_log err "Persistent database $pdbase is corrupted!  CTDB will not start."
			return $OCF_ERR_GENERIC
		}
	done

	# Add necessary configuration to smb.conf
	init_smb_conf
	if [ $? -ne 0 ]; then
		ocf_log err "Failed to update $OCF_RESKEY_smb_conf."
		return $OCF_ERR_GENERIC
	fi

	# Save sysconfig (we're going to generate a minimal one
	# in place of what's there)
	save_ctdb_sysconfig

	# Use logfile by default, or syslog if asked for
	log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
	[ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog"
	
	# Start her up
	$OCF_RESKEY_ctdbd_binary \
		--reclock=$OCF_RESKEY_ctdb_recovery_lock \
		--nlist=$OCF_RESKEY_ctdb_config_dir/nodes \
		--socket=$OCF_RESKEY_ctdb_socket \
		--dbdir=$OCF_RESKEY_ctdb_dbdir \
		--dbdir-persistent=$OCF_RESKEY_ctdb_dbdir/persistent \
		--event-script-dir=$OCF_RESKEY_ctdb_config_dir/events.d \
		--notification-script=$OCF_RESKEY_ctdb_config_dir/notify.sh \
		--transport=tcp \
		--start-as-disabled \
		$log_option \
		-d $OCF_RESKEY_ctdb_debuglevel
	if [ $? -ne 0 ]; then
		# restore sysconfig & cleanup smb.conf
		restore_ctdb_sysconfig
		cleanup_smb_conf
		
		ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary."
		return $OCF_ERR_GENERIC
	else
		# Wait a bit for CTDB to stabilize
		# (until start times out if necessary)
		while true; do
			# Initial sleep is intentional (ctdb init script
			# has sleep after ctdbd start, but before invoking
			# ctdb to talk to it)
			sleep 1
			status=$($OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status 2>/dev/null)
			if [ $? -ne 0 ]; then
				# CTDB will be running, kill it before returning
				ctdb_stop
				ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
				return $OCF_ERR_GENERIC
			fi
			if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then
				# Status does not say this node is unhealthy,
				# so we're good to go - set up any extra
				# variables and (hopefully) return success
				set_ctdb_variables
				return $?
			fi
		done
	fi
	
	# NOTE: both branches above only leave through "return", so the
	# statements below are not reached as the code stands.
	# ctdbd will (or can) actually still be running at this point, so kill it
	ctdb_stop
	
	ocf_log err "Timeout waiting for CTDB to stabilize"
	return $OCF_ERR_GENERIC
}


# Stop ctdbd: ask it to shut down, wait for the process to disappear
# (escalating to SIGKILL for ctdbd and its event scripts after more than
# ten one-second waits), then restore the saved sysconfig and clean up
# smb.conf.
#
# Returns $OCF_SUCCESS when no ctdbd process is left (including when
# none was running to begin with), $OCF_ERR_GENERIC only if the
# shutdown request failed and a ctdbd process is still present.
ctdb_stop() {
	# Do nothing if already stopped
	# ($OCF_RESKEY_ctdbd_binary is deliberately left unquoted in the
	# pkill calls: an empty quoted pattern would match every process.)
	pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS
	
	# Tell it to die nicely
	$OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket shutdown >/dev/null 2>&1
	rv=$?

	# No more Mr. Nice Guy
	count=0
	while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do
		sleep 1
		count=$(($count + 1))
		[ $count -gt 10 ] && {
			ocf_log info "killing ctdbd "
			pkill -9 -f $OCF_RESKEY_ctdbd_binary
			pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/
		}
	done

	# Restore saved sysconfig & cleanup smb.conf
	restore_ctdb_sysconfig
	cleanup_smb_conf

	# It was a clean shutdown, return success
	[ $rv -eq $OCF_SUCCESS ] && return $OCF_SUCCESS

	# The shutdown request failed, but what matters for a stop is
	# whether the daemon is gone: report success if no ctdbd is left.
	pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS

	# Problem shutting down (shouldn't happen)
	return $OCF_ERR_GENERIC
}


# Probe the daemon with "ctdb ping" on the configured socket.
# Returns $OCF_SUCCESS if the ping succeeds, $OCF_NOT_RUNNING otherwise.
ctdb_monitor() {
	if $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket ping > /dev/null 2>&1; then
		return $OCF_SUCCESS
	fi
	return $OCF_NOT_RUNNING
}


# Check that the environment is good enough to start CTDB.
#
# Returns:
#   $OCF_ERR_INSTALLED  no CTDB sysconfig file was found, or the Samba
#                       config file does not exist
#   $OCF_ERR_ARGS       ctdb_recovery_lock is empty, or a probe file
#                       cannot be created in the lock file's directory
#   $OCF_SUCCESS        otherwise
# An existing public_addresses file only produces a warning.
ctdb_validate() {
	[ -n "$CTDB_SYSCONFIG" ] || {
		ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
		return $OCF_ERR_INSTALLED
	}

	[ -f "$OCF_RESKEY_smb_conf" ] || {
		ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
		return $OCF_ERR_INSTALLED
	}

	[ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] &&
		ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"

	[ -n "$OCF_RESKEY_ctdb_recovery_lock" ] || {
		ocf_log err "ctdb_recovery_lock not specified."
		return $OCF_ERR_ARGS
	}

	# Prove the lock directory is usable by creating (and then
	# removing) a scratch file named after our PID in it.
	lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
	if ! touch "$lock_dir/$$" 2>/dev/null; then
		ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
		return $OCF_ERR_ARGS
	fi
	rm "$lock_dir/$$"

	return $OCF_SUCCESS
}


# Dispatch on the requested action.  meta-data, usage/help and unknown
# actions exit straight away; the operational actions fall through so
# that their status is logged before exiting with it.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		ctdb_usage
		exit $OCF_SUCCESS
		;;
	start|stop|monitor)
		# These three map directly onto ctdb_<action>
		ctdb_$__OCF_ACTION
		;;
	validate-all)
		ctdb_validate
		;;
	*)
		ctdb_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
