Add server start/stop timeout to force kill server if it gets stuck

This commit is contained in:
timvisee 2021-11-10 23:43:47 +01:00
parent 2026b6c846
commit 18fdf4c5f9
No known key found for this signature in database
GPG Key ID: B8DB720BC383E172
9 changed files with 125 additions and 9 deletions

1
Cargo.lock generated
View File

@ -657,6 +657,7 @@ dependencies = [
"thiserror", "thiserror",
"tokio", "tokio",
"toml", "toml",
"winapi",
] ]
[[package]] [[package]]

View File

@ -30,7 +30,6 @@ colored = "2.0"
derive_builder = "0.10" derive_builder = "0.10"
dotenv = "0.15" dotenv = "0.15"
futures = { version = "0.3", default-features = false } futures = { version = "0.3", default-features = false }
libc = "0.2"
log = "0.4" log = "0.4"
minecraft-protocol = { git = "https://github.com/timvisee/minecraft-protocol", rev = "31041b8" } minecraft-protocol = { git = "https://github.com/timvisee/minecraft-protocol", rev = "31041b8" }
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
@ -43,3 +42,9 @@ toml = "0.5"
# Feature: rcon # Feature: rcon
rust_rcon = { package = "rcon", version = "0.5", optional = true } rust_rcon = { package = "rcon", version = "0.5", optional = true }
[target.'cfg(unix)'.dependencies]
libc = "0.2"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["winuser", "processthreadsapi", "handleapi"] }

View File

@ -73,3 +73,9 @@ command = "java -Xmx1G -Xms1G -jar server.jar --nogui"
[advanced] [advanced]
# Automatically update values in Minecraft server.properties file as required. # Automatically update values in Minecraft server.properties file as required.
#rewrite_server_properties = true #rewrite_server_properties = true
# Server starting timeout. Force kill server process if it takes longer.
#start_timeout = 300
# Server stopping timeout. Force kill server process if it takes longer.
#stop_timeout = 150

View File

@ -232,12 +232,22 @@ impl Default for Rcon {
pub struct Advanced { pub struct Advanced {
/// Rewrite server.properties. /// Rewrite server.properties.
pub rewrite_server_properties: bool, pub rewrite_server_properties: bool,
/// Server starting timeout. Force kill server process if it takes longer.
#[serde(alias = "starting_timeout")]
pub start_timeout: u32,
/// Server stopping timeout. Force kill server process if it takes longer.
#[serde(alias = "stopping_timeout")]
pub stop_timeout: u32,
} }
impl Default for Advanced { impl Default for Advanced {
fn default() -> Self { fn default() -> Self {
Self { Self {
rewrite_server_properties: true, rewrite_server_properties: true,
start_timeout: 300,
stop_timeout: 150,
} }
} }
} }

View File

@ -39,7 +39,6 @@ pub async fn monitor_server(config: Arc<Config>, server: Arc<Server>) {
// Poll server state and update internal status // Poll server state and update internal status
trace!(target: "lazymc::monitor", "Fetching status for {} ... ", addr); trace!(target: "lazymc::monitor", "Fetching status for {} ... ", addr);
let status = poll_server(&config, &server, addr).await; let status = poll_server(&config, &server, addr).await;
match status { match status {
// Got status, update // Got status, update
Ok(Some(status)) => server.update_status(&config, Some(status)), Ok(Some(status)) => server.update_status(&config, Some(status)),
@ -61,6 +60,14 @@ pub async fn monitor_server(config: Arc<Config>, server: Arc<Server>) {
} }
} }
// Check whether we should force kill server, which is the case once the
// starting/stopping timeout tracked by the server has elapsed
if server.should_kill() {
    error!(target: "lazymc::monitor", "Force killing server, took too long to start/stop");
    if !server.force_kill().await {
        warn!(target: "lazymc", "Failed to force kill server");
    }
}
poll_interval.tick().await; poll_interval.tick().await;
} }
} }

View File

@ -1,17 +1,38 @@
#[cfg(unix)] #[cfg(unix)]
pub mod unix; pub mod unix;
#[cfg(windows)]
pub mod windows;
/// Force kill process.
///
/// Dispatches to the platform specific implementation and returns `true` if
/// that implementation reported success.
///
/// Results in undefined behavior if PID is invalid.
///
/// # Panics
///
/// Panics on platforms other than Unix and Windows.
// The trailing `unimplemented!` is unreachable on supported platforms because
// the platform specific block above it always returns.
#[allow(unreachable_code)]
pub fn force_kill(pid: u32) -> bool {
    #[cfg(unix)]
    unsafe {
        return unix::force_kill(pid);
    }

    #[cfg(windows)]
    unsafe {
        return windows::force_kill(pid);
    }

    unimplemented!("force killing Minecraft server process not implemented on this platform");
}
/// Gracefully kill process. /// Gracefully kill process.
/// ///
/// Results in undefined behavior if PID is invalid.
///
/// # Panics /// # Panics
/// ///
/// Panics on platforms other than Unix. /// Panics on platforms other than Unix.
#[allow(unreachable_code)] #[allow(unreachable_code)]
pub fn kill_gracefully(pid: u32) { pub fn kill_gracefully(pid: u32) -> bool {
#[cfg(unix)] #[cfg(unix)]
unsafe { unsafe {
unix::kill_gracefully(pid); return unix::kill_gracefully(pid);
return;
} }
unimplemented!( unimplemented!(

View File

@ -1,11 +1,27 @@
/// Force kill process on Unix by sending SIGKILL.
///
/// Returns `true` if the signal was delivered, `false` if the PID is out of
/// range or `kill` reported an error.
///
/// # Safety
///
/// This is unsafe because the PID isn't checked to belong to the server.
pub unsafe fn force_kill(pid: u32) -> bool {
    // `kill(2)` gives special meaning to non-positive PIDs: 0 targets the
    // caller's process group and negative values target process groups or
    // every process. Refuse anything that would not survive the cast to a
    // positive `pid_t`, so we never signal more than a single process.
    if pid == 0 || pid > i32::MAX as u32 {
        debug!(target: "lazymc", "Refusing to send SIGKILL signal to invalid PID {}", pid);
        return false;
    }

    debug!(target: "lazymc", "Sending SIGKILL signal to {} to kill server", pid);
    let result = libc::kill(pid as libc::pid_t, libc::SIGKILL);

    if result != 0 {
        // The return value is always -1 on failure, the reason is in errno.
        // Read it before logging so nothing in between can overwrite it.
        let err = std::io::Error::last_os_error();
        trace!(target: "lazymc", "SIGKILL failed: {}", err);
    }

    result == 0
}
/// Gracefully kill process on Unix by sending SIGTERM. /// Gracefully kill process on Unix by sending SIGTERM.
/// ///
/// This is unsafe because the PID isn't checked. /// This is unsafe because the PID isn't checked.
pub unsafe fn kill_gracefully(pid: u32) { pub unsafe fn kill_gracefully(pid: u32) -> bool {
debug!(target: "lazymc", "Sending SIGTERM signal to {} to kill server", pid); debug!(target: "lazymc", "Sending SIGTERM signal to {} to kill server", pid);
let result = libc::kill(pid as i32, libc::SIGTERM); let result = libc::kill(pid as i32, libc::SIGTERM);
trace!(target: "lazymc", "SIGTERM result: {}", result);
// TODO: send sigterm to childs as well? if result != 0 {
// TODO: handle error if result != 0 trace!(target: "lazymc", "SIGTERM failed: {}", result);
}
result == 0
} }

13
src/os/windows.rs Normal file
View File

@ -0,0 +1,13 @@
use winapi::um::handleapi::CloseHandle;
use winapi::um::processthreadsapi::{OpenProcess, TerminateProcess};
use winapi::um::winnt::PROCESS_TERMINATE;
/// Force kill process on Windows.
///
/// Opens the process with terminate rights, terminates it with exit code 1
/// and closes the handle again. Returns `true` only if the process could be
/// opened and both terminating it and closing the handle succeeded.
///
/// # Safety
///
/// This is unsafe because the PID isn't checked.
pub unsafe fn force_kill(pid: u32) -> bool {
    debug!(target: "lazymc", "Sending force kill to {} to kill server", pid);

    // `bInheritHandle` is a Win32 `BOOL` (an integer), not a Rust `bool`;
    // 0 means the handle is not inherited by child processes.
    let handle = OpenProcess(PROCESS_TERMINATE, 0, pid);
    if handle.is_null() {
        // Process could not be opened, there is nothing to terminate or close
        return false;
    }

    // Win32 `BOOL` results are non-zero on success. Always close the handle,
    // even if terminating failed, to prevent leaking it.
    let terminated = TerminateProcess(handle, 1) != 0;
    let closed = CloseHandle(handle) != 0;

    closed && terminated
}

View File

@ -7,6 +7,7 @@ use minecraft_protocol::data::server_status::ServerStatus;
use tokio::process::Command; use tokio::process::Command;
use crate::config::Config; use crate::config::Config;
use crate::os;
/// Server state. /// Server state.
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -73,6 +74,11 @@ pub struct Server {
/// Force server to stay online until. /// Force server to stay online until.
keep_online_until: RwLock<Option<Instant>>, keep_online_until: RwLock<Option<Instant>>,
/// Time to force kill the server process at.
///
/// Used as starting/stopping timeout.
kill_at: RwLock<Option<Instant>>,
} }
impl Server { impl Server {
@ -117,6 +123,17 @@ impl Server {
trace!("Change server state from {:?} to {:?}", old, new); trace!("Change server state from {:?} to {:?}", old, new);
// Update kill at time for starting/stopping state
*self.kill_at.write().unwrap() = match new {
State::Starting if config.advanced.start_timeout > 0 => {
Some(Instant::now() + Duration::from_secs(config.advanced.start_timeout as u64))
}
State::Stopping if config.advanced.stop_timeout > 0 => {
Some(Instant::now() + Duration::from_secs(config.advanced.stop_timeout as u64))
}
_ => None,
};
// Online/offline messages // Online/offline messages
match new { match new {
State::Started => info!(target: "lazymc::monitor", "Server is now online"), State::Started => info!(target: "lazymc::monitor", "Server is now online"),
@ -206,6 +223,16 @@ impl Server {
false false
} }
/// Force kill the running server process.
///
/// The server PID must be known. Returns `false` if it is not, otherwise
/// returns the result of the OS level force kill.
pub async fn force_kill(&self) -> bool {
    match *self.pid.lock().unwrap() {
        Some(pid) => os::force_kill(pid),
        None => false,
    }
}
/// Decide whether the server should sleep. /// Decide whether the server should sleep.
/// ///
/// Always returns false if it is currently not online. /// Always returns false if it is currently not online.
@ -248,6 +275,15 @@ impl Server {
false false
} }
/// Decide whether to force kill the server process.
///
/// Returns `true` once the configured kill time has been reached, and
/// `false` if no kill time is set.
pub fn should_kill(&self) -> bool {
    let kill_at = *self.kill_at.read().unwrap();
    match kill_at {
        Some(deadline) => deadline <= Instant::now(),
        None => false,
    }
}
/// Read last known server status. /// Read last known server status.
pub fn status(&self) -> RwLockReadGuard<Option<ServerStatus>> { pub fn status(&self) -> RwLockReadGuard<Option<ServerStatus>> {
self.status.read().unwrap() self.status.read().unwrap()
@ -274,6 +310,7 @@ impl Default for Server {
status: Default::default(), status: Default::default(),
last_active: Default::default(), last_active: Default::default(),
keep_online_until: Default::default(), keep_online_until: Default::default(),
kill_at: Default::default(),
} }
} }
} }