Add server start/stop timeout to force kill server if it gets stuck
This commit is contained in:
parent
2026b6c846
commit
18fdf4c5f9
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -657,6 +657,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"toml",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -30,7 +30,6 @@ colored = "2.0"
|
||||
derive_builder = "0.10"
|
||||
dotenv = "0.15"
|
||||
futures = { version = "0.3", default-features = false }
|
||||
libc = "0.2"
|
||||
log = "0.4"
|
||||
minecraft-protocol = { git = "https://github.com/timvisee/minecraft-protocol", rev = "31041b8" }
|
||||
pretty_env_logger = "0.4"
|
||||
@ -43,3 +42,9 @@ toml = "0.5"
|
||||
|
||||
# Feature: rcon
|
||||
rust_rcon = { package = "rcon", version = "0.5", optional = true }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["winuser", "processthreadsapi", "handleapi"] }
|
||||
|
@ -73,3 +73,9 @@ command = "java -Xmx1G -Xms1G -jar server.jar --nogui"
|
||||
[advanced]
|
||||
# Automatically update values in Minecraft server.properties file as required.
|
||||
#rewrite_server_properties = true
|
||||
|
||||
# Server starting timeout. Force kill server process if it takes longer.
|
||||
#start_timeout = 300
|
||||
|
||||
# Server stopping timeout. Force kill server process if it takes longer.
|
||||
#stop_timeout = 150
|
||||
|
@ -232,12 +232,22 @@ impl Default for Rcon {
|
||||
pub struct Advanced {
|
||||
/// Rewrite server.properties.
|
||||
pub rewrite_server_properties: bool,
|
||||
|
||||
/// Server starting timeout. Force kill server process if it takes longer.
|
||||
#[serde(alias = "starting_timeout")]
|
||||
pub start_timeout: u32,
|
||||
|
||||
/// Server stopping timeout. Force kill server process if it takes longer.
|
||||
#[serde(alias = "stopping_timeout")]
|
||||
pub stop_timeout: u32,
|
||||
}
|
||||
|
||||
impl Default for Advanced {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
rewrite_server_properties: true,
|
||||
start_timeout: 300,
|
||||
stop_timeout: 150,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,7 +39,6 @@ pub async fn monitor_server(config: Arc<Config>, server: Arc<Server>) {
|
||||
// Poll server state and update internal status
|
||||
trace!(target: "lazymc::monitor", "Fetching status for {} ... ", addr);
|
||||
let status = poll_server(&config, &server, addr).await;
|
||||
|
||||
match status {
|
||||
// Got status, update
|
||||
Ok(Some(status)) => server.update_status(&config, Some(status)),
|
||||
@ -61,6 +60,14 @@ pub async fn monitor_server(config: Arc<Config>, server: Arc<Server>) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check whether we should force kill server
|
||||
if server.should_kill() {
|
||||
error!(target: "lazymc::montior", "Force killing server, took too long to start/stop");
|
||||
if !server.force_kill().await {
|
||||
warn!(target: "lazymc", "Failed to force kill server");
|
||||
}
|
||||
}
|
||||
|
||||
poll_interval.tick().await;
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,38 @@
|
||||
#[cfg(unix)]
|
||||
pub mod unix;
|
||||
#[cfg(windows)]
|
||||
pub mod windows;
|
||||
|
||||
/// Force kill process.
|
||||
///
|
||||
/// Results in undefined behavior if PID is invalid.
|
||||
#[allow(unreachable_code)]
|
||||
pub fn force_kill(pid: u32) -> bool {
|
||||
#[cfg(unix)]
|
||||
unsafe {
|
||||
return unix::force_kill(pid);
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
unsafe {
|
||||
return windodws::force_kill(pid);
|
||||
}
|
||||
|
||||
unimplemented!("force killing Minecraft server process not implemented on this platform");
|
||||
}
|
||||
|
||||
/// Gracefully kill process.
|
||||
///
|
||||
/// Results in undefined behavior if PID is invalid.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics on platforms other than Unix.
|
||||
#[allow(unreachable_code)]
|
||||
pub fn kill_gracefully(pid: u32) {
|
||||
pub fn kill_gracefully(pid: u32) -> bool {
|
||||
#[cfg(unix)]
|
||||
unsafe {
|
||||
unix::kill_gracefully(pid);
|
||||
return;
|
||||
return unix::kill_gracefully(pid);
|
||||
}
|
||||
|
||||
unimplemented!(
|
||||
|
@ -1,11 +1,27 @@
|
||||
/// Force kill process on Unix by sending SIGKILL.
|
||||
///
|
||||
/// This is unsafe because the PID isn't checked.
|
||||
pub unsafe fn force_kill(pid: u32) -> bool {
|
||||
debug!(target: "lazymc", "Sending SIGKILL signal to {} to kill server", pid);
|
||||
let result = libc::kill(pid as i32, libc::SIGKILL);
|
||||
|
||||
if result != 0 {
|
||||
trace!(target: "lazymc", "SIGKILL failed: {}", result);
|
||||
}
|
||||
|
||||
result == 0
|
||||
}
|
||||
|
||||
/// Gracefully kill process on Unix by sending SIGTERM.
|
||||
///
|
||||
/// This is unsafe because the PID isn't checked.
|
||||
pub unsafe fn kill_gracefully(pid: u32) {
|
||||
pub unsafe fn kill_gracefully(pid: u32) -> bool {
|
||||
debug!(target: "lazymc", "Sending SIGTERM signal to {} to kill server", pid);
|
||||
let result = libc::kill(pid as i32, libc::SIGTERM);
|
||||
trace!(target: "lazymc", "SIGTERM result: {}", result);
|
||||
|
||||
// TODO: send sigterm to childs as well?
|
||||
// TODO: handle error if result != 0
|
||||
if result != 0 {
|
||||
trace!(target: "lazymc", "SIGTERM failed: {}", result);
|
||||
}
|
||||
|
||||
result == 0
|
||||
}
|
||||
|
13
src/os/windows.rs
Normal file
13
src/os/windows.rs
Normal file
@ -0,0 +1,13 @@
|
||||
use winapi::um::handleapi::CloseHandle;
|
||||
use winapi::um::processthreadsapi::{OpenProcess, TerminateProcess};
|
||||
use winapi::um::winnt::PROCESS_TERMINATE;
|
||||
|
||||
/// Force kill process on Windows.
|
||||
///
|
||||
/// This is unsafe because the PID isn't checked.
|
||||
pub unsafe fn force_kill(pid: u32) -> bool {
|
||||
debug!(target: "lazymc", "Sending force kill to {} to kill server", pid);
|
||||
let handle = OpenProcess(PROCESS_TERMINATE, false, pid);
|
||||
let mut ok = TerminateProcess(handle, 1);
|
||||
CloseHandle(handle) && ok
|
||||
}
|
@ -7,6 +7,7 @@ use minecraft_protocol::data::server_status::ServerStatus;
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::os;
|
||||
|
||||
/// Server state.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
@ -73,6 +74,11 @@ pub struct Server {
|
||||
|
||||
/// Force server to stay online until.
|
||||
keep_online_until: RwLock<Option<Instant>>,
|
||||
|
||||
/// Time to force kill the server process at.
|
||||
///
|
||||
/// Used as starting/stopping timeout.
|
||||
kill_at: RwLock<Option<Instant>>,
|
||||
}
|
||||
|
||||
impl Server {
|
||||
@ -117,6 +123,17 @@ impl Server {
|
||||
|
||||
trace!("Change server state from {:?} to {:?}", old, new);
|
||||
|
||||
// Update kill at time for starting/stopping state
|
||||
*self.kill_at.write().unwrap() = match new {
|
||||
State::Starting if config.advanced.start_timeout > 0 => {
|
||||
Some(Instant::now() + Duration::from_secs(config.advanced.start_timeout as u64))
|
||||
}
|
||||
State::Stopping if config.advanced.stop_timeout > 0 => {
|
||||
Some(Instant::now() + Duration::from_secs(config.advanced.stop_timeout as u64))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// Online/offline messages
|
||||
match new {
|
||||
State::Started => info!(target: "lazymc::monitor", "Server is now online"),
|
||||
@ -206,6 +223,16 @@ impl Server {
|
||||
false
|
||||
}
|
||||
|
||||
/// Force kill running server.
|
||||
///
|
||||
/// This requires the server PID to be known.
|
||||
pub async fn force_kill(&self) -> bool {
|
||||
if let Some(pid) = *self.pid.lock().unwrap() {
|
||||
return os::force_kill(pid);
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Decide whether the server should sleep.
|
||||
///
|
||||
/// Always returns false if it is currently not online.
|
||||
@ -248,6 +275,15 @@ impl Server {
|
||||
false
|
||||
}
|
||||
|
||||
/// Decide whether to force kill the server process.
|
||||
pub fn should_kill(&self) -> bool {
|
||||
self.kill_at
|
||||
.read()
|
||||
.unwrap()
|
||||
.map(|t| t <= Instant::now())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Read last known server status.
|
||||
pub fn status(&self) -> RwLockReadGuard<Option<ServerStatus>> {
|
||||
self.status.read().unwrap()
|
||||
@ -274,6 +310,7 @@ impl Default for Server {
|
||||
status: Default::default(),
|
||||
last_active: Default::default(),
|
||||
keep_online_until: Default::default(),
|
||||
kill_at: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user