Clean up internal server state handling, various fixes

This commit is contained in:
timvisee 2021-11-09 14:31:14 +01:00
parent 6821cf850e
commit 86c88d85f2
No known key found for this signature in database
GPG Key ID: B8DB720BC383E172
12 changed files with 293 additions and 226 deletions

7
Cargo.lock generated
View File

@ -653,6 +653,7 @@ dependencies = [
"rand 0.8.4",
"rcon",
"serde",
"shlex",
"thiserror",
"tokio",
"toml",
@ -1107,6 +1108,12 @@ dependencies = [
"serde",
]
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "signal-hook-registry"
version = "1.4.0"

View File

@ -35,10 +35,11 @@ log = "0.4"
minecraft-protocol = { git = "https://github.com/timvisee/minecraft-protocol", rev = "4348c27" }
pretty_env_logger = "0.4"
serde = "1.0"
shlex = "1.1"
thiserror = "1.0"
tokio = { version = "1", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "macros", "time", "process", "signal"] }
toml = "0.5"
# Feature: rcon
rust_rcon = { package = "rcon", version = "0.5", optional = true }
rand = { version = "0.8", optional = true }
rust_rcon = { package = "rcon", version = "0.5", optional = true }

View File

@ -54,7 +54,7 @@ lazymc --help
```
When `lazymc` is available, change into your server directory. Then set up the
configuration and start it up:
[configuration](./res/lazymc.toml) and start it up:
```bash
# Change into your server directory

View File

@ -33,9 +33,15 @@ motd_sleeping = "☠ Server is sleeping\n§2☻ Join to start it up"
# MOTD when server is starting.
motd_starting = "§2☻ Server is starting...\n§7⌛ Please wait..."
# MOTD when server is stopping.
motd_stopping = "☠ Server going to sleep...\n⌛ Please wait..."
# Login (kick) message when server is starting.
login_starting = "Server is starting... §c♥§r\n\nThis may take some time.\n\nPlease try to reconnect in a minute."
# Login (kick) message when server is stopping.
login_stopping = "Server is going to sleep... §7☠§r\n\nPlease try to reconnect in a minute to wake it again."
[rcon]
# Enable sleeping server through RCON.
# Must be enabled on Windows.

View File

@ -131,8 +131,14 @@ pub struct Messages {
/// MOTD when server is starting.
pub motd_starting: String,
/// MOTD when server is stopping.
pub motd_stopping: String,
/// Login message when server is starting.
pub login_starting: String,
/// Login message when server is stopping.
pub login_stopping: String,
}
/// RCON configuration.

View File

@ -7,16 +7,12 @@ pub struct Rcon {
impl Rcon {
/// Connect to a host.
pub async fn connect(addr: &str, pass: &str) -> Result<Self, ()> {
pub async fn connect(addr: &str, pass: &str) -> Result<Self, Box<dyn std::error::Error>> {
// Start connection
let con = Connection::builder()
.enable_minecraft_quirks(true)
.connect(addr, pass)
.await
.map_err(|err| {
dbg!(err);
()
})?;
.await?;
Ok(Self { con })
}

View File

@ -15,7 +15,7 @@ use tokio::net::TcpStream;
use crate::config::Config;
use crate::proto::{self, ClientState, RawPacket, PROTO_DEFAULT_PROTOCOL};
use crate::server::ServerState;
use crate::server::Server;
/// Monitor ping interval in seconds.
const MONITOR_PING_INTERVAL: u64 = 2;
@ -24,7 +24,7 @@ const MONITOR_PING_INTERVAL: u64 = 2;
const STATUS_TIMEOUT: u64 = 8;
/// Monitor server.
pub async fn monitor_server(config: Arc<Config>, state: Arc<ServerState>) {
pub async fn monitor_server(config: Arc<Config>, state: Arc<Server>) {
// Server address
let addr = config.server.address;
@ -37,7 +37,7 @@ pub async fn monitor_server(config: Arc<Config>, state: Arc<ServerState>) {
// Sleep server when it's bedtime
if state.should_sleep(&config) {
info!(target: "lazymc::monitor", "Server has been idle, sleeping...");
if !state.kill_server(&config).await {
if !state.stop(&config).await {
warn!(target: "lazymc", "Failed to stop server");
}
}

View File

@ -1,4 +1,3 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
@ -8,153 +7,184 @@ use tokio::process::Command;
use crate::config::Config;
/// Server state.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum State {
/// Server is starting.
Starting,
/// Server is online and responding.
Started,
/// Server is stopping.
Stopping,
/// Server is stopped.
Stopped,
}
/// Shared server state.
#[derive(Default, Debug)]
pub struct ServerState {
/// Whether the server is online.
online: AtomicBool,
#[derive(Debug)]
pub struct Server {
/// Server state.
state: Mutex<State>,
/// Whether the server is starting.
// TODO: use enum for starting/started/stopping states
starting: AtomicBool,
/// Whether the server is stopping.
stopping: AtomicBool,
/// Server PID.
/// Server process PID.
///
/// Set if a server process is running.
pid: Mutex<Option<u32>>,
/// Last known server status.
///
/// Once set, this will remain set, and isn't cleared when the server goes offline.
// TODO: make this private?
pub status: Mutex<Option<ServerStatus>>,
/// Will remain set once known, not cleared if server goes offline.
status: Mutex<Option<ServerStatus>>,
/// Last active time.
///
/// The last known time when the server was active with online players.
/// The last time there was activity on the server. Also set at the moment the server comes
/// online.
last_active: Mutex<Option<Instant>>,
/// Keep server online until.
/// Force server to stay online until.
keep_online_until: Mutex<Option<Instant>>,
}
impl ServerState {
/// Whether the server is online.
pub fn online(&self) -> bool {
self.online.load(Ordering::Relaxed)
impl Server {
/// Get current state.
pub fn state(&self) -> State {
*self.state.lock().unwrap()
}
/// Set whether the server is online.
pub fn set_online(&self, online: bool) {
self.online.store(online, Ordering::Relaxed)
/// Set a new state.
///
/// This updates various other internal things depending on how the state changes.
///
/// Returns false if the state didn't change, in which case nothing happens.
fn update_state(&self, state: State, config: &Config) -> bool {
self.update_state_from(None, state, config)
}
/// Whether the server is starting.
pub fn starting(&self) -> bool {
self.starting.load(Ordering::Relaxed)
/// Set new state, from a current state.
///
/// This updates various other internal things depending on how the state changes.
///
/// Returns false if current state didn't match `from` or if nothing changed.
fn update_state_from(&self, from: Option<State>, state: State, config: &Config) -> bool {
// Get current state, must differ from current, must match from
let mut cur = self.state.lock().unwrap();
if *cur == state || (from.is_some() && from != Some(*cur)) {
return false;
}
/// Set whether the server is starting.
pub fn set_starting(&self, starting: bool) {
self.starting.store(starting, Ordering::Relaxed)
trace!("Change server state from {:?} to {:?}", *cur, state);
// Online/offline messages
match state {
State::Started => info!(target: "lazymc::monitor", "Server is now online"),
State::Stopped => info!(target: "lazymc::monitor", "Server is now sleeping"),
_ => {}
}
/// Kill any running server.
// If Starting -> Started, update active time and keep it online for configured time
if *cur == State::Starting && state == State::Started {
self.update_last_active();
self.keep_online_for(Some(config.time.min_online_time));
}
*cur = state;
true
}
/// Update status as polled from the server.
///
/// This updates various other internal things depending on the current state and the given
/// status.
pub fn update_status(&self, config: &Config, status: Option<ServerStatus>) {
let state = *self.state.lock().unwrap();
// Update state based on current status
match (state, &status) {
(State::Stopped | State::Starting, Some(_)) => {
self.update_state(State::Started, config);
}
(State::Started, None) => {
self.update_state(State::Stopped, config);
}
_ => {}
}
// Update last status if known
if let Some(status) = status {
// Update last active time if there are online players
if status.players.online > 0 {
self.update_last_active();
}
self.status.lock().unwrap().replace(status);
}
}
/// Try to start the server.
///
/// Does nothing if currently not in stopped state.
pub fn start(config: Arc<Config>, server: Arc<Server>) -> bool {
// Must set state from stopped to starting
if !server.update_state_from(Some(State::Stopped), State::Starting, &config) {
return false;
}
// Invoke server command in separate task
tokio::spawn(invoke_server_cmd(config, server).map(|_| ()));
true
}
/// Stop running server.
///
/// This requires the server PID to be known.
#[allow(unused_variables)]
pub async fn kill_server(&self, config: &Config) -> bool {
// Ensure we have a running process
pub async fn stop(&self, config: &Config) -> bool {
// We must have a running process
let has_process = self.pid.lock().unwrap().is_some();
if !has_process {
return false;
}
// Try to kill through RCON
// Try to stop through RCON if started
#[cfg(feature = "rcon")]
if stop_server_rcon(config, &self).await {
// TODO: set stopping state elsewhere
self.stopping.store(true, Ordering::Relaxed);
if self.state() == State::Started && stop_server_rcon(config, &self).await {
return true;
}
// Try to kill through signal
// Try to stop through signal
#[cfg(unix)]
if stop_server_signal(&self) {
// TODO: set stopping state elsewhere
self.stopping.store(true, Ordering::Relaxed);
if stop_server_signal(config, &self) {
return true;
}
false
}
/// Set server PID.
pub fn set_pid(&self, pid: Option<u32>) {
*self.pid.lock().unwrap() = pid;
}
/// Clone the last known server status.
pub fn clone_status(&self) -> Option<ServerStatus> {
self.status.lock().unwrap().clone()
}
/// Update the server status.
pub fn set_status(&self, status: ServerStatus) {
self.status.lock().unwrap().replace(status);
}
/// Update the last active time.
pub fn update_last_active_time(&self) {
self.last_active.lock().unwrap().replace(Instant::now());
}
/// Update the last active time.
pub fn set_keep_online_until(&self, duration: Option<u32>) {
*self.keep_online_until.lock().unwrap() = duration
.filter(|d| *d > 0)
.map(|d| Instant::now() + Duration::from_secs(d as u64));
}
/// Update the server status, online state and last active time.
// TODO: clean this up
pub fn update_status(&self, config: &Config, status: Option<ServerStatus>) {
let stopping = self.stopping.load(Ordering::Relaxed);
let was_online = self.online();
let online = status.is_some() && !stopping;
self.set_online(online);
// If server just came online, update last active time
if !was_online && online {
// TODO: move this somewhere else
info!(target: "lazymc::monitor", "Server is now online");
self.update_last_active_time();
self.set_keep_online_until(Some(config.time.min_online_time));
}
// // If server just went offline, reset stopping state
// // TODO: do this elsewhere
// if stopping && was_online && !online {
// self.stopping.store(false, Ordering::Relaxed);
// }
if let Some(status) = status {
// Update last active time if there are online players
if status.players.online > 0 {
self.update_last_active_time();
}
// Update last known players
self.set_status(status);
}
}
/// Check whether the server should now sleep.
/// Decide whether the server should sleep.
///
/// Always returns false if it is currently not online.
pub fn should_sleep(&self, config: &Config) -> bool {
// TODO: when initiating server start, set last active time!
// TODO: do not initiate sleep when starting?
// TODO: do not initiate sleep when already initiated (with timeout)
// Server must be online
if *self.state.lock().unwrap() != State::Started {
return false;
}
// Never sleep if players are online
let players_online = self
.status
.lock()
.unwrap()
.as_ref()
.map(|status| status.players.online > 0)
.unwrap_or(false);
if players_online {
trace!(target: "lazymc", "Not sleeping because players are online");
return false;
}
// Don't sleep when keep online until isn't expired
let keep_online = self
@ -168,23 +198,6 @@ impl ServerState {
return false;
}
// Server must be online, and must not be starting
if !self.online() || !self.starting() {
return false;
}
// Never idle if players are online
let players_online = self
.status
.lock()
.unwrap()
.as_ref()
.map(|status| status.players.online > 0)
.unwrap_or(false);
if players_online {
return false;
}
// Last active time must have passed sleep threshold
if let Some(last_idle) = self.last_active.lock().unwrap().as_ref() {
return last_idle.elapsed() >= Duration::from_secs(config.time.sleep_after as u64);
@ -192,68 +205,95 @@ impl ServerState {
false
}
/// Clone last known server status.
// TODO: return mutex guard here
pub fn clone_status(&self) -> Option<ServerStatus> {
self.status.lock().unwrap().clone()
}
/// Try to start the server.
///
/// Does not start if already starting.
// TODO: move this into server state struct?
pub fn start_server(config: Arc<Config>, server: Arc<ServerState>) {
// Ensure it is not starting yet
if server.starting() {
return;
/// Update the last active time.
fn update_last_active(&self) {
self.last_active.lock().unwrap().replace(Instant::now());
}
// Update starting states
// TODO: this may data race, use single atomic operation
server.set_starting(true);
server.update_last_active_time();
/// Force the server to be online for the given number of seconds.
fn keep_online_for(&self, duration: Option<u32>) {
*self.keep_online_until.lock().unwrap() = duration
.filter(|d| *d > 0)
.map(|d| Instant::now() + Duration::from_secs(d as u64));
}
}
// Spawn server in separate task
tokio::spawn(invoke_server_command(config, server).map(|_| ()));
impl Default for Server {
fn default() -> Self {
Self {
state: Mutex::new(State::Stopped),
pid: Default::default(),
status: Default::default(),
last_active: Default::default(),
keep_online_until: Default::default(),
}
}
}
/// Invoke server command, store PID and wait for it to quit.
pub async fn invoke_server_command(
pub async fn invoke_server_cmd(
config: Arc<Config>,
state: Arc<ServerState>,
state: Arc<Server>,
) -> Result<(), Box<dyn std::error::Error>> {
// TODO: this doesn't properly handle quotes
let args = config
.server
.command
.split_terminator(" ")
.collect::<Vec<_>>();
// Build command
let mut cmd = Command::new(args[0]);
let args = shlex::split(&config.server.command).expect("invalid server command");
let mut cmd = Command::new(&args[0]);
cmd.args(args.iter().skip(1));
cmd.kill_on_drop(true);
// Set working directory
if let Some(ref dir) = config.server.directory {
cmd.current_dir(dir);
}
cmd.kill_on_drop(true);
// Spawn process
info!(target: "lazymc", "Starting server...");
let mut child = cmd.spawn()?;
let mut child = match cmd.spawn() {
Ok(child) => child,
Err(err) => {
error!(target: "lazymc", "Failed to start server process through command");
return Err(err.into());
}
};
state.set_pid(Some(child.id().expect("unknown server PID")));
// Remember PID
state
.pid
.lock()
.unwrap()
.replace(child.id().expect("unknown server PID"));
let status = child.wait().await?;
info!(target: "lazymc", "Server stopped (status: {})\n", status);
// Wait for process to exit, handle status
match child.wait().await {
Ok(status) if status.success() => {
debug!(target: "lazymc", "Server process stopped successfully ({})", status);
}
Ok(status) => {
warn!(target: "lazymc", "Server process stopped with error code ({})", status);
}
Err(err) => {
error!(target: "lazymc", "Failed to wait for server process to quit: {}", err);
error!(target: "lazymc", "Assuming server quit, cleaning up...");
}
}
// Reset online and starting state
// TODO: also set this when returning early due to error
state.set_pid(None);
state.set_online(false);
state.set_starting(false);
state.stopping.store(false, Ordering::Relaxed);
// Set state to stopped, update server PID
state.pid.lock().unwrap().take();
state.update_state(State::Stopped, &config);
Ok(())
}
/// Stop server through RCON.
#[cfg(feature = "rcon")]
async fn stop_server_rcon(config: &Config, server: &ServerState) -> bool {
async fn stop_server_rcon(config: &Config, server: &Server) -> bool {
use crate::mc::rcon::Rcon;
// RCON must be enabled
@ -269,39 +309,39 @@ async fn stop_server_rcon(config: &Config, server: &ServerState) -> bool {
// Create RCON client
let mut rcon = match Rcon::connect(&addr, &config.rcon.password).await {
Ok(rcon) => rcon,
Err(_) => {
error!(target: "lazymc", "failed to create RCON client to sleep server");
Err(err) => {
error!(target: "lazymc", "Failed to RCON server to sleep: {}", err);
return false;
}
};
// Invoke stop
if let Err(err) = rcon.cmd("stop").await {
error!(target: "lazymc", "failed to invoke stop through RCON: {}", err);
error!(target: "lazymc", "Failed to invoke stop through RCON: {}", err);
}
// TODO: should we set this?
server.set_online(false);
server.set_keep_online_until(None);
// Set server to stopping state
// TODO: set before stop command, revert state on failure
server.update_state(State::Stopping, config);
true
}
/// Stop server by sending SIGTERM signal.
///
/// Only works on Unix.
/// Only available on Unix.
#[cfg(unix)]
fn stop_server_signal(server: &ServerState) -> bool {
if let Some(pid) = *server.pid.lock().unwrap() {
debug!(target: "lazymc", "Sending kill signal to server");
fn stop_server_signal(config: &Config, server: &Server) -> bool {
// Grab PID
let pid = match *server.pid.lock().unwrap() {
Some(pid) => pid,
None => return false,
};
// Set stopping state, send kill signal
// TODO: revert state on failure
server.update_state(State::Stopping, config);
crate::os::kill_gracefully(pid);
// TODO: should we set this?
server.set_online(false);
server.set_keep_online_until(None);
return true;
}
false
true
}

View File

@ -2,9 +2,9 @@ use std::sync::Arc;
use crate::config::Config;
use crate::monitor;
use crate::server::ServerState;
use crate::server::Server;
/// Server monitor task.
pub async fn service(config: Arc<Config>, state: Arc<ServerState>) {
pub async fn service(config: Arc<Config>, state: Arc<Server>) {
monitor::monitor_server(config, state).await
}

View File

@ -6,8 +6,7 @@ use tokio::net::TcpListener;
use crate::config::Config;
use crate::proto::Client;
use crate::proxy;
use crate::server;
use crate::server::ServerState;
use crate::server::{self, Server};
use crate::service;
use crate::status;
use crate::util::error::{quit_error, ErrorHints};
@ -15,7 +14,7 @@ use crate::util::error::{quit_error, ErrorHints};
/// Start lazymc.
pub async fn service(config: Arc<Config>) -> Result<(), ()> {
// Load server state
let server_state = Arc::new(ServerState::default());
let server = Arc::new(Server::default());
// Listen for new connections
// TODO: do not drop error here
@ -35,28 +34,23 @@ pub async fn service(config: Arc<Config>) -> Result<(), ()> {
);
// Spawn server monitor and signal handler services
tokio::spawn(service::monitor::service(
config.clone(),
server_state.clone(),
));
tokio::spawn(service::signal::service(
config.clone(),
server_state.clone(),
));
tokio::spawn(service::monitor::service(config.clone(), server.clone()));
tokio::spawn(service::signal::service(config.clone(), server.clone()));
// Initiate server start
if config.server.wake_on_start {
server::start_server(config.clone(), server_state.clone());
Server::start(config.clone(), server.clone());
}
// Proxy all incoming connections
while let Ok((inbound, _)) = listener.accept().await {
let client = Client::default();
if !server_state.online() {
let online = server.state() == server::State::Started;
if !online {
// When server is not online, spawn a status server
let transfer = status::serve(client, inbound, config.clone(), server_state.clone())
.map(|r| {
let transfer =
status::serve(client, inbound, config.clone(), server.clone()).map(|r| {
if let Err(err) = r {
warn!(target: "lazymc", "Failed to serve status: {:?}", err);
}

View File

@ -1,21 +1,32 @@
use std::sync::Arc;
use crate::config::Config;
use crate::server::ServerState;
use crate::server::{self, Server};
use crate::util::error;
/// Signal handler task.
pub async fn service(config: Arc<Config>, server_state: Arc<ServerState>) {
pub async fn service(config: Arc<Config>, server: Arc<Server>) {
loop {
// Wait for SIGTERM/SIGINT signal
tokio::signal::ctrl_c().await.unwrap();
// Attempt to kill server
let killed = !server_state.kill_server(&config).await;
// Quit if stopped
if server.state() == server::State::Stopped {
quit();
}
// If we don't kill the server, quit this process
if !killed {
// TODO: gracefully kill itself instead
std::process::exit(1)
// Try to stop server
let stopping = server.stop(&config).await;
// If not stopping, maybe due to failure, just quit
if !stopping {
quit();
}
}
}
/// Gracefully quit.
fn quit() -> ! {
// TODO: gracefully quit self
error::quit();
}

View File

@ -16,7 +16,7 @@ use crate::config::*;
use crate::proto::{
self, Client, ClientState, RawPacket, PROTO_DEFAULT_PROTOCOL, PROTO_DEFAULT_VERSION,
};
use crate::server::{self, ServerState};
use crate::server::{self, Server};
/// Proxy the given inbound stream to a target address.
// TODO: do not drop error here, return Box<dyn Error>
@ -24,7 +24,7 @@ pub async fn serve(
client: Client,
mut inbound: TcpStream,
config: Arc<Config>,
server: Arc<ServerState>,
server: Arc<Server>,
) -> Result<(), ()> {
let (mut reader, mut writer) = inbound.split();
@ -44,20 +44,26 @@ pub async fn serve(
// Hijack login start
if client.state() == ClientState::Login && packet.id == proto::LOGIN_PACKET_ID_LOGIN_START {
// Select message
let msg = match server.state() {
server::State::Starting | server::State::Stopped | server::State::Started => {
&config.messages.login_starting
}
server::State::Stopping => &config.messages.login_stopping,
};
let packet = LoginDisconnect {
reason: Message::new(Payload::text(&config.messages.login_starting)),
reason: Message::new(Payload::text(msg)),
};
let mut data = Vec::new();
packet.encode(&mut data).map_err(|_| ())?;
let response = RawPacket::new(0, data).encode()?;
writer.write_all(&response).await.map_err(|_| ())?;
// Start server if not starting yet
server::start_server(config, server);
Server::start(config, server);
break;
}
@ -90,10 +96,10 @@ pub async fn serve(
};
// Select description
let description = if server.starting() {
&config.messages.motd_starting
} else {
&config.messages.motd_sleeping
let description = match server.state() {
server::State::Stopped | server::State::Started => &config.messages.motd_sleeping,
server::State::Starting => &config.messages.motd_starting,
server::State::Stopping => &config.messages.motd_stopping,
};
// Build status response