Change YamlConfiguration encoding styles.

On JVMs with UTF-8 default encoding, this commit has no change in behavior.

On JVMs with ascii default encoding (like some minimal linux installa-
tions), this commit now uses UTF-8 for YamlConfiguration operations.
Because all ascii is valid UTF-8, there is no feature degradation or data
loss during the transition.

On JVMs with any non-unicode but ascii-compliant encoding, this commit now
forces YamlConfiguration to escape special characters when writing to
files, effectively rendering the encoding to be plain ascii. Any affected
file will now be able to migrate to UTF-8 in the future without data-loss
or explicit conversion. When reading files, YamlConfiguration will use the
system default encoding to handle any incoming non-utf8 data, with the
expectation that any newly written file is still compliant with the
system's default encoding.

On JVMs with any non-unicode, but ascii-incompliant encoding (this may be
the case for some Eastern character sets on Windows systems), this change
is breaking, but is justified in claim that these systems would otherwise
be unable to read YamlConfiguration for implementation dependent settings
or from plugins themselves. For these systems, all uses of the encoding
will be forced to use UTF-8 in all cases, and is effectively treated as if
it was configured to be UTF-8 by default.

On JVMs with unicode encoding of UTF-16 or UTF-32, the ability to load any
configurations from almost any source prior to this change would have been
unfeasible, if not impossible. As of this change, however, these systems
now behave as expected when writing or reading files. However, when
reading from any plugin jar, UTF-8 will be used, matching a super-majority
of plugin developer base and requirements for the plugin.yml.

Plugin developers may now mark their plugin as UTF-8 compliant, as
documented in the PluginDescriptionFile class. This change will cause the
appropriate APIs in JavaPlugin to ignore any system default encoding,
instead using a Reader with the UTF-8 encoding, effectively rendering the
jar system independent. This does not affect the aformentioned JVM
settings for reading and writing files.

To coincide with these changes, YamlConfiguration methods that utilize a
stream are now deprecated to encourage use of a more strict denotation.
File methods carry system-specific behaviors to prevent unncessary data
loss during the transitional phase, while Reader methods are now provided
that have a very well-defined encoder behavior. For the transition from
InputStream methods to Reader methods, an API has been added to JavaPlugin
to provide a Reader that matches the previous behavior as well as
compliance to the UTF-8 flag in the PluginDescriptionFile.

Addresses BUKKIT-314, BUKKIT-1466, BUKKIT-3377

By: Wesley Wolfe <wesley.d.wolfe+git@gmail.com>
This commit is contained in:
Bukkit/Spigot
2014-03-25 00:05:21 -05:00
parent 14d4cd125c
commit 300e61658e
5 changed files with 327 additions and 15 deletions

View File

@@ -1,25 +1,68 @@
package org.bukkit.configuration.file;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.commons.lang.Validate;
import org.bukkit.configuration.InvalidConfigurationException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import org.bukkit.configuration.Configuration;
import org.bukkit.configuration.MemoryConfiguration;
import org.yaml.snakeyaml.external.biz.base64Coder.Base64Coder;
/**
* This is a base class for all File based implementations of {@link
* Configuration}
*/
public abstract class FileConfiguration extends MemoryConfiguration {
/**
* This value specified that the system default encoding should be
* completely ignored, as it cannot handle the ASCII character set, or it
* is a strict-subset of UTF8 already (plain ASCII).
*
* @deprecated temporary compatibility measure
*/
@Deprecated
public static final boolean UTF8_OVERRIDE;
/**
* This value specifies if the system default encoding is unicode, but
* cannot parse standard ASCII.
*
* @deprecated temporary compatibility measure
*/
@Deprecated
public static final boolean UTF_BIG;
/**
* This value specifies if the system supports unicode.
*
* @deprecated temporary compatibility measure
*/
@Deprecated
public static final boolean SYSTEM_UTF;
static {
final byte[] testBytes = Base64Coder.decode("ICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX4NCg==");
final String testString = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\r\n";
final Charset defaultCharset = Charset.defaultCharset();
final String resultString = new String(testBytes, defaultCharset);
final boolean trueUTF = defaultCharset.name().contains("UTF");
UTF8_OVERRIDE = !testString.equals(resultString) || defaultCharset.equals(Charset.forName("US-ASCII"));
SYSTEM_UTF = trueUTF || UTF8_OVERRIDE;
UTF_BIG = trueUTF && UTF8_OVERRIDE;
}
/**
* Creates an empty {@link FileConfiguration} with no default values.
*/
@@ -43,6 +86,9 @@ public abstract class FileConfiguration extends MemoryConfiguration {
* If the file does not exist, it will be created. If already exists, it
* will be overwritten. If it cannot be overwritten or created, an
* exception will be thrown.
* <p>
* This method will save using the system default encoding, or possibly
* using UTF8.
*
* @param file File to save to.
* @throws IOException Thrown when the given file cannot be written to for
@@ -56,7 +102,7 @@ public abstract class FileConfiguration extends MemoryConfiguration {
String data = saveToString();
FileWriter writer = new FileWriter(file);
Writer writer = new OutputStreamWriter(new FileOutputStream(file), UTF8_OVERRIDE && !UTF_BIG ? Charsets.UTF_8 : Charset.defaultCharset());
try {
writer.write(data);
@@ -71,6 +117,9 @@ public abstract class FileConfiguration extends MemoryConfiguration {
* If the file does not exist, it will be created. If already exists, it
* will be overwritten. If it cannot be overwritten or created, an
* exception will be thrown.
* <p>
* This method will save using the system default encoding, or possibly
* using UTF8.
*
* @param file File to save to.
* @throws IOException Thrown when the given file cannot be written to for
@@ -99,6 +148,10 @@ public abstract class FileConfiguration extends MemoryConfiguration {
* <p>
* If the file cannot be loaded for any reason, an exception will be
* thrown.
* <p>
* This will attempt to use the {@link Charset#defaultCharset()} for
* files, unless {@link #UTF8_OVERRIDE} but not {@link #UTF_BIG} is
* specified.
*
* @param file File to load from.
* @throws FileNotFoundException Thrown when the given file cannot be
@@ -111,7 +164,9 @@ public abstract class FileConfiguration extends MemoryConfiguration {
public void load(File file) throws FileNotFoundException, IOException, InvalidConfigurationException {
Validate.notNull(file, "File cannot be null");
load(new FileInputStream(file));
final FileInputStream stream = new FileInputStream(file);
load(new InputStreamReader(stream, UTF8_OVERRIDE && !UTF_BIG ? Charsets.UTF_8 : Charset.defaultCharset()));
}
/**
@@ -120,20 +175,42 @@ public abstract class FileConfiguration extends MemoryConfiguration {
* All the values contained within this configuration will be removed,
* leaving only settings and defaults, and the new values will be loaded
* from the given stream.
* <p>
* This will attempt to use the {@link Charset#defaultCharset()}, unless
* {@link #UTF8_OVERRIDE} or {@link #UTF_BIG} is specified.
*
* @param stream Stream to load from
* @throws IOException Thrown when the given file cannot be read.
* @throws InvalidConfigurationException Thrown when the given file is not
* a valid Configuration.
* @throws IllegalArgumentException Thrown when stream is null.
* @deprecated This does not consider encoding
* @see #load(Reader)
*/
@Deprecated
public void load(InputStream stream) throws IOException, InvalidConfigurationException {
Validate.notNull(stream, "Stream cannot be null");
InputStreamReader reader = new InputStreamReader(stream);
StringBuilder builder = new StringBuilder();
BufferedReader input = new BufferedReader(reader);
load(new InputStreamReader(stream, UTF8_OVERRIDE ? Charsets.UTF_8 : Charset.defaultCharset()));
}
/**
* Loads this {@link FileConfiguration} from the specified reader.
* <p>
* All the values contained within this configuration will be removed,
* leaving only settings and defaults, and the new values will be loaded
* from the given stream.
*
* @param reader the reader to load from
* @throws IOException thrown when underlying reader throws an IOException
* @throws InvalidConfigurationException thrown when the reader does not
* represent a valid Configuration
* @throws IllegalArgumentException thrown when reader is null
*/
public void load(Reader reader) throws IOException, InvalidConfigurationException {
BufferedReader input = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
StringBuilder builder = new StringBuilder();
try {
String line;

View File

@@ -4,6 +4,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Map;
import java.util.logging.Level;
@@ -32,6 +33,7 @@ public class YamlConfiguration extends FileConfiguration {
public String saveToString() {
yamlOptions.setIndent(options().indent());
yamlOptions.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
yamlOptions.setAllowUnicode(SYSTEM_UTF);
yamlRepresenter.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
String header = buildHeader();
@@ -162,6 +164,8 @@ public class YamlConfiguration extends FileConfiguration {
* Any errors loading the Configuration will be logged and then ignored.
* If the specified input is not a valid config, a blank config will be
* returned.
* <p>
* The encoding used may follow the system dependent default.
*
* @param file Input file
* @return Resulting configuration
@@ -194,7 +198,11 @@ public class YamlConfiguration extends FileConfiguration {
* @param stream Input stream
* @return Resulting configuration
* @throws IllegalArgumentException Thrown if stream is null
* @deprecated does not properly consider encoding
* @see #load(InputStream)
* @see #loadConfiguration(Reader)
*/
@Deprecated
public static YamlConfiguration loadConfiguration(InputStream stream) {
Validate.notNull(stream, "Stream cannot be null");
@@ -210,4 +218,32 @@ public class YamlConfiguration extends FileConfiguration {
return config;
}
/**
* Creates a new {@link YamlConfiguration}, loading from the given reader.
* <p>
* Any errors loading the Configuration will be logged and then ignored.
* If the specified input is not a valid config, a blank config will be
* returned.
*
* @param reader input
* @return resulting configuration
* @throws IllegalArgumentException Thrown if stream is null
*/
public static YamlConfiguration loadConfiguration(Reader reader) {
Validate.notNull(reader, "Stream cannot be null");
YamlConfiguration config = new YamlConfiguration();
try {
config.load(reader);
} catch (IOException ex) {
Bukkit.getLogger().log(Level.SEVERE, "Cannot load configuration from stream", ex);
} catch (InvalidConfigurationException ex) {
Bukkit.getLogger().log(Level.SEVERE, "Cannot load configuration from stream", ex);
}
return config;
}
}