001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.File; 020import java.io.FileNotFoundException; 021import java.io.FileOutputStream; 022import java.io.IOException; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.io.StringWriter; 026import java.io.Writer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.Locale; 030import java.util.Objects; 031import java.util.regex.Matcher; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035import org.apache.commons.io.build.AbstractStreamBuilder; 036import org.apache.commons.io.input.XmlStreamReader; 037 038/** 039 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream. 040 * <p> 041 * To build an instance, see {@link Builder}. 042 * </p> 043 * 044 * @see XmlStreamReader 045 * @since 2.0 046 */ 047public class XmlStreamWriter extends Writer { 048 049 /** 050 * Builds a new {@link XmlStreamWriter} instance. 051 * <p> 052 * For example: 053 * </p> 054 * <pre>{@code 055 * WriterOutputStream w = WriterOutputStream.builder() 056 * .setPath(path) 057 * .setCharset(StandardCharsets.UTF_8) 058 * .get();} 059 * </pre> 060 * 061 * @since 2.12.0 062 */ 063 public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> { 064 065 /** 066 * Constructs a new Builder. 067 */ 068 public Builder() { 069 setCharsetDefault(StandardCharsets.UTF_8); 070 setCharset(StandardCharsets.UTF_8); 071 } 072 073 /** 074 * Constructs a new instance. 075 * <p> 076 * This builder use the aspect OutputStream, OpenOption[], and Charset. 077 * </p> 078 * <p> 079 * You must provide an origin that can be converted to an OutputStream by this builder, otherwise, this call will throw an 080 * {@link UnsupportedOperationException}. 081 * </p> 082 * 083 * @return a new instance. 084 * @throws UnsupportedOperationException if the origin cannot provide an OutputStream. 085 * @throws IOException if an I/O error occurs. 086 * @see #getOutputStream() 087 */ 088 @SuppressWarnings("resource") 089 @Override 090 public XmlStreamWriter get() throws IOException { 091 return new XmlStreamWriter(getOutputStream(), getCharset()); 092 } 093 094 } 095 096 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 097 098 /** 099 * Constructs a new {@link Builder}. 100 * 101 * @return a new {@link Builder}. 102 * @since 2.12.0 103 */ 104 public static Builder builder() { 105 return new Builder(); 106 } 107 108 private final OutputStream out; 109 110 private final Charset defaultCharset; 111 112 private StringWriter prologWriter = new StringWriter(BUFFER_SIZE); 113 114 private Writer writer; 115 116 private Charset charset; 117 118 /** 119 * Constructs a new XML stream writer for the specified file 120 * with a default encoding of UTF-8. 121 * 122 * @param file The file to write to 123 * @throws FileNotFoundException if there is an error creating or 124 * opening the file 125 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 126 */ 127 @Deprecated 128 public XmlStreamWriter(final File file) throws FileNotFoundException { 129 this(file, null); 130 } 131 132 /** 133 * Constructs a new XML stream writer for the specified file 134 * with the specified default encoding. 135 * 136 * @param file The file to write to 137 * @param defaultEncoding The default encoding if not encoding could be detected 138 * @throws FileNotFoundException if there is an error creating or 139 * opening the file 140 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 141 */ 142 @Deprecated 143 @SuppressWarnings("resource") 144 public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException { 145 this(new FileOutputStream(file), defaultEncoding); 146 } 147 148 /** 149 * Constructs a new XML stream writer for the specified output stream 150 * with a default encoding of UTF-8. 151 * 152 * @param out The output stream 153 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 154 */ 155 @Deprecated 156 public XmlStreamWriter(final OutputStream out) { 157 this(out, StandardCharsets.UTF_8); 158 } 159 160 /** 161 * Constructs a new XML stream writer for the specified output stream 162 * with the specified default encoding. 163 * 164 * @param out The output stream 165 * @param defaultEncoding The default encoding if not encoding could be detected 166 */ 167 private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) { 168 this.out = out; 169 this.defaultCharset = Objects.requireNonNull(defaultEncoding); 170 } 171 172 /** 173 * Constructs a new XML stream writer for the specified output stream 174 * with the specified default encoding. 175 * 176 * @param out The output stream 177 * @param defaultEncoding The default encoding if not encoding could be detected 178 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 179 */ 180 @Deprecated 181 public XmlStreamWriter(final OutputStream out, final String defaultEncoding) { 182 this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8)); 183 } 184 185 /** 186 * Closes the underlying writer. 187 * 188 * @throws IOException if an error occurs closing the underlying writer 189 */ 190 @Override 191 public void close() throws IOException { 192 if (writer == null) { 193 charset = defaultCharset; 194 writer = new OutputStreamWriter(out, charset); 195 writer.write(prologWriter.toString()); 196 } 197 writer.close(); 198 } 199 200 /** 201 * Detects the encoding. 202 * 203 * @param cbuf the buffer to write the characters from 204 * @param off The start offset 205 * @param len The number of characters to write 206 * @throws IOException if an error occurs detecting the encoding 207 */ 208 private void detectEncoding(final char[] cbuf, final int off, final int len) 209 throws IOException { 210 int size = len; 211 final StringBuffer xmlProlog = prologWriter.getBuffer(); 212 if (xmlProlog.length() + len > BUFFER_SIZE) { 213 size = BUFFER_SIZE - xmlProlog.length(); 214 } 215 prologWriter.write(cbuf, off, size); 216 217 // try to determine encoding 218 if (xmlProlog.length() >= 5) { 219 if (xmlProlog.substring(0, 5).equals("<?xml")) { 220 // try to extract encoding from XML prolog 221 final int xmlPrologEnd = xmlProlog.indexOf("?>"); 222 if (xmlPrologEnd > 0) { 223 // ok, full XML prolog written: let's extract encoding 224 final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0, 225 xmlPrologEnd)); 226 if (m.find()) { 227 final String encName = m.group(1).toUpperCase(Locale.ROOT); 228 charset = Charset.forName(encName.substring(1, encName.length() - 1)); 229 } else { 230 // no encoding found in XML prolog: using default 231 // encoding 232 charset = defaultCharset; 233 } 234 } else if (xmlProlog.length() >= BUFFER_SIZE) { 235 // no encoding found in first characters: using default 236 // encoding 237 charset = defaultCharset; 238 } 239 } else { 240 // no XML prolog: using default encoding 241 charset = defaultCharset; 242 } 243 if (charset != null) { 244 // encoding has been chosen: let's do it 245 prologWriter = null; 246 writer = new OutputStreamWriter(out, charset); 247 writer.write(xmlProlog.toString()); 248 if (len > size) { 249 writer.write(cbuf, off + size, len - size); 250 } 251 } 252 } 253 } 254 255 /** 256 * Flushes the underlying writer. 257 * 258 * @throws IOException if an error occurs flushing the underlying writer 259 */ 260 @Override 261 public void flush() throws IOException { 262 if (writer != null) { 263 writer.flush(); 264 } 265 } 266 267 /** 268 * Returns the default encoding. 269 * 270 * @return the default encoding 271 */ 272 public String getDefaultEncoding() { 273 return defaultCharset.name(); 274 } 275 276 /** 277 * Returns the detected encoding. 278 * 279 * @return the detected encoding 280 */ 281 public String getEncoding() { 282 return charset.name(); 283 } 284 285 /** 286 * Writes the characters to the underlying writer, detecting encoding. 287 * 288 * @param cbuf the buffer to write the characters from 289 * @param off The start offset 290 * @param len The number of characters to write 291 * @throws IOException if an error occurs detecting the encoding 292 */ 293 @Override 294 public void write(final char[] cbuf, final int off, final int len) throws IOException { 295 if (prologWriter != null) { 296 detectEncoding(cbuf, off, len); 297 } else { 298 writer.write(cbuf, off, len); 299 } 300 } 301}