QuotedString.java
/*
* Copyright (C) 2008, 2019 Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.eclipse.jgit.lib.Constants;
/**
* Utility functions related to quoted string handling.
*/
public abstract class QuotedString {
/**
* Quoting style that obeys the rules Git applies to file names.
* <p>
* Besides escaping the ASCII characters that are not considered safe, this
* style writes every byte outside of the 7-bit ASCII range as a three-digit
* octal escape.
*/
public static final GitPathStyle GIT_PATH = new GitPathStyle(true);
/**
* Quoting style that obeys the rules Git applies to file names when
* {@code core.quotePath = false}.
* <p>
* Unlike {@link #GIT_PATH}, bytes outside of the 7-bit ASCII range are
* copied through without being escaped.
*
* @since 5.6
*/
public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false);
/**
* Quoting style used by the Bourne shell.
* <p>
* Quotes are unconditionally inserted during {@link #quote(String)}. This
* protects shell meta-characters like <code>$</code> or <code>~</code> from
* being recognized as special. A single quote or exclamation mark in the
* input is written as a backslash escape placed between two quoted
* sections.
*/
public static final BourneStyle BOURNE = new BourneStyle();
/**
* Bourne style, but permits <code>~user</code> at the start of the string.
* <p>
* A leading <code>~/</code> or <code>~user/</code> is left unquoted; only
* the remainder of the string is quoted.
*/
public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
/**
* Quote an input string by the quoting rules.
* <p>
* If the input string does not require any quoting, the same String
* reference is returned to the caller. Styles that insert quotes
* unconditionally, such as {@link #BOURNE}, return a newly built string
* instead.
* <p>
* Otherwise a quoted string is returned, including the opening and closing
* quotation marks at the start and end of the string. If the style does not
* permit raw Unicode characters then the string will first be encoded in
* UTF-8, with unprintable sequences possibly escaped by the rules.
*
* @param in
* any non-null Unicode string.
* @return a quoted string. See above for details.
*/
public abstract String quote(String in);
/**
 * Remove the quoting from a previously quoted string.
 * <p>
 * The string is converted to bytes with {@link Constants#encode(String)}
 * and the complete buffer is handed to {@link #dequote(byte[], int, int)}.
 * <p>
 * This method must match quote such that:
 *
 * <pre>
 * a.equals(dequote(quote(a)));
 * </pre>
 *
 * is true for any <code>a</code>.
 *
 * @param in
 *            a Unicode string to remove quoting from.
 * @return the cleaned string.
 * @see #dequote(byte[], int, int)
 */
public String dequote(String in) {
    final byte[] raw = Constants.encode(in);
    final int length = raw.length;
    return dequote(raw, 0, length);
}
/**
* Decode a previously quoted input, scanning a UTF-8 encoded buffer.
* <p>
* This method must match quote such that:
*
* <pre>
* a.equals(dequote(Constants.encode(quote(a))));
* </pre>
*
* is true for any <code>a</code>.
* <p>
* This method removes any opening/closing quotation marks added by
* {@link #quote(String)}.
*
* @param in
* the input buffer to parse.
* @param offset
* first position within <code>in</code> to scan.
* @param end
* one position past the last byte within <code>in</code> to scan; the
* byte at this position is not read.
* @return the cleaned string.
*/
public abstract String dequote(byte[] in, int offset, int end);
/**
 * Quoting style used by the Bourne shell.
 * <p>
 * Quotes are unconditionally inserted during {@link #quote(String)}. This
 * protects shell meta-characters like <code>$</code> or <code>~</code> from
 * being recognized as special.
 */
public static class BourneStyle extends QuotedString {
    /**
     * Wrap the input in single quotes.
     * <p>
     * A single quote or exclamation mark cannot stay inside the quoted
     * section, so each one is emitted as: closing quote, backslash, the
     * character itself, opening quote.
     */
    @Override
    public String quote(String in) {
        final int len = in.length();
        final StringBuilder out = new StringBuilder();
        out.append('\'');
        for (int pos = 0; pos < len; pos++) {
            final char ch = in.charAt(pos);
            if (ch == '\'' || ch == '!') {
                // Leave the quoted section, escape the character, and
                // re-enter the quoted section.
                out.append('\'').append('\\').append(ch).append('\'');
            } else {
                out.append(ch);
            }
        }
        out.append('\'');
        return out.toString();
    }

    /**
     * Strip single quotes and backslash escapes from the given range.
     * <p>
     * Single quotes are dropped and toggle the "inside quotes" state.
     * Outside of quotes a backslash is dropped and the byte after it is
     * copied as-is. Inside quotes, or as the very last byte, a backslash
     * is copied literally.
     */
    @Override
    public String dequote(byte[] in, int ip, int ie) {
        final byte[] buf = new byte[ie - ip];
        int used = 0;
        boolean quoted = false;
        int pos = ip;
        while (pos < ie) {
            final byte cur = in[pos++];
            if (cur == '\'') {
                quoted = !quoted;
            } else if (cur == '\\' && !quoted && pos < ie) {
                // Escape outside of quotes: keep only the escaped byte.
                buf[used++] = in[pos++];
            } else {
                buf[used++] = cur;
            }
        }
        return RawParseUtils.decode(UTF_8, buf, 0, used);
    }
}
/**
 * Bourne style, but permits <code>~user</code> at the start of the string.
 * <p>
 * A leading <code>~/</code> or <code>~user/</code> is passed through
 * unquoted so the shell can still expand it; the rest of the string is
 * quoted by {@link BourneStyle#quote(String)}.
 */
public static class BourneUserPathStyle extends BourneStyle {
    /**
     * Matches a bare <code>~user</code> with nothing after the user name.
     * Compiled once rather than on every {@link #quote(String)} call.
     */
    private static final Pattern USER_ONLY = Pattern
            .compile("^~[A-Za-z0-9_-]+$"); //$NON-NLS-1$

    /**
     * Matches <code>~/path</code> or <code>~user/path</code>. Compiled
     * once rather than on every {@link #quote(String)} call.
     */
    private static final Pattern USER_PATH = Pattern
            .compile("^~[A-Za-z0-9_-]*/.*$"); //$NON-NLS-1$

    /**
     * Quote the input while keeping a leading home directory reference
     * unquoted.
     *
     * @param in
     *            any non-null Unicode string.
     * @return <code>in</code> followed by <code>/</code> when the input is
     *         just <code>~user</code>; the input itself when it is only
     *         <code>~/</code> or <code>~user/</code>; the unquoted prefix
     *         followed by the quoted remainder when a path follows the
     *         prefix; otherwise the fully quoted input.
     */
    @Override
    public String quote(String in) {
        if (USER_ONLY.matcher(in).matches()) {
            // If the string is just "~user" we can assume they
            // mean "~user/".
            //
            return in + "/"; //$NON-NLS-1$
        }

        if (USER_PATH.matcher(in).matches()) {
            // If the string is of "~/path" or "~user/path"
            // we must not escape ~/ or ~user/ from the shell.
            //
            final int i = in.indexOf('/') + 1;
            if (i == in.length()) {
                // Nothing follows the prefix, so there is nothing to quote.
                return in;
            }
            return in.substring(0, i) + super.quote(in.substring(i));
        }

        return super.quote(in);
    }
}
/**
 * Quoting style that obeys the rules Git applies to file names.
 * <p>
 * Quoted output is wrapped in double quotes. Unsafe ASCII characters are
 * written either as a backslash followed by a letter, or as a backslash
 * followed by three octal digits. Whether bytes above the 7-bit ASCII
 * range are escaped as well is decided when the instance is constructed.
 */
public static final class GitPathStyle extends QuotedString {
    /**
     * Escape table indexed by 7-bit ASCII value: {@code 0} copies the
     * character unchanged, a positive value is the character written after
     * a backslash, and {@code -1} requests a three-digit octal escape.
     */
    private static final byte[] ESCAPES = buildEscapes();

    /** Whether bytes outside of the table's range are octal escaped. */
    private final boolean quoteHigh;

    private GitPathStyle(boolean doQuote) {
        quoteHigh = doQuote;
    }

    /** Build the escape table consulted by {@link #quote(String)}. */
    private static byte[] buildEscapes() {
        final byte[] table = new byte[128];

        // Everything is octal escaped unless listed below.
        Arrays.fill(table, (byte) -1);

        for (char ch = '0'; ch <= '9'; ch++) {
            table[ch] = 0;
        }
        for (char ch = 'a'; ch <= 'z'; ch++) {
            table[ch] = 0;
        }
        for (char ch = 'A'; ch <= 'Z'; ch++) {
            table[ch] = 0;
        }

        // Punctuation that is copied through unchanged.
        final String safe = " $%&*+,-./:;=?@_^|~"; //$NON-NLS-1$
        for (int k = 0; k < safe.length(); k++) {
            table[safe.charAt(k)] = 0;
        }

        // Characters that have a dedicated backslash + letter form.
        table[0x07] = 'a'; // BEL
        table['\b'] = 'b';
        table['\f'] = 'f';
        table['\n'] = 'n';
        table['\r'] = 'r';
        table['\t'] = 't';
        table[0x0B] = 'v'; // VT
        table['\\'] = '\\';
        table['"'] = '"';
        return table;
    }

    /**
     * Quote the input by the Git path rules.
     * <p>
     * An empty input yields a pair of double quotes. If no byte needed an
     * escape, the input reference itself is returned without any quotes.
     */
    @Override
    public String quote(String instr) {
        if (instr.isEmpty()) {
            return "\"\""; //$NON-NLS-1$
        }

        final byte[] src = Constants.encode(instr);
        // Worst case: every byte becomes a four byte octal escape, plus
        // the opening and closing quote.
        final byte[] dst = new byte[4 * src.length + 2];
        int w = 0;
        boolean escaped = false;

        dst[w++] = '"';
        for (int k = 0; k < src.length; k++) {
            final int c = src[k] & 0xff;
            final byte style;
            if (c < ESCAPES.length) {
                style = ESCAPES[c];
            } else if (quoteHigh) {
                style = (byte) -1;
            } else {
                style = (byte) 0;
            }

            if (style == 0) {
                dst[w++] = (byte) c;
            } else if (style > 0) {
                escaped = true;
                dst[w++] = '\\';
                dst[w++] = style;
            } else {
                escaped = true;
                dst[w++] = '\\';
                dst[w++] = (byte) ('0' + ((c >> 6) & 03));
                dst[w++] = (byte) ('0' + ((c >> 3) & 07));
                dst[w++] = (byte) ('0' + (c & 07));
            }
        }

        if (!escaped) {
            return instr;
        }
        dst[w++] = '"';
        return new String(dst, 0, w, UTF_8);
    }

    /**
     * Decode the given range. Escapes are only interpreted when the range
     * both starts and ends with a double quote; any other range is decoded
     * as-is.
     */
    @Override
    public String dequote(byte[] in, int inPtr, int inEnd) {
        final boolean wrapped = inEnd - inPtr >= 2 && in[inPtr] == '"'
                && in[inEnd - 1] == '"';
        if (!wrapped) {
            return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
        }
        return dq(in, inPtr + 1, inEnd - 1);
    }

    /**
     * Resolve backslash escapes within the range between the surrounding
     * quotes and decode the resulting bytes.
     */
    private static String dq(byte[] in, int inPtr, int inEnd) {
        final byte[] buf = new byte[inEnd - inPtr];
        int used = 0;
        int pos = inPtr;

        while (pos < inEnd) {
            final byte cur = in[pos++];
            if (cur != '\\') {
                buf[used++] = cur;
                continue;
            }

            if (pos == inEnd) {
                // Lone trailing backslash. Treat it as a literal.
                buf[used++] = '\\';
                break;
            }

            final byte code = in[pos++];
            switch (code) {
            case 'a':
                buf[used++] = 0x07; // BEL
                break;
            case 'b':
                buf[used++] = '\b';
                break;
            case 'f':
                buf[used++] = '\f';
                break;
            case 'n':
                buf[used++] = '\n';
                break;
            case 'r':
                buf[used++] = '\r';
                break;
            case 't':
                buf[used++] = '\t';
                break;
            case 'v':
                buf[used++] = 0x0B; // VT
                break;

            case '\\':
            case '"':
                buf[used++] = code;
                break;

            case '0':
            case '1':
            case '2':
            case '3': {
                // Octal escape: the leading digit plus up to two more
                // octal digits, stopping early at the first other byte.
                int cp = code - '0';
                int digits = 1;
                while (digits < 3 && pos < inEnd && '0' <= in[pos]
                        && in[pos] <= '7') {
                    cp = (cp << 3) | (in[pos] - '0');
                    pos++;
                    digits++;
                }
                buf[used++] = (byte) cp;
                break;
            }

            default:
                // Any other code is taken literally, backslash included.
                buf[used++] = '\\';
                buf[used++] = code;
                break;
            }
        }
        return RawParseUtils.decode(UTF_8, buf, 0, used);
    }
}
}