ObjectChecker.java
/*
* Copyright (C) 2008-2010, Google Inc.
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.lib;
import static org.eclipse.jgit.lib.Constants.DOT_GIT_MODULES;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
import static org.eclipse.jgit.util.Paths.compare;
import static org.eclipse.jgit.util.Paths.compareSameName;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
import java.text.MessageFormat;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;
import org.eclipse.jgit.util.StringUtils;
/**
* Verifies that an object is formatted correctly.
* <p>
* Verifications made by this class only check that the fields of an object are
* formatted correctly. The ObjectId checksum of the object is not verified, and
* connectivity links between objects are also not verified. Its assumed that
* the caller can provide both of these validations on its own.
* <p>
* Instances of this class are not thread safe, but they may be reused to
* perform multiple object validations, calling {@link #reset()} between them to
* clear the internal state (e.g. {@link #getGitsubmodules()})
*/
public class ObjectChecker {
/** Header "tree " */
public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$
/** Header "parent " */
public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$
/** Header "author " */
public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$
/** Header "committer " */
public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$
/** Header "encoding " */
public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$
/** Header "object " */
public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$
/** Header "type " */
public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$
/** Header "tag " */
public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$
/** Header "tagger " */
public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
/** Path ".gitmodules" */
private static final byte[] dotGitmodules = Constants.encodeASCII(DOT_GIT_MODULES);
/**
* Potential issues identified by the checker.
*
* @since 4.2
*/
public enum ErrorType {
// @formatter:off
// These names match git-core so that fsck section keys also match.
/***/ NULL_SHA1,
/***/ DUPLICATE_ENTRIES,
/***/ TREE_NOT_SORTED,
/***/ ZERO_PADDED_FILEMODE,
/***/ EMPTY_NAME,
/***/ FULL_PATHNAME,
/***/ HAS_DOT,
/***/ HAS_DOTDOT,
/***/ HAS_DOTGIT,
/***/ BAD_OBJECT_SHA1,
/***/ BAD_PARENT_SHA1,
/***/ BAD_TREE_SHA1,
/***/ MISSING_AUTHOR,
/***/ MISSING_COMMITTER,
/***/ MISSING_OBJECT,
/***/ MISSING_TREE,
/***/ MISSING_TYPE_ENTRY,
/***/ MISSING_TAG_ENTRY,
/***/ BAD_DATE,
/***/ BAD_EMAIL,
/***/ BAD_TIMEZONE,
/***/ MISSING_EMAIL,
/***/ MISSING_SPACE_BEFORE_DATE,
/** @since 5.2 */ GITMODULES_BLOB,
/** @since 5.2 */ GITMODULES_LARGE,
/** @since 5.2 */ GITMODULES_NAME,
/** @since 5.2 */ GITMODULES_PARSE,
/** @since 5.2 */ GITMODULES_PATH,
/** @since 5.2 */ GITMODULES_SYMLINK,
/** @since 5.2 */ GITMODULES_URL,
/***/ UNKNOWN_TYPE,
// These are unique to JGit.
/***/ WIN32_BAD_NAME,
/***/ BAD_UTF8;
// @formatter:on
/** @return camelCaseVersion of the name. */
public String getMessageId() {
String n = name();
StringBuilder r = new StringBuilder(n.length());
for (int i = 0; i < n.length(); i++) {
char c = n.charAt(i);
if (c != '_') {
r.append(StringUtils.toLowerCase(c));
} else {
r.append(n.charAt(++i));
}
}
return r.toString();
}
}
private final MutableObjectId tempId = new MutableObjectId();
private final MutableInteger bufPtr = new MutableInteger();
private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class);
private ObjectIdSet skipList;
private boolean allowInvalidPersonIdent;
private boolean windows;
private boolean macosx;
private final List<GitmoduleEntry> gitsubmodules = new ArrayList<>();
/**
* Enable accepting specific malformed (but not horribly broken) objects.
*
* @param objects
* collection of object names known to be broken in a non-fatal
* way that should be ignored by the checker.
* @return {@code this}
* @since 4.2
*/
public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
skipList = objects;
return this;
}
/**
* Configure error types to be ignored across all objects.
*
* @param ids
* error types to ignore. The caller's set is copied.
* @return {@code this}
* @since 4.2
*/
public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
errors = EnumSet.allOf(ErrorType.class);
if (ids != null) {
errors.removeAll(ids);
}
return this;
}
/**
* Add message type to be ignored across all objects.
*
* @param id
* error type to ignore.
* @param ignore
* true to ignore this error; false to treat the error as an
* error and throw.
* @return {@code this}
* @since 4.2
*/
public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
if (ignore) {
errors.remove(id);
} else {
errors.add(id);
}
return this;
}
/**
* Enable accepting leading zero mode in tree entries.
* <p>
* Some broken Git libraries generated leading zeros in the mode part of
* tree entries. This is technically incorrect but gracefully allowed by
* git-core. JGit rejects such trees by default, but may need to accept
* them on broken histories.
* <p>
* Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
*
* @param allow allow leading zero mode.
* @return {@code this}.
* @since 3.4
*/
public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
return setIgnore(ZERO_PADDED_FILEMODE, allow);
}
/**
* Enable accepting invalid author, committer and tagger identities.
* <p>
* Some broken Git versions/libraries allowed users to create commits and
* tags with invalid formatting between the name, email and timestamp.
*
* @param allow
* if true accept invalid person identity strings.
* @return {@code this}.
* @since 4.0
*/
public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
allowInvalidPersonIdent = allow;
return this;
}
/**
* Restrict trees to only names legal on Windows platforms.
* <p>
* Also rejects any mixed case forms of reserved names ({@code .git}).
*
* @param win true if Windows name checking should be performed.
* @return {@code this}.
* @since 3.4
*/
public ObjectChecker setSafeForWindows(boolean win) {
windows = win;
return this;
}
/**
* Restrict trees to only names legal on Mac OS X platforms.
* <p>
* Rejects any mixed case forms of reserved names ({@code .git})
* for users working on HFS+ in case-insensitive (default) mode.
*
* @param mac true if Mac OS X name checking should be performed.
* @return {@code this}.
* @since 3.4
*/
public ObjectChecker setSafeForMacOS(boolean mac) {
macosx = mac;
return this;
}
/**
* Check an object for parsing errors.
*
* @param objType
* type of the object. Must be a valid object type code in
* {@link org.eclipse.jgit.lib.Constants}.
* @param raw
* the raw data which comprises the object. This should be in the
* canonical format (that is the format used to generate the
* ObjectId of the object). The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if an error is identified.
*/
public void check(int objType, byte[] raw)
throws CorruptObjectException {
check(idFor(objType, raw), objType, raw);
}
/**
* Check an object for parsing errors.
*
* @param id
* identify of the object being checked.
* @param objType
* type of the object. Must be a valid object type code in
* {@link org.eclipse.jgit.lib.Constants}.
* @param raw
* the raw data which comprises the object. This should be in the
* canonical format (that is the format used to generate the
* ObjectId of the object). The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if an error is identified.
* @since 4.2
*/
public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
throws CorruptObjectException {
switch (objType) {
case OBJ_COMMIT:
checkCommit(id, raw);
break;
case OBJ_TAG:
checkTag(id, raw);
break;
case OBJ_TREE:
checkTree(id, raw);
break;
case OBJ_BLOB:
BlobObjectChecker checker = newBlobObjectChecker();
if (checker == null) {
checkBlob(raw);
} else {
checker.update(raw, 0, raw.length);
checker.endBlob(id);
}
break;
default:
report(UNKNOWN_TYPE, id, MessageFormat.format(
JGitText.get().corruptObjectInvalidType2,
Integer.valueOf(objType)));
}
}
private boolean checkId(byte[] raw) {
int p = bufPtr.value;
try {
tempId.fromString(raw, p);
} catch (IllegalArgumentException e) {
bufPtr.value = nextLF(raw, p);
return false;
}
p += OBJECT_ID_STRING_LENGTH;
if (raw[p] == '\n') {
bufPtr.value = p + 1;
return true;
}
bufPtr.value = nextLF(raw, p);
return false;
}
private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id)
throws CorruptObjectException {
if (allowInvalidPersonIdent) {
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
final int emailB = nextLF(raw, bufPtr.value, '<');
if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
final int emailE = nextLF(raw, emailB, '>');
if (emailE == emailB || raw[emailE - 1] != '>') {
report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
if (emailE == raw.length || raw[emailE] != ' ') {
report(MISSING_SPACE_BEFORE_DATE, id,
JGitText.get().corruptObjectBadDate);
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
parseBase10(raw, emailE + 1, bufPtr); // when
if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length
|| raw[bufPtr.value] != ' ') {
report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
int p = bufPtr.value + 1;
parseBase10(raw, p, bufPtr); // tz offset
if (p == bufPtr.value) {
report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
bufPtr.value = nextLF(raw, bufPtr.value);
return;
}
p = bufPtr.value;
if (raw[p] == '\n') {
bufPtr.value = p + 1;
} else {
report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
bufPtr.value = nextLF(raw, p);
}
}
/**
* Check a commit for errors.
*
* @param raw
* the commit data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
*/
public void checkCommit(byte[] raw) throws CorruptObjectException {
checkCommit(idFor(OBJ_COMMIT, raw), raw);
}
/**
* Check a commit for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the commit data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
bufPtr.value = 0;
if (!match(raw, tree)) {
report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
} else if (!checkId(raw)) {
report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
}
while (match(raw, parent)) {
if (!checkId(raw)) {
report(BAD_PARENT_SHA1, id,
JGitText.get().corruptObjectInvalidParent);
}
}
if (match(raw, author)) {
checkPersonIdent(raw, id);
} else {
report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
}
if (match(raw, committer)) {
checkPersonIdent(raw, id);
} else {
report(MISSING_COMMITTER, id,
JGitText.get().corruptObjectNoCommitter);
}
}
/**
* Check an annotated tag for errors.
*
* @param raw
* the tag data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
*/
public void checkTag(byte[] raw) throws CorruptObjectException {
checkTag(idFor(OBJ_TAG, raw), raw);
}
/**
* Check an annotated tag for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the tag data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkTag(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
bufPtr.value = 0;
if (!match(raw, object)) {
report(MISSING_OBJECT, id,
JGitText.get().corruptObjectNoObjectHeader);
} else if (!checkId(raw)) {
report(BAD_OBJECT_SHA1, id,
JGitText.get().corruptObjectInvalidObject);
}
if (!match(raw, type)) {
report(MISSING_TYPE_ENTRY, id,
JGitText.get().corruptObjectNoTypeHeader);
}
bufPtr.value = nextLF(raw, bufPtr.value);
if (!match(raw, tag)) {
report(MISSING_TAG_ENTRY, id,
JGitText.get().corruptObjectNoTagHeader);
}
bufPtr.value = nextLF(raw, bufPtr.value);
if (match(raw, tagger)) {
checkPersonIdent(raw, id);
}
}
private static boolean duplicateName(final byte[] raw,
final int thisNamePos, final int thisNameEnd) {
final int sz = raw.length;
int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
for (;;) {
int nextMode = 0;
for (;;) {
if (nextPtr >= sz)
return false;
final byte c = raw[nextPtr++];
if (' ' == c)
break;
nextMode <<= 3;
nextMode += c - '0';
}
final int nextNamePos = nextPtr;
for (;;) {
if (nextPtr == sz)
return false;
final byte c = raw[nextPtr++];
if (c == 0)
break;
}
if (nextNamePos + 1 == nextPtr)
return false;
int cmp = compareSameName(
raw, thisNamePos, thisNameEnd,
raw, nextNamePos, nextPtr - 1, nextMode);
if (cmp < 0)
return false;
else if (cmp == 0)
return true;
nextPtr += Constants.OBJECT_ID_LENGTH;
}
}
/**
* Check a canonical formatted tree for errors.
*
* @param raw
* the raw tree data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
*/
public void checkTree(byte[] raw) throws CorruptObjectException {
checkTree(idFor(OBJ_TREE, raw), raw);
}
/**
* Check a canonical formatted tree for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the raw tree data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkTree(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
final int sz = raw.length;
int ptr = 0;
int lastNameB = 0, lastNameE = 0, lastMode = 0;
Set<String> normalized = windows || macosx
? new HashSet<>()
: null;
while (ptr < sz) {
int thisMode = 0;
for (;;) {
if (ptr == sz) {
throw new CorruptObjectException(
JGitText.get().corruptObjectTruncatedInMode);
}
final byte c = raw[ptr++];
if (' ' == c)
break;
if (c < '0' || c > '7') {
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidModeChar);
}
if (thisMode == 0 && c == '0') {
report(ZERO_PADDED_FILEMODE, id,
JGitText.get().corruptObjectInvalidModeStartsZero);
}
thisMode <<= 3;
thisMode += c - '0';
}
if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().corruptObjectInvalidMode2,
Integer.valueOf(thisMode)));
}
final int thisNameB = ptr;
ptr = scanPathSegment(raw, ptr, sz, id);
if (ptr == sz || raw[ptr] != 0) {
throw new CorruptObjectException(
JGitText.get().corruptObjectTruncatedInName);
}
checkPathSegment2(raw, thisNameB, ptr, id);
if (normalized != null) {
if (!normalized.add(normalize(raw, thisNameB, ptr))) {
report(DUPLICATE_ENTRIES, id,
JGitText.get().corruptObjectDuplicateEntryNames);
}
} else if (duplicateName(raw, thisNameB, ptr)) {
report(DUPLICATE_ENTRIES, id,
JGitText.get().corruptObjectDuplicateEntryNames);
}
if (lastNameB != 0) {
int cmp = compare(
raw, lastNameB, lastNameE, lastMode,
raw, thisNameB, ptr, thisMode);
if (cmp > 0) {
report(TREE_NOT_SORTED, id,
JGitText.get().corruptObjectIncorrectSorting);
}
}
lastNameB = thisNameB;
lastNameE = ptr;
lastMode = thisMode;
ptr += 1 + OBJECT_ID_LENGTH;
if (ptr > sz) {
throw new CorruptObjectException(
JGitText.get().corruptObjectTruncatedInObjectId);
}
if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
}
if (id != null && isGitmodules(raw, lastNameB, lastNameE, id)) {
ObjectId blob = ObjectId.fromRaw(raw, ptr - OBJECT_ID_LENGTH);
gitsubmodules.add(new GitmoduleEntry(id, blob));
}
}
}
private int scanPathSegment(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
for (; ptr < end; ptr++) {
byte c = raw[ptr];
if (c == 0) {
return ptr;
}
if (c == '/') {
report(FULL_PATHNAME, id,
JGitText.get().corruptObjectNameContainsSlash);
}
if (windows && isInvalidOnWindows(c)) {
if (c > 31) {
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectNameContainsChar,
Byte.valueOf(c)));
}
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectNameContainsByte,
Integer.valueOf(c & 0xff)));
}
}
return ptr;
}
@Nullable
private ObjectId idFor(int objType, byte[] raw) {
if (skipList != null) {
try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
return fmt.idFor(objType, raw);
}
}
return null;
}
private void report(@NonNull ErrorType err, @Nullable AnyObjectId id,
String why) throws CorruptObjectException {
if (errors.contains(err)
&& (id == null || skipList == null || !skipList.contains(id))) {
if (id != null) {
throw new CorruptObjectException(err, id, why);
}
throw new CorruptObjectException(why);
}
}
/**
* Check tree path entry for validity.
* <p>
* Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
* multi-directory path string such as {@code "src/main.c"}.
*
* @param path
* path string to scan.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* path is invalid.
* @since 3.6
*/
public void checkPath(String path) throws CorruptObjectException {
byte[] buf = Constants.encode(path);
checkPath(buf, 0, buf.length);
}
/**
* Check tree path entry for validity.
* <p>
* Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
* multi-directory path string such as {@code "src/main.c"}.
*
* @param raw
* buffer to scan.
* @param ptr
* offset to first byte of the name.
* @param end
* offset to one past last byte of name.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* path is invalid.
* @since 3.6
*/
public void checkPath(byte[] raw, int ptr, int end)
throws CorruptObjectException {
int start = ptr;
for (; ptr < end; ptr++) {
if (raw[ptr] == '/') {
checkPathSegment(raw, start, ptr);
start = ptr + 1;
}
}
checkPathSegment(raw, start, end);
}
/**
* Check tree path entry for validity.
*
* @param raw
* buffer to scan.
* @param ptr
* offset to first byte of the name.
* @param end
* offset to one past last byte of name.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* name is invalid.
* @since 3.4
*/
public void checkPathSegment(byte[] raw, int ptr, int end)
throws CorruptObjectException {
int e = scanPathSegment(raw, ptr, end, null);
if (e < end && raw[e] == 0)
throw new CorruptObjectException(
JGitText.get().corruptObjectNameContainsNullByte);
checkPathSegment2(raw, ptr, end, null);
}
private void checkPathSegment2(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
if (ptr == end) {
report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
return;
}
if (raw[ptr] == '.') {
switch (end - ptr) {
case 1:
report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
break;
case 2:
if (raw[ptr + 1] == '.') {
report(HAS_DOTDOT, id,
JGitText.get().corruptObjectNameDotDot);
}
break;
case 4:
if (isGit(raw, ptr + 1)) {
report(HAS_DOTGIT, id, String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
}
break;
default:
if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
report(HAS_DOTGIT, id, String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
}
}
} else if (isGitTilde1(raw, ptr, end)) {
report(HAS_DOTGIT, id, String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
}
if (macosx && isMacHFSGit(raw, ptr, end, id)) {
report(HAS_DOTGIT, id, String.format(
JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
RawParseUtils.decode(raw, ptr, end)));
}
if (windows) {
// Windows ignores space and dot at end of file name.
if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
report(WIN32_BAD_NAME, id, String.format(
JGitText.get().corruptObjectInvalidNameEnd,
Character.valueOf(((char) raw[end - 1]))));
}
if (end - ptr >= 3) {
checkNotWindowsDevice(raw, ptr, end, id);
}
}
}
// Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
// to ".git" therefore we should prevent such names
private boolean isMacHFSPath(byte[] raw, int ptr, int end, byte[] path,
@Nullable AnyObjectId id) throws CorruptObjectException {
boolean ignorable = false;
int g = 0;
while (ptr < end) {
switch (raw[ptr]) {
case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
return false;
}
switch (raw[ptr + 1]) {
case (byte) 0x80:
switch (raw[ptr + 2]) {
case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER
case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER
case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK
case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK
case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING
case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
ignorable = true;
ptr += 3;
continue;
default:
return false;
}
case (byte) 0x81:
switch (raw[ptr + 2]) {
case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES
case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES
ignorable = true;
ptr += 3;
continue;
default:
return false;
}
default:
return false;
}
case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
return false;
}
// U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
if ((raw[ptr + 1] == (byte) 0xbb)
&& (raw[ptr + 2] == (byte) 0xbf)) {
ignorable = true;
ptr += 3;
continue;
}
return false;
default:
if (g == path.length) {
return false;
}
if (toLower(raw[ptr++]) != path[g++]) {
return false;
}
}
}
if (g == path.length && ignorable) {
return true;
}
return false;
}
private boolean isMacHFSGit(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
byte[] git = new byte[] { '.', 'g', 'i', 't' };
return isMacHFSPath(raw, ptr, end, git, id);
}
private boolean isMacHFSGitmodules(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
return isMacHFSPath(raw, ptr, end, dotGitmodules, id);
}
private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
if ((ptr + 2) >= end) {
report(BAD_UTF8, id, MessageFormat.format(
JGitText.get().corruptObjectInvalidNameInvalidUtf8,
toHexString(raw, ptr, end)));
return false;
}
return true;
}
private static String toHexString(byte[] raw, int ptr, int end) {
StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
for (int i = ptr; i < end; i++)
b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
return b.toString();
}
private void checkNotWindowsDevice(byte[] raw, int ptr, int end,
@Nullable AnyObjectId id) throws CorruptObjectException {
switch (toLower(raw[ptr])) {
case 'a': // AUX
if (end - ptr >= 3
&& toLower(raw[ptr + 1]) == 'u'
&& toLower(raw[ptr + 2]) == 'x'
&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
report(WIN32_BAD_NAME, id,
JGitText.get().corruptObjectInvalidNameAux);
}
break;
case 'c': // CON, COM[1-9]
if (end - ptr >= 3
&& toLower(raw[ptr + 2]) == 'n'
&& toLower(raw[ptr + 1]) == 'o'
&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
report(WIN32_BAD_NAME, id,
JGitText.get().corruptObjectInvalidNameCon);
}
if (end - ptr >= 4
&& toLower(raw[ptr + 2]) == 'm'
&& toLower(raw[ptr + 1]) == 'o'
&& isPositiveDigit(raw[ptr + 3])
&& (end - ptr == 4 || raw[ptr + 4] == '.')) {
report(WIN32_BAD_NAME, id, String.format(
JGitText.get().corruptObjectInvalidNameCom,
Character.valueOf(((char) raw[ptr + 3]))));
}
break;
case 'l': // LPT[1-9]
if (end - ptr >= 4
&& toLower(raw[ptr + 1]) == 'p'
&& toLower(raw[ptr + 2]) == 't'
&& isPositiveDigit(raw[ptr + 3])
&& (end - ptr == 4 || raw[ptr + 4] == '.')) {
report(WIN32_BAD_NAME, id, String.format(
JGitText.get().corruptObjectInvalidNameLpt,
Character.valueOf(((char) raw[ptr + 3]))));
}
break;
case 'n': // NUL
if (end - ptr >= 3
&& toLower(raw[ptr + 1]) == 'u'
&& toLower(raw[ptr + 2]) == 'l'
&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
report(WIN32_BAD_NAME, id,
JGitText.get().corruptObjectInvalidNameNul);
}
break;
case 'p': // PRN
if (end - ptr >= 3
&& toLower(raw[ptr + 1]) == 'r'
&& toLower(raw[ptr + 2]) == 'n'
&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
report(WIN32_BAD_NAME, id,
JGitText.get().corruptObjectInvalidNamePrn);
}
break;
}
}
private static boolean isInvalidOnWindows(byte c) {
// Windows disallows "special" characters in a path component.
switch (c) {
case '"':
case '*':
case ':':
case '<':
case '>':
case '?':
case '\\':
case '|':
return true;
}
return 1 <= c && c <= 31;
}
private static boolean isGit(byte[] buf, int p) {
return toLower(buf[p]) == 'g'
&& toLower(buf[p + 1]) == 'i'
&& toLower(buf[p + 2]) == 't';
}
/**
* Check if the filename contained in buf[start:end] could be read as a
* .gitmodules file when checked out to the working directory.
*
* This ought to be a simple comparison, but some filesystems have peculiar
* rules for normalizing filenames:
*
* NTFS has backward-compatibility support for 8.3 synonyms of long file
* names (see
* https://web.archive.org/web/20160318181041/https://usn.pw/blog/gen/2015/06/09/filenames/
* for details). NTFS is also case-insensitive.
*
* MacOS's HFS+ folds away ignorable Unicode characters in addition to case
* folding.
*
* @param buf
* byte array to decode
* @param start
* position where a supposed filename is starting
* @param end
* position where a supposed filename is ending
* @param id
* object id for error reporting
*
* @return true if the filename in buf could be a ".gitmodules" file
* @throws CorruptObjectException
*/
private boolean isGitmodules(byte[] buf, int start, int end, @Nullable AnyObjectId id)
throws CorruptObjectException {
// Simple cases first.
if (end - start < 8) {
return false;
}
return (end - start == dotGitmodules.length
&& RawParseUtils.match(buf, start, dotGitmodules) != -1)
|| (macosx && isMacHFSGitmodules(buf, start, end, id))
|| (windows && isNTFSGitmodules(buf, start, end));
}
private boolean matchLowerCase(byte[] b, int ptr, byte[] src) {
if (ptr + src.length > b.length) {
return false;
}
for (int i = 0; i < src.length; i++, ptr++) {
if (toLower(b[ptr]) != src[i]) {
return false;
}
}
return true;
}
// .gitmodules, case-insensitive, or an 8.3 abbreviation of the same.
private boolean isNTFSGitmodules(byte[] buf, int start, int end) {
if (end - start == 11) {
return matchLowerCase(buf, start, dotGitmodules);
}
if (end - start != 8) {
return false;
}
// "gitmod" or a prefix of "gi7eba", followed by...
byte[] gitmod = new byte[]{'g', 'i', 't', 'm', 'o', 'd', '~'};
if (matchLowerCase(buf, start, gitmod)) {
start += 6;
} else {
byte[] gi7eba = new byte[]{'g', 'i', '7', 'e', 'b', 'a'};
for (int i = 0; i < gi7eba.length; i++, start++) {
byte c = (byte) toLower(buf[start]);
if (c == '~') {
break;
}
if (c != gi7eba[i]) {
return false;
}
}
}
// ... ~ and a number
if (end - start < 2) {
return false;
}
if (buf[start] != '~') {
return false;
}
start++;
if (buf[start] < '1' || buf[start] > '9') {
return false;
}
start++;
for (; start != end; start++) {
if (buf[start] < '0' || buf[start] > '9') {
return false;
}
}
return true;
}
private static boolean isGitTilde1(byte[] buf, int p, int end) {
if (end - p != 5)
return false;
return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i'
&& toLower(buf[p + 2]) == 't' && buf[p + 3] == '~'
&& buf[p + 4] == '1';
}
private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
if (isGit(raw, ptr)) {
int dots = 0;
boolean space = false;
int p = end - 1;
for (; (ptr + 2) < p; p--) {
if (raw[p] == '.')
dots++;
else if (raw[p] == ' ')
space = true;
else
break;
}
return p == ptr + 2 && (dots == 1 || space);
}
return false;
}
private boolean match(byte[] b, byte[] src) {
int r = RawParseUtils.match(b, bufPtr.value, src);
if (r < 0) {
return false;
}
bufPtr.value = r;
return true;
}
private static char toLower(byte b) {
if ('A' <= b && b <= 'Z')
return (char) (b + ('a' - 'A'));
return (char) b;
}
private static boolean isPositiveDigit(byte b) {
return '1' <= b && b <= '9';
}
/**
* Create a new {@link org.eclipse.jgit.lib.BlobObjectChecker}.
*
* @return new BlobObjectChecker or null if it's not provided.
* @since 4.9
*/
@Nullable
public BlobObjectChecker newBlobObjectChecker() {
return null;
}
/**
* Check a blob for errors.
*
* <p>
* This may not be called from PackParser in some cases. Use
* {@link #newBlobObjectChecker} instead.
*
* @param raw
* the blob data. The array is never modified.
* @throws org.eclipse.jgit.errors.CorruptObjectException
* if any error was detected.
*/
public void checkBlob(byte[] raw) throws CorruptObjectException {
// We can always assume the blob is valid.
}
private String normalize(byte[] raw, int ptr, int end) {
String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
}
/**
* Get the list of ".gitmodules" files found in the pack. For each, report
* its blob id (e.g. to validate its contents) and the tree where it was
* found (e.g. to check if it is in the root)
*
* @return List of pairs of ids {@literal <tree, blob>}.
*
* @since 4.7.5
*/
public List<GitmoduleEntry> getGitsubmodules() {
return gitsubmodules;
}
/**
* Reset the invocation-specific state from this instance. Specifically this
* clears the list of .gitmodules files encountered (see
* {@link #getGitsubmodules()})
*
* Configurations like errors to filter, skip lists or the specified O.S.
* (set via {@link #setSafeForMacOS(boolean)} or
* {@link #setSafeForWindows(boolean)}) are NOT cleared.
*
* @since 5.2
*/
public void reset() {
gitsubmodules.clear();
}
}