// RevCommit.java
/*
* Copyright (C) 2008-2009, Google Inc.
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.revwalk;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.util.RawParseUtils;
import org.eclipse.jgit.util.StringUtils;
/**
* A commit reference to a commit in the DAG.
*
* The state of the RevCommit isn't populated until the commit is parsed. The
* newly created RevCommit is unparsed and only has an objectId reference. Other
* states like parents, trees, commit ident, commit message, etc. are
* populated/available when the commit is parsed.
*/
public class RevCommit extends RevObject {
// Recursion depth at which carryFlags1 stops recursing into merge parents
// and hands the remaining work to a queue drained by slowCarryFlags.
private static final int STACK_DEPTH = 500;
/**
 * Parse a commit from its canonical format using a throwaway revision pool.
 * <p>
 * A private {@link RevWalk} created without an object reader hosts the
 * parsed commit. Because the pool is private, the parent pointers of the
 * returned commit are initialized, but the parents themselves do not have
 * their headers loaded.
 * <p>
 * Applications are discouraged from using this API, since callers usually
 * need more than one commit. Use
 * {@link org.eclipse.jgit.revwalk.RevWalk#parseCommit(AnyObjectId)} to
 * obtain a RevCommit from an existing repository.
 *
 * @param raw
 *            the canonical formatted commit to be parsed.
 * @return the parsed commit, in an isolated revision pool that is not
 *         available to the caller.
 */
public static RevCommit parse(byte[] raw) {
	try {
		final RevWalk isolatedPool = new RevWalk((ObjectReader) null);
		return parse(isolatedPool, raw);
	} catch (IOException ioe) {
		// The delegate declares IOException; surface it unchecked so this
		// convenience overload keeps a checked-exception-free signature.
		throw new RuntimeException(ioe);
	}
}
/**
 * Parse a commit from its canonical format into a caller supplied pool.
 * <p>
 * The commit is inserted directly into the given revision pool, making it
 * appear as though the commit exists in the repository, even if it
 * doesn't. The repository under the pool is not affected.
 * <p>
 * The body of the commit (message, author, committer) is always retained in
 * the returned {@code RevCommit}, even if the supplied {@code RevWalk} has
 * been configured with {@code setRetainBody(false)}.
 *
 * @param rw
 *            the revision pool to allocate the commit within. The commit's
 *            tree and parent pointers will be obtained from this pool.
 * @param raw
 *            the canonical formatted commit to be parsed. This buffer will
 *            be retained by the returned {@code RevCommit} and must not be
 *            modified by the caller.
 * @return the parsed commit, allocated within {@code rw}.
 * @throws java.io.IOException
 *             in case of RevWalk initialization fails
 */
public static RevCommit parse(RevWalk rw, byte[] raw) throws IOException {
	try (ObjectInserter.Formatter formatter = new ObjectInserter.Formatter()) {
		// The formatter computes the object id the raw bytes would have as
		// a commit; that id keys the commit inside the pool.
		final RevCommit commit = rw
				.lookupCommit(formatter.idFor(Constants.OBJ_COMMIT, raw));
		commit.parseCanonical(rw, raw);
		// Keep the body regardless of the walk's retain-body setting.
		commit.buffer = raw;
		return commit;
	}
}
/**
* Shared zero-length parent array. It is not referenced inside this class;
* presumably other classes in the package use it - TODO confirm.
*/
static final RevCommit[] NO_PARENTS = {};
/** Tree of this commit; assigned by parseCanonical, so unset before parsing. */
private RevTree tree;
/**
* Avoid accessing this field directly. Use method
* {@link RevCommit#getParents()} instead. RevCommit does not allow parents
* to be overridden and altering parent(s) is not supported.
*
* @since 6.3
*/
protected RevCommit[] parents;
int commitTime; // An int here for performance, overflows in 2038
/** Cleared to 0 by reset(); its other uses are outside this file. */
int inDegree;
/**
* Raw canonical commit bytes. Null until a parse stores them, and null
* again after disposeBody().
*/
private byte[] buffer;
/**
* Create a new commit reference.
* <p>
* Only the object id is passed on to the superclass here; tree, parents,
* commit time and body are filled in later by parseCanonical.
*
* @param id
* object name for the commit.
*/
protected RevCommit(AnyObjectId id) {
super(id);
}
/**
 * Load this commit's bytes through the walk and parse its headers.
 *
 * @param walk
 *            the walk supplying the cached object bytes.
 */
@Override
void parseHeaders(RevWalk walk) throws MissingObjectException,
		IncorrectObjectTypeException, IOException {
	final byte[] raw = walk.getCachedBytes(this);
	parseCanonical(walk, raw);
}
/**
 * Ensure the raw body buffer is loaded, parsing headers too if that has
 * not happened yet.
 *
 * @param walk
 *            the walk supplying the cached object bytes.
 */
@Override
void parseBody(RevWalk walk) throws MissingObjectException,
		IncorrectObjectTypeException, IOException {
	if (buffer != null) {
		// Body already available; nothing to reload.
		return;
	}
	buffer = walk.getCachedBytes(this);
	final boolean headersParsed = (flags & PARSED) != 0;
	if (!headersParsed) {
		parseCanonical(walk, buffer);
	}
}
/**
 * Populate this commit from the bytes of a canonical commit object.
 * <p>
 * Reads the tree id, then (only if no parent list is present yet) the
 * consecutive {@code parent} header lines, then the time from the
 * {@code committer} line. The raw buffer is kept only when
 * {@code walk.isRetainBody()} is true. Finally the {@code PARSED} flag is
 * set.
 * <p>
 * The parent scan is bounds checked, so a buffer that ends directly after
 * the tree line or after a parent line is handled without indexing past
 * the end of {@code raw}.
 *
 * @param walk
 *            the pool used to look up the tree and parent objects; its
 *            shared {@code idBuffer} is overwritten during parsing.
 * @param raw
 *            the canonical formatted commit.
 * @throws IOException
 *             propagated from the walk, e.g. while it initializes its
 *             shallow commits.
 */
void parseCanonical(RevWalk walk, byte[] raw) throws IOException {
	if (!walk.shallowCommitsInitialized) {
		walk.initializeShallowCommits(this);
	}

	final MutableObjectId idBuffer = walk.idBuffer;
	// Skip the 5 byte "tree " prefix; the hex id follows.
	idBuffer.fromString(raw, 5);
	tree = walk.lookupTree(idBuffer);

	// First byte after the tree line (5 byte prefix + hex id + LF).
	int ptr = 46;
	if (getParents() == null) {
		RevCommit[] pList = new RevCommit[1];
		int nParents = 0;
		// Each parent line starts with 'p'; stop at the first other header
		// or at the end of the buffer.
		while (ptr < raw.length && raw[ptr] == 'p') {
			// Skip the 7 byte "parent " prefix.
			idBuffer.fromString(raw, ptr + 7);
			final RevCommit p = walk.lookupCommit(idBuffer);
			switch (nParents) {
			case 0:
				pList[nParents++] = p;
				break;
			case 1:
				// Common two-parent merge: allocate an exact fit.
				pList = new RevCommit[] { pList[0], p };
				nParents = 2;
				break;
			default:
				// Octopus merge: grow in chunks of 32.
				if (pList.length <= nParents) {
					pList = Arrays.copyOf(pList, pList.length + 32);
				}
				pList[nParents++] = p;
				break;
			}
			// Advance over one whole parent line.
			ptr += 48;
		}
		if (nParents != pList.length) {
			// Trim unused slots (also yields an empty array for roots).
			pList = Arrays.copyOf(pList, nParents);
		}
		parents = pList;
	}

	// extract time from "committer "
	ptr = RawParseUtils.committer(raw, ptr);
	if (ptr > 0) {
		ptr = RawParseUtils.nextLF(raw, ptr, '>');
		// In 2038 commitTime will overflow unless it is changed to long.
		commitTime = RawParseUtils.parseBase10(raw, ptr, null);
	}

	if (walk.isRetainBody()) {
		buffer = raw;
	}
	flags |= PARSED;
}
/**
* {@inheritDoc}
* <p>
* For a commit this is always {@code Constants.OBJ_COMMIT}.
*/
@Override
public final int getType() {
return Constants.OBJ_COMMIT;
}
/**
 * Copy the {@code carry} flag bits from {@code c} onto its ancestors.
 * <p>
 * The bounded recursive pass runs first; if it gives up and returns a
 * queue, the queue based pass finishes the job.
 *
 * @param c
 *            commit whose ancestors receive the flags.
 * @param carry
 *            bit mask of flags to set on the ancestors.
 */
static void carryFlags(RevCommit c, int carry) {
	final FIFORevQueue deferred = carryFlags1(c, carry, 0);
	if (deferred == null) {
		return;
	}
	slowCarryFlags(deferred, carry);
}
/**
* Set the {@code carry} bits on the ancestors of {@code c}, following first
* parents in a loop and recursing only for the other parents of a merge.
*
* @param c
* commit to start from; the caller has already set its flags.
* @param carry
* bit mask of flags to set on ancestors.
* @param depth
* current recursion depth; recursion stops at STACK_DEPTH.
* @return null when this branch is finished; otherwise a queue of commits
* whose ancestors still need the flags.
*/
private static FIFORevQueue carryFlags1(RevCommit c, int carry, int depth) {
for (;;) {
RevCommit[] pList = c.getParents();
// No parent array, or no parents at all: nothing further to flag.
if (pList == null || pList.length == 0)
return null;
if (pList.length != 1) {
// Recursion budget used up: hand this commit to the queue based pass.
if (depth == STACK_DEPTH)
return defer(c);
// Parents after the first are handled by recursion.
for (int i = 1; i < pList.length; i++) {
RevCommit p = pList[i];
// Already has every requested bit; skip it.
if ((p.flags & carry) == carry)
continue;
p.flags |= carry;
FIFORevQueue q = carryFlags1(p, carry, depth + 1);
// A deeper call deferred: queue pList[0] plus the parents after i,
// then unwind.
if (q != null)
return defer(q, carry, pList, i + 1);
}
}
// Continue with the first parent by looping instead of recursing.
c = pList[0];
if ((c.flags & carry) == carry)
return null;
c.flags |= carry;
}
}
/**
 * Start a new deferral queue holding a single commit.
 *
 * @param c
 *            commit whose ancestors still need processing.
 * @return a fresh queue containing only {@code c}.
 */
private static FIFORevQueue defer(RevCommit c) {
	final FIFORevQueue pending = new FIFORevQueue();
	pending.add(c);
	return pending;
}
/**
 * Add the not yet visited parents of a merge to an existing deferral queue
 * while the recursive pass unwinds.
 *
 * @param q
 *            queue returned by the deeper call.
 * @param carry
 *            bit mask of flags being carried.
 * @param pList
 *            parent array of the merge being unwound.
 * @param i
 *            index of the first later parent not yet processed.
 * @return {@code q}, for chaining back up the stack.
 */
private static FIFORevQueue defer(FIFORevQueue q, int carry,
		RevCommit[] pList, int i) {
	// Normally the caller would continue with pList[0] by updating its
	// loop variable. Since it is unwinding instead, that step is performed
	// here.
	carryOneStep(q, carry, pList[0]);

	// The remaining parents (if any) get their flags checked and are
	// enqueued when they have ancestors.
	int next = i;
	while (next < pList.length) {
		carryOneStep(q, carry, pList[next]);
		next++;
	}
	return q;
}
/**
 * Finish carrying flags using the queue instead of recursion.
 * <p>
 * Every commit in {@code q} has a non-null parent array and already has all
 * bits of {@code carry} set.
 *
 * @param q
 *            queue of commits whose parents still need the flags.
 * @param carry
 *            bit mask of flags being carried.
 */
private static void slowCarryFlags(FIFORevQueue q, int carry) {
	RevCommit current = q.next();
	while (current != null) {
		final RevCommit[] ancestors = current.getParents();
		for (int idx = 0; idx < ancestors.length; idx++) {
			carryOneStep(q, carry, ancestors[idx]);
		}
		current = q.next();
	}
}
/**
 * Set the carry bits on one commit and enqueue it if it may have ancestors
 * to visit.
 *
 * @param q
 *            queue receiving commits that need further processing.
 * @param carry
 *            bit mask of flags being carried.
 * @param c
 *            commit to flag.
 */
private static void carryOneStep(FIFORevQueue q, int carry, RevCommit c) {
	final boolean alreadyCarried = (c.flags & carry) == carry;
	if (alreadyCarried) {
		return;
	}
	c.flags |= carry;
	if (c.getParents() == null) {
		// No parent array available, so there is nothing to walk into.
		return;
	}
	q.add(c);
}
/**
 * Carry a RevFlag set on this commit to its parents.
 * <p>
 * When this commit is parsed, has parents, and carries the supplied flag,
 * the flag is also added to the parents, grand-parents, and so on until an
 * unparsed commit or a commit with no parents is reached. This lets
 * applications force a flag through the history chain when necessary.
 * Nothing happens if the flag is not set on this commit.
 *
 * @param flag
 *            the single flag value to carry back onto parents.
 */
public void carry(RevFlag flag) {
	final int present = flags & flag.mask;
	if (present == 0) {
		return;
	}
	carryFlags(this, present);
}
/**
* Time from the "committer " line of the buffer.
* <p>
* The value is parsed as a base-10 integer when the commit is parsed. It
* stays 0 if the commit has not been parsed or no committer line was found.
* Presumably seconds since the epoch, given the 2038 overflow noted on the
* field - TODO confirm.
*
* @return commit time
*/
public final int getCommitTime() {
return commitTime;
}
/**
* Get a reference to this commit's tree.
* <p>
* The tree is assigned while the commit is parsed, so this is null for a
* commit that has not been parsed yet.
*
* @return tree of this commit.
*/
public final RevTree getTree() {
return tree;
}
/**
 * Get the number of parent commits listed in this commit.
 *
 * @return number of parents; never negative. It is 0 both for a commit
 *         without parents and while no parent list has been assigned.
 */
public int getParentCount() {
	if (parents == null) {
		return 0;
	}
	return parents.length;
}
/**
 * Get the nth parent from this commit's parent list.
 * <p>
 * While no parent list has been assigned, {@link #getParentCount()} is 0
 * and every index is therefore invalid; that case is reported with the
 * same exception as any other bad index instead of a
 * {@code NullPointerException}.
 *
 * @param nth
 *            parent index to obtain. Must be in the range 0 through
 *            {@link #getParentCount()}-1.
 * @return the specified parent.
 * @throws java.lang.ArrayIndexOutOfBoundsException
 *             an invalid parent index was specified.
 */
public RevCommit getParent(int nth) {
	final RevCommit[] list = parents;
	if (list == null) {
		// Keep the documented contract for commits without a parent list.
		throw new ArrayIndexOutOfBoundsException(nth);
	}
	return list[nth];
}
/**
* Obtain an array of all parents (<b>NOTE - THIS IS NOT A COPY</b>).
* <p>
* This method is exposed only to provide very fast, efficient access to
* this commit's parent list. Applications relying on this list should be
* very careful to ensure they do not modify its contents during their use
* of it.
*
* @return the array of parents; null while no parent list has been
* assigned (parsing the commit fills it in).
*/
public RevCommit[] getParents() {
return parents;
}
/**
* Obtain the raw unparsed commit body (<b>NOTE - THIS IS NOT A COPY</b>).
* <p>
* This method is exposed only to provide very fast, efficient access to
* this commit's message buffer within a RevFilter. Applications relying on
* this buffer should be very careful to ensure they do not modify its
* contents during their use of it.
*
* @return the raw unparsed commit body. This is <b>NOT A COPY</b>. Altering
* the contents of this buffer may alter the walker's knowledge of
* this commit, and the results it produces. Null if the body was
* never retained or has been discarded with
* {@link #disposeBody()}.
*/
public final byte[] getRawBuffer() {
return buffer;
}
/**
 * Parse the gpg signature from the raw buffer.
 * <p>
 * Returns the raw content of the {@code gpgsig} header lines. This method
 * is fairly expensive and produces a new byte[] instance on each
 * invocation. Callers should invoke it only if they are certain they need
 * the result, and should cache the return value for as long as necessary
 * to use all information from it.
 * <p>
 * RevFilter implementations should try to use
 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
 * commits.
 *
 * @return contents of the gpg signature; null if the commit was not signed.
 * @since 5.1
 */
public final byte[] getRawGpgSignature() {
	final byte[] data = buffer;
	final byte[] gpgsig = { 'g', 'p', 'g', 's', 'i', 'g' };
	final int from = RawParseUtils.headerStart(gpgsig, data, 0);
	if (from >= 0) {
		final int to = RawParseUtils.headerEnd(data, from);
		return Arrays.copyOfRange(data, from, to);
	}
	return null;
}
/**
 * Parse the author identity from the raw buffer.
 * <p>
 * Returns the content of the author line, after taking the commit's
 * character set into account and decoding the author name and email
 * address. This method is fairly expensive and produces a new PersonIdent
 * instance on each invocation. Callers should invoke it only if they are
 * certain they will be outputting the result, and should cache the return
 * value for as long as necessary to use all information from it.
 * <p>
 * RevFilter implementations should try to use
 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
 * commits.
 *
 * @return identity of the author (name, email) and the time the commit was
 *         made by the author; null if no author line was found.
 */
public final PersonIdent getAuthorIdent() {
	final byte[] data = buffer;
	final int identStart = RawParseUtils.author(data, 0);
	return identStart < 0 ? null
			: RawParseUtils.parsePersonIdent(data, identStart);
}
/**
 * Parse the committer identity from the raw buffer.
 * <p>
 * Returns the content of the committer line, after taking the commit's
 * character set into account and decoding the committer name and email
 * address. This method is fairly expensive and produces a new PersonIdent
 * instance on each invocation. Callers should invoke it only if they are
 * certain they will be outputting the result, and should cache the return
 * value for as long as necessary to use all information from it.
 * <p>
 * RevFilter implementations should try to use
 * {@link org.eclipse.jgit.util.RawParseUtils} to scan the
 * {@link #getRawBuffer()} instead, as this will allow faster evaluation of
 * commits.
 *
 * @return identity of the committer (name, email) and the time the commit
 *         was made by the committer; null if no committer line was found.
 */
public final PersonIdent getCommitterIdent() {
	final byte[] data = buffer;
	final int identStart = RawParseUtils.committer(data, 0);
	if (identStart >= 0) {
		return RawParseUtils.parsePersonIdent(data, identStart);
	}
	return null;
}
/**
 * Parse the complete commit message and decode it to a string.
 * <p>
 * Returns the message portion of the commit buffer, decoded with the
 * commit's character set (falling back as {@code guessEncoding()} does when
 * the declared one is unusable). This is a fairly expensive operation and
 * produces a new string on each invocation.
 *
 * @return decoded commit message as a string. Never null; empty if the
 *         buffer has no message section.
 */
public final String getFullMessage() {
	final byte[] data = buffer;
	final int bodyStart = RawParseUtils.commitMessage(data, 0);
	if (bodyStart >= 0) {
		return RawParseUtils.decode(guessEncoding(), data, bodyStart,
				data.length);
	}
	return ""; //$NON-NLS-1$
}
/**
 * Parse the commit message and return the first "line" of it.
 * <p>
 * The first line is everything up to the first pair of LFs. This is the
 * "oneline" format, suitable for output in a single line display.
 * <p>
 * The text is decoded with the commit's character set. This is a fairly
 * expensive operation and produces a new string on each invocation.
 *
 * @return decoded commit message as a string. Never null. The returned
 *         string does not contain any LFs, even if the first paragraph
 *         spanned multiple lines. Embedded LFs are converted to spaces.
 */
public final String getShortMessage() {
	final byte[] data = buffer;
	final int begin = RawParseUtils.commitMessage(data, 0);
	if (begin < 0) {
		return ""; //$NON-NLS-1$
	}

	final int end = RawParseUtils.endOfParagraph(data, begin);
	final String firstParagraph = RawParseUtils.decode(guessEncoding(),
			data, begin, end);
	// Only pay for the replacement when the paragraph really wraps.
	return hasLF(data, begin, end)
			? StringUtils.replaceLineBreaksWithSpace(firstParagraph)
			: firstParagraph;
}
/**
 * Test whether a byte range contains a line feed.
 *
 * @param r
 *            buffer to scan.
 * @param b
 *            first index to examine (inclusive).
 * @param e
 *            index to stop at (exclusive).
 * @return true if any byte in {@code [b, e)} is {@code '\n'}.
 */
static boolean hasLF(byte[] r, int b, int e) {
	for (int i = b; i < e; i++) {
		if (r[i] == '\n') {
			return true;
		}
	}
	return false;
}
/**
* Determine the encoding of the commit message buffer.
* <p>
* Locates the "encoding" header (if present) and returns its value. Due to
* corruption in the wild this may be an invalid encoding name that is not
* recognized by any character encoding library.
* <p>
* If no encoding header is present, {@code null} is returned.
*
* @return the preferred encoding of {@link #getRawBuffer()}; or null.
* @since 4.2
*/
@Nullable
public final String getEncodingName() {
return RawParseUtils.parseEncodingName(buffer);
}
/**
* Determine the encoding of the commit message buffer.
* <p>
* Locates the "encoding" header (if present) and then returns the proper
* character set to apply to this buffer to evaluate its contents as
* character data. The lookup is delegated to
* {@code RawParseUtils.parseEncoding} on the raw buffer.
* <p>
* If no encoding header is present {@code UTF-8} is assumed.
*
* @return the preferred encoding of {@link #getRawBuffer()}.
* @throws IllegalCharsetNameException
* if the character set requested by the encoding header is
* malformed and unsupportable.
* @throws UnsupportedCharsetException
* if the JRE does not support the character set requested by
* the encoding header.
*/
public final Charset getEncoding() {
return RawParseUtils.parseEncoding(buffer);
}
/**
 * Resolve the commit's character set, falling back to UTF-8 when the
 * declared encoding is malformed or unsupported.
 *
 * @return the charset to decode the buffer with; never null on the
 *         fallback path.
 */
private Charset guessEncoding() {
	Charset detected;
	try {
		detected = getEncoding();
	} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
		detected = UTF_8;
	}
	return detected;
}
/**
 * Parse the footer lines (e.g. "Signed-off-by") for machine processing.
 * <p>
 * Splits all of the footer lines out of the last paragraph of the commit
 * message, providing each line as a key-value pair, ordered by the order
 * of the line's appearance in the commit message itself. The buffer is
 * scanned backwards from its end, and the collected lines are reversed
 * before being returned.
 * <p>
 * A footer line's key must match the pattern {@code ^[A-Za-z0-9-]+:}, while
 * the value is free-form, but must not contain an LF. Very common keys seen
 * in the wild are:
 * <ul>
 * <li>{@code Signed-off-by} (agrees to Developer Certificate of Origin)
 * <li>{@code Acked-by} (thinks change looks sane in context)
 * <li>{@code Reported-by} (originally found the issue this change fixes)
 * <li>{@code Tested-by} (validated change fixes the issue for them)
 * <li>{@code CC}, {@code Cc} (copy on all email related to this change)
 * <li>{@code Bug} (link to project's bug tracking system)
 * </ul>
 *
 * @return ordered list of footer lines; empty list if no footers found.
 */
public final List<FooterLine> getFooterLines() {
	final byte[] data = buffer;

	// Trailing LFs are not interesting; step back over them.
	int cursor = data.length - 1;
	while (data[cursor] == '\n') {
		cursor--;
	}

	final int messageStart = RawParseUtils.commitMessage(data, 0);
	final ArrayList<FooterLine> footers = new ArrayList<>(4);
	final Charset charset = guessEncoding();
	while (true) {
		cursor = RawParseUtils.prevLF(data, cursor);
		if (cursor <= messageStart) {
			// Don't parse commit headers as footer lines.
			break;
		}

		final int keyBegin = cursor + 2;
		if (data[keyBegin] == '\n') {
			// Stop at first paragraph break, no footers above it.
			break;
		}

		final int keyFinish = RawParseUtils.endOfFooterLineKey(data,
				keyBegin);
		if (keyFinish >= 0) {
			// Skip over the ': *' at the end of the key before the value.
			int valueBegin = keyFinish + 1;
			while (valueBegin < data.length && data[valueBegin] == ' ') {
				valueBegin++;
			}

			// Value ends at the LF, and does not include it.
			int valueFinish = RawParseUtils.nextLF(data, valueBegin);
			if (data[valueFinish - 1] == '\n') {
				valueFinish--;
			}

			footers.add(new FooterLine(data, charset, keyBegin, keyFinish,
					valueBegin, valueFinish));
		}
		// Otherwise: not a well formed footer line, skip it.
	}

	Collections.reverse(footers);
	return footers;
}
/**
 * Get the values of all footer lines with the given key.
 * <p>
 * The name is wrapped in a {@code FooterKey} and the lookup is delegated
 * to {@link #getFooterLines(FooterKey)}.
 *
 * @param keyName
 *            footer key to find values of, case insensitive.
 * @return values of footers with key of {@code keyName}, ordered by their
 *         order of appearance. Duplicates may be returned if the same
 *         footer appeared more than once. Empty list if no footers appear
 *         with the specified key, or there are no footers at all.
 * @see #getFooterLines()
 */
public final List<String> getFooterLines(String keyName) {
	final FooterKey key = new FooterKey(keyName);
	return getFooterLines(key);
}
/**
 * Get the values of all footer lines with the given key.
 *
 * @param keyName
 *            footer key to find values of, case insensitive.
 * @return values of footers with key of {@code keyName}, ordered by their
 *         order of appearance. Duplicates may be returned if the same
 *         footer appeared more than once. Empty list if no footers appear
 *         with the specified key, or there are no footers at all.
 * @see #getFooterLines()
 */
public final List<String> getFooterLines(FooterKey keyName) {
	final List<FooterLine> all = getFooterLines();
	if (all.isEmpty()) {
		return Collections.emptyList();
	}

	final int total = all.size();
	final ArrayList<String> values = new ArrayList<>(total);
	for (int idx = 0; idx < total; idx++) {
		final FooterLine line = all.get(idx);
		if (line.matches(keyName)) {
			values.add(line.getValue());
		}
	}
	return values;
}
/**
* Reset this commit to allow another RevWalk with the same instances.
* <p>
* This implementation only sets {@code inDegree} back to 0.
* <p>
* Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
* basic information can be correctly cleared out.
*/
public void reset() {
inDegree = 0;
}
/**
* Discard the message buffer to reduce memory usage.
* <p>
* After discarding, the memory usage of the {@code RevCommit} is reduced to
* only the {@link #getTree()} and {@link #getParents()} pointers and the
* time in {@link #getCommitTime()}. Accessing other properties such as
* {@link #getAuthorIdent()}, {@link #getCommitterIdent()} or either message
* function requires reloading the buffer by invoking
* {@link org.eclipse.jgit.revwalk.RevWalk#parseBody(RevObject)}.
* <p>
* Afterwards {@link #getRawBuffer()} returns null.
*
* @since 4.0
*/
public final void disposeBody() {
buffer = null;
}
/** {@inheritDoc} */
@Override
public String toString() {
	// Layout: "<type> <name> <commitTime> <core flags>".
	final StringBuilder text = new StringBuilder();
	text.append(Constants.typeString(getType())).append(' ')
			.append(name()).append(' ')
			.append(commitTime).append(' ');
	appendCoreFlags(text);
	return text.toString();
}
}