BinaryDeltaInputStream.java
/*
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.util.io;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamCorruptedException;
import java.text.MessageFormat;
import org.eclipse.jgit.internal.JGitText;
/**
* An {@link InputStream} that applies a binary delta to a base on the fly.
* <p>
* Delta application to a base needs random access to the base data. The delta
* is expressed as a sequence of copy and insert instructions. A copy
* instruction has the form "COPY fromOffset length" and says "copy length bytes
* from the base, starting at offset fromOffset, to the result". An insert
* instruction has the form "INSERT length" followed by length bytes and says
* "copy the next length bytes from the delta to the result".
* </p>
* <p>
* These instructions are generated using a content-defined chunking algorithm
* (currently C git uses the standard Rabin variant; but there are others that
* could be used) that identifies equal chunks. It is entirely possible that a
* later copy instruction has a fromOffset that is before the fromOffset of an
* earlier copy instruction.
* </p>
* <p>
* This makes it impossible to stream the base.
* </p>
* <p>
* JGit is limited to 2GB maximum size for the base since array indices are
* signed 32bit values.
*
* @since 5.12
*/
public class BinaryDeltaInputStream extends InputStream {
private final byte[] base;
private final InputStream delta;
private long resultLength;
private long toDeliver = -1;
private int fromBase;
private int fromDelta;
private int baseOffset = -1;
/**
* Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
* to {@code base}.
*
* @param base
* data to apply the delta to
* @param delta
* {@link InputStream} delivering the delta to apply
*/
public BinaryDeltaInputStream(byte[] base, InputStream delta) {
this.base = base;
this.delta = delta;
}
@Override
public int read() throws IOException {
int b = readNext();
if (b >= 0) {
toDeliver--;
}
return b;
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
return super.read(b, off, len);
}
private void initialize() throws IOException {
long baseSize = readVarInt(delta);
if (baseSize > Integer.MAX_VALUE || baseSize < 0
|| (int) baseSize != base.length) {
throw new IOException(MessageFormat.format(
JGitText.get().binaryDeltaBaseLengthMismatch,
Integer.valueOf(base.length), Long.valueOf(baseSize)));
}
resultLength = readVarInt(delta);
if (resultLength < 0) {
throw new StreamCorruptedException(
JGitText.get().binaryDeltaInvalidResultLength);
}
toDeliver = resultLength;
baseOffset = 0;
}
private int readNext() throws IOException {
if (baseOffset < 0) {
initialize();
}
if (fromBase > 0) {
fromBase--;
return base[baseOffset++] & 0xFF;
} else if (fromDelta > 0) {
fromDelta--;
return delta.read();
}
int command = delta.read();
if (command < 0) {
return -1;
}
if ((command & 0x80) != 0) {
// Decode offset and length to read from base
long copyOffset = 0;
for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
if ((command & i) != 0) {
copyOffset |= ((long) next(delta)) << shift;
}
}
int copySize = 0;
for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
if ((command & i) != 0) {
copySize |= next(delta) << shift;
}
}
if (copySize == 0) {
copySize = 0x10000;
}
if (copyOffset > base.length - copySize) {
throw new StreamCorruptedException(MessageFormat.format(
JGitText.get().binaryDeltaInvalidOffset,
Long.valueOf(copyOffset), Integer.valueOf(copySize)));
}
baseOffset = (int) copyOffset;
fromBase = copySize;
return readNext();
} else if (command != 0) {
// The next 'command' bytes come from the delta
fromDelta = command - 1;
return delta.read();
} else {
// Zero is reserved
throw new StreamCorruptedException(
JGitText.get().unsupportedCommand0);
}
}
private int next(InputStream in) throws IOException {
int b = in.read();
if (b < 0) {
throw new EOFException();
}
return b;
}
private long readVarInt(InputStream in) throws IOException {
long val = 0;
int shift = 0;
int b;
do {
b = next(in);
val |= ((long) (b & 0x7f)) << shift;
shift += 7;
} while ((b & 0x80) != 0);
return val;
}
/**
* Tells the expected size of the final result.
*
* @return the size
* @throws IOException
* if the size cannot be determined from {@code delta}
*/
public long getExpectedResultSize() throws IOException {
if (baseOffset < 0) {
initialize();
}
return resultLength;
}
/**
* Tells whether the delta has been fully consumed, and the expected number
* of bytes for the combined result have been read from this
* {@link BinaryDeltaInputStream}.
*
* @return whether delta application was successful
*/
public boolean isFullyConsumed() {
try {
return toDeliver == 0 && delta.read() < 0;
} catch (IOException e) {
return toDeliver == 0;
}
}
@Override
public void close() throws IOException {
delta.close();
}
}