Merge pull request #19 from vintmd/crc32c
feature: support crc32c checksum
yuyang733 authored Nov 6, 2020
2 parents c5006e6 + 966f447 commit 67c9a6c
Showing 8 changed files with 199 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pom.xml
@@ -41,7 +41,7 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<hadoop.version>3.1.0</hadoop.version>
<cos_api.version>5.6.29</cos_api.version>
<cos_api.version>5.6.31</cos_api.version>
<google.guava.version>24.1.1-jre</google.guava.version>
<commons_lang3.version>3.1</commons_lang3.version>
<junit.version>4.8</junit.version>
64 changes: 64 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CRC32CCheckSum.java
@@ -0,0 +1,64 @@
package org.apache.hadoop.fs;

import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.math.BigInteger;

/**
* A COMPOSITE-CRC32C value exposed as a FileChecksum.
* Consider it suitable for checking whether an object has changed, but
* not suitable for comparing two different objects for equivalence,
* especially across Hadoop-compatible filesystems.
*/
public class CRC32CCheckSum extends FileChecksum {
private static final String ALGORITHM_NAME = "COMPOSITE-CRC32C";

private int crc32c = 0;

public CRC32CCheckSum() {
}


public CRC32CCheckSum(String crc32cecma) {
try {
BigInteger bigInteger = new BigInteger(crc32cecma);
this.crc32c = bigInteger.intValue();
} catch (NumberFormatException e) {
this.crc32c = 0;
}
}

@Override
public String getAlgorithmName() {
return CRC32CCheckSum.ALGORITHM_NAME;
}

@Override
public int getLength() {
return Integer.SIZE / Byte.SIZE;
}

@Override
public byte[] getBytes() {
return CrcUtil.intToBytes(crc32c);
}

@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(this.crc32c);
}

@Override
public void readFields(DataInput dataInput) throws IOException {
this.crc32c = dataInput.readInt();
}

@Override
public String toString() {
return getAlgorithmName() + ":" + String.format("0x%08x", crc32c);
}
}
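
For reference, a small usage sketch (not part of this diff): wrapping a CRC32C value the way COS reports it, as a decimal string in the x-cos-hash-crc32c response header, and inspecting the serialized form. The header value below is made up for illustration.

// Sketch only: the decimal value is an arbitrary example of an x-cos-hash-crc32c header.
String headerValue = "1134233543";
CRC32CCheckSum checksum = new CRC32CCheckSum(headerValue);
System.out.println(checksum);              // COMPOSITE-CRC32C:0x439b07c7
System.out.println(checksum.getLength());  // 4 (bytes)
byte[] bigEndian = checksum.getBytes();    // {0x43, (byte) 0x9b, 0x07, (byte) 0xc7}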
7 changes: 7 additions & 0 deletions src/main/java/org/apache/hadoop/fs/Constants.java
@@ -10,6 +10,13 @@ private Constants() {
// Suffix for local cache file name
public static final String BLOCK_TMP_FILE_SUFFIX = "_local_block_cache";

// Crc32c server response header key
public static final String CRC32C_RESP_HEADER = "x-cos-hash-crc32c";
// Crc32c agent request header key
public static final String CRC32C_REQ_HEADER = "x-cos-crc32c-flag";
// Crc32c agent request header value
public static final String CRC32C_REQ_HEADER_VAL = "cosn";

// Maximum number of blocks uploaded in trunks.
public static final int MAX_PART_NUM = 10000;
// The maximum size of a single block.
14 changes: 14 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CosFileSystem.java
@@ -820,13 +820,27 @@ public FileChecksum getFileChecksum(Path f, long length) throws IOException {
Preconditions.checkArgument(length >= 0);
LOG.debug("Call the checksum for the path: {}.", f);

// Checked in order per file: if both CRC64 and CRC32C are enabled, CRC64 takes precedence,
// since there is currently no way to tell from the request which checksum is wanted.
if (this.getConf().getBoolean(CosNConfigKeys.CRC64_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC64_CHECKSUM_ENABLED)) {
Path absolutePath = makeAbsolute(f);
String key = pathToKey(absolutePath);
FileMetadata fileMetadata = this.store.retrieveMetadata(key);
if (null == fileMetadata) {
throw new FileNotFoundException("File or directory doesn't exist: " + f);
}
String crc64ecm = fileMetadata.getCrc64ecm();
return crc64ecm != null ? new CRC64Checksum(crc64ecm) : super.getFileChecksum(f, length);
} else if (this.getConf().getBoolean(CosNConfigKeys.CRC32C_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC32C_CHECKSUM_ENABLED)) {
Path absolutePath = makeAbsolute(f);
String key = pathToKey(absolutePath);
FileMetadata fileMetadata = this.store.retrieveMetadata(key);
if (null == fileMetadata) {
throw new FileNotFoundException("File or directory doesn't exist: " + f);
}
String crc32cm = fileMetadata.getCrc32cm();
return crc32cm != null ? new CRC32CCheckSum(crc32cm) : super.getFileChecksum(f, length);
} else {
// disabled
return super.getFileChecksum(f, length);
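
For reference, a hedged sketch (not part of this diff) of how a client could enable the new CRC32C checksum and read it back through getFileChecksum. The bucket URI and path are placeholders; the property name comes from CosNConfigKeys below, and, as the logic above shows, CRC64 takes precedence if both checksum types are enabled.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch only: bucket URI and file path are made-up placeholders.
Configuration conf = new Configuration();
conf.setBoolean("fs.cosn.crc32c.checksum.enabled", true);
FileSystem fs = FileSystem.get(URI.create("cosn://example-bucket-125000000/"), conf);
FileChecksum checksum = fs.getFileChecksum(new Path("/data/part-00000"));
if (checksum != null) {
    System.out.println(checksum.getAlgorithmName() + " -> " + checksum);
}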
4 changes: 4 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CosNConfigKeys.java
@@ -83,8 +83,12 @@ public class CosNConfigKeys extends CommonConfigurationKeys {
public static final int DEFAULT_TRAFFIC_LIMIT = -1;

// checksum
// crc64
public static final String CRC64_CHECKSUM_ENABLED = "fs.cosn.crc64.checksum.enabled";
public static final boolean DEFAULT_CRC64_CHECKSUM_ENABLED = false;
// crc32c
public static final String CRC32C_CHECKSUM_ENABLED = "fs.cosn.crc32c.checksum.enabled";
public static final boolean DEFAULT_CRC32C_CHECKSUM_ENABLED = false;

public static final String HTTP_PROXY_IP = "fs.cosn.http.proxy.ip";
public static final String HTTP_PROXY_PORT = "fs.cosn.http.proxy.port";
59 changes: 52 additions & 7 deletions src/main/java/org/apache/hadoop/fs/CosNativeFileSystemStore.java
@@ -11,6 +11,7 @@
import com.qcloud.cos.utils.Jackson;
import com.qcloud.cos.utils.StringUtils;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.math3.analysis.function.Constant;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -41,6 +42,7 @@ public class CosNativeFileSystemStore implements NativeFileSystemStore {
private StorageClass storageClass;
private int maxRetryTimes;
private int trafficLimit;
private boolean crc32cEnabled;
private CosEncryptionSecrets encryptionSecrets;
private CustomerDomainEndpointResolver customerDomainEndpointResolver;

@@ -90,6 +92,9 @@ private void initCOSClient(URI uri, Configuration conf) throws IOException {
config.setHttpProtocol(HttpProtocol.https);
}

this.crc32cEnabled = conf.getBoolean(CosNConfigKeys.CRC32C_CHECKSUM_ENABLED,
CosNConfigKeys.DEFAULT_CRC32C_CHECKSUM_ENABLED);

// Proxy settings
String httpProxyIp = conf.getTrimmed(CosNConfigKeys.HTTP_PROXY_IP);
int httpProxyPort = conf.getInt(CosNConfigKeys.HTTP_PROXY_PORT, CosNConfigKeys.DEFAULT_HTTP_PROXY_PORT);
@@ -200,6 +205,9 @@ private void storeFileWithRetry(String key, InputStream inputStream,
objectMetadata.setContentMD5(Base64.encodeAsString(md5Hash));
}
objectMetadata.setContentLength(length);
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

PutObjectRequest putObjectRequest =
new PutObjectRequest(bucketName, key, inputStream,
@@ -265,6 +273,10 @@ public void storeEmptyFile(String key) throws IOException {

ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentLength(0);
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

InputStream input = new ByteArrayInputStream(new byte[0]);
PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName,
key, input, objectMetadata);
@@ -310,20 +322,26 @@ public PartETag uploadPart(
String key, String uploadId, int partNum, long partSize, byte[] md5Hash) throws IOException {
LOG.debug("Upload the part to the cos key [{}]. upload id: {}, part number: {}, part size: {}",
key, uploadId, partNum, partSize);
ObjectMetadata objectMetadata = new ObjectMetadata();
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

UploadPartRequest uploadPartRequest = new UploadPartRequest();
uploadPartRequest.setBucketName(this.bucketName);
uploadPartRequest.setUploadId(uploadId);
uploadPartRequest.setInputStream(inputStream);
uploadPartRequest.setPartNumber(partNum);
uploadPartRequest.setPartSize(partSize);
uploadPartRequest.setObjectMetadata(objectMetadata);
if (null != md5Hash) {
uploadPartRequest.setMd5Digest(Base64.encodeAsString(md5Hash));
}
uploadPartRequest.setKey(key);
if (this.trafficLimit >= 0) {
uploadPartRequest.setTrafficLimit(this.trafficLimit);
}
this.setEncryptionMetadata(uploadPartRequest, new ObjectMetadata());
this.setEncryptionMetadata(uploadPartRequest, objectMetadata);

try {
UploadPartResult uploadPartResult =
@@ -360,12 +378,18 @@ public String getUploadId(String key) throws IOException {
return "";
}

ObjectMetadata objectMetadata = new ObjectMetadata();
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

InitiateMultipartUploadRequest initiateMultipartUploadRequest =
new InitiateMultipartUploadRequest(bucketName, key);
if (null != this.storageClass) {
initiateMultipartUploadRequest.setStorageClass(this.storageClass);
}
this.setEncryptionMetadata(initiateMultipartUploadRequest, new ObjectMetadata());
initiateMultipartUploadRequest.setObjectMetadata(objectMetadata);
this.setEncryptionMetadata(initiateMultipartUploadRequest, objectMetadata);
try {
InitiateMultipartUploadResult initiateMultipartUploadResult =
(InitiateMultipartUploadResult) this.callCOSClientWithRetry(initiateMultipartUploadRequest);
@@ -389,9 +413,14 @@ public int compare(PartETag o1, PartETag o2) {
}
});
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}
CompleteMultipartUploadRequest completeMultipartUploadRequest =
new CompleteMultipartUploadRequest(bucketName, key, uploadId,
partETagList);
completeMultipartUploadRequest.setObjectMetadata(objectMetadata);
return (CompleteMultipartUploadResult) this.callCOSClientWithRetry(completeMultipartUploadRequest);
} catch (Exception e) {
String errMsg = String.format("Complete the multipart upload failed. cos key: %s, upload id: %s, " +
@@ -419,6 +448,7 @@ private FileMetadata QueryObjectMetadata(String key) throws IOException {

String ETag = objectMetadata.getETag();
String crc64ecm = objectMetadata.getCrc64Ecma();
String crc32cm = (String)objectMetadata.getRawMetadataValue(Constants.CRC32C_RESP_HEADER);
String versionId = objectMetadata.getVersionId();
Map<String, byte[]> userMetadata = null;
if (objectMetadata.getUserMetadata() != null) {
@@ -443,7 +473,7 @@ private FileMetadata QueryObjectMetadata(String key) throws IOException {
}
FileMetadata fileMetadata =
new FileMetadata(key, fileSize, mtime,
!key.endsWith(PATH_DELIMITER), ETag, crc64ecm, versionId, objectMetadata.getStorageClass(),
!key.endsWith(PATH_DELIMITER), ETag, crc64ecm, crc32cm, versionId, objectMetadata.getStorageClass(),
userMetadata);
LOG.debug("Retrieve the file metadata. cos key: {}, ETag:{}, length:{}, crc64ecm: {}.", key,
objectMetadata.getETag(), objectMetadata.getContentLength(), objectMetadata.getCrc64Ecma());
@@ -573,6 +603,10 @@ private void storeAttribute(String key, String attribute, byte[] value, boolean
objectMetadata.setUserMetadata(userMetadata);

// Build an in-place copy request to set the user-defined attributes
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucketName, key, bucketName, key);
if (null != objectMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(objectMetadata.getStorageClass());
@@ -776,10 +810,10 @@ private PartialListing list(String prefix, String delimiter,
long fileLen = cosObjectSummary.getSize();
String fileEtag = cosObjectSummary.getETag();
if (cosObjectSummary.getKey().endsWith(PATH_DELIMITER) && cosObjectSummary.getSize() == 0) {
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime, false, fileEtag, null, null, cosObjectSummary.getStorageClass()));
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime, false, fileEtag, null, null, null, cosObjectSummary.getStorageClass()));
} else {
fileMetadataArray.add(new FileMetadata(filePath, fileLen, mtime,
true, fileEtag, null, null, cosObjectSummary.getStorageClass()));
true, fileEtag, null, null, null, cosObjectSummary.getStorageClass()));
}
}
List<String> commonPrefixes = objectListing.getCommonPrefixes();
@@ -826,6 +860,10 @@ public void delete(String key) throws IOException {
public void rename(String srcKey, String dstKey) throws IOException {
LOG.debug("Rename the source cos key [{}] to the dest cos key [{}].", srcKey, dstKey);
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}
CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(bucketName, srcKey, bucketName,
dstKey);
@@ -834,7 +872,8 @@ public void rename(String srcKey, String dstKey) throws IOException {
if (null != sourceFileMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(sourceFileMetadata.getStorageClass());
}
this.setEncryptionMetadata(copyObjectRequest, new ObjectMetadata());
copyObjectRequest.setNewObjectMetadata(objectMetadata);
this.setEncryptionMetadata(copyObjectRequest, objectMetadata);

if (null != this.customerDomainEndpointResolver) {
if (null != this.customerDomainEndpointResolver.getEndpoint()) {
@@ -857,13 +896,19 @@ public void rename(String srcKey, String dstKey) throws IOException {
@Override
public void copy(String srcKey, String dstKey) throws IOException {
try {
ObjectMetadata objectMetadata = new ObjectMetadata();
if (crc32cEnabled) {
objectMetadata.setHeader(Constants.CRC32C_REQ_HEADER, Constants.CRC32C_REQ_HEADER_VAL);
}

CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(bucketName, srcKey, bucketName, dstKey);
FileMetadata sourceFileMetadata = this.retrieveMetadata(srcKey);
if (null != sourceFileMetadata.getStorageClass()) {
copyObjectRequest.setStorageClass(sourceFileMetadata.getStorageClass());
}
this.setEncryptionMetadata(copyObjectRequest, new ObjectMetadata());
copyObjectRequest.setNewObjectMetadata(objectMetadata);
this.setEncryptionMetadata(copyObjectRequest, objectMetadata);
if (null != this.customerDomainEndpointResolver) {
if (null != this.customerDomainEndpointResolver.getEndpoint()) {
copyObjectRequest.setSourceEndpointBuilder(this.customerDomainEndpointResolver);
45 changes: 45 additions & 0 deletions src/main/java/org/apache/hadoop/fs/CrcUtil.java
@@ -0,0 +1,45 @@
package org.apache.hadoop.fs;

import java.io.IOException;

public class CrcUtil {
private CrcUtil() {
}

/**
* @return 4-byte array holding the big-endian representation of
* {@code value}.
*/
public static byte[] intToBytes(int value) {
byte[] buf = new byte[4];
try {
writeInt(buf, 0, value);
} catch (IOException ioe) {
// Since this should only be able to occur from code bugs within this
// class rather than user input, we throw as a RuntimeException
// rather than requiring this method to declare throwing IOException
// for something the caller can't control.
throw new RuntimeException(ioe);
}
return buf;
}

/**
* Writes big-endian representation of {@code value} into {@code buf}
* starting at {@code offset}. buf.length must be greater than or
* equal to offset + 4.
*/
public static void writeInt(byte[] buf, int offset, int value)
throws IOException {
if (offset + 4 > buf.length) {
throw new IOException(String.format(
"writeInt out of bounds: buf.length=%d, offset=%d",
buf.length, offset));
}
buf[offset + 0] = (byte) ((value >>> 24) & 0xff);
buf[offset + 1] = (byte) ((value >>> 16) & 0xff);
buf[offset + 2] = (byte) ((value >>> 8) & 0xff);
buf[offset + 3] = (byte) (value & 0xff);
}

}
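
For reference, a brief sketch (not part of this diff) of the big-endian layout that intToBytes produces:

// Sketch: intToBytes writes the most significant byte first.
byte[] bytes = CrcUtil.intToBytes(0x439b07c7);
// bytes[0] == 0x43, bytes[1] == (byte) 0x9b, bytes[2] == 0x07, bytes[3] == (byte) 0xc7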