Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use XXH64 instead of MD5 #108

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "xxHash"]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

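You're changing the build requirements here. It's no longer possible to just do a normal clone of the repo and run `make`: the xxHash submodule has to be fetched first (e.g. with `git clone --recursive` or `git submodule update --init`).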

Looking at the current master, it's generally not a good idea to have no build instructions at all. But you're introducing a stupid trap door here, which warrants a comment, a.k.a. build documentation.

Having read the XXH64 website, I think it looks pretty promising.

path = xxHash
url = https://github.com/Cyan4973/xxHash
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ INSTALL_DATA = $(INSTALL) -c -m 0644
#
#ADDITIONAL_OBJECTS = getopt.o

OBJECT_FILES = fdupes.o md5/md5.o $(ADDITIONAL_OBJECTS)
OBJECT_FILES = fdupes.o xxHash/xxhash.o $(ADDITIONAL_OBJECTS)

#####################################################################
# no need to modify anything beyond this point #
Expand Down
62 changes: 22 additions & 40 deletions fdupes.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include <errno.h>
#include <libgen.h>

#include "md5/md5.h"
#include "xxHash/xxhash.h"

#define ISFLAG(a,b) ((a & b) == b)
#define SETFLAG(a,b) (a |= b)
Expand Down Expand Up @@ -73,16 +73,14 @@ ordertype_t ordertype = ORDER_MTIME;

#define PARTIAL_MD5_SIZE 4096

#define MD5_DIGEST_LENGTH 16

/*

TODO: Partial sums (for working with very large files).

typedef struct _signature
{
md5_state_t state;
md5_byte_t digest[16];
XXH64_hash_t digest[16];
} signature_t;

typedef struct _signatures
Expand All @@ -96,8 +94,8 @@ typedef struct _signatures
typedef struct _file {
char *d_name;
off_t size;
md5_byte_t *crcpartial;
md5_byte_t *crcsignature;
XXH64_hash_t *crcpartial;
XXH64_hash_t *crcsignature;
dev_t device;
ino_t inode;
time_t sorttime;
Expand Down Expand Up @@ -358,18 +356,15 @@ int grokdir(char *dir, file_t **filelistp)
return filecount;
}

md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read)
XXH64_hash_t *getcrcsignatureuntil(char *filename, off_t max_read)
{
off_t fsize;
off_t toread;
md5_state_t state;
static md5_byte_t digest[MD5_DIGEST_LENGTH];
static md5_byte_t chunk[CHUNK_SIZE];
XXH64_hash_t state = 0UL;
static XXH64_hash_t digest;
static XXH64_hash_t chunk[CHUNK_SIZE];
FILE *file;

md5_init(&state);


fsize = filesize(filename);

if (max_read != 0 && fsize > max_read)
Expand All @@ -388,48 +383,35 @@ md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read)
fclose(file);
return NULL;
}
md5_append(&state, chunk, toread);
state = XXH64(chunk, toread, state);
fsize -= toread;
}

md5_finish(&state, digest);
digest = state;

fclose(file);

return digest;
return &digest;
}

/*
 * Signature of a file with no read limit requested: forwards to
 * getcrcsignatureuntil() with max_read == 0. The visible check there
 * (`max_read != 0 && fsize > max_read`) is skipped for 0, so presumably the
 * whole file is hashed -- the rest of that branch is collapsed in this view.
 *
 * Returns whatever getcrcsignatureuntil() returns (NULL on its error paths).
 *
 * NOTE(review): this is an unmarked diff; the first signature line appears to
 * be the removed MD5 version and the second the added XXH64 version.
 */
md5_byte_t *getcrcsignature(char *filename)
XXH64_hash_t *getcrcsignature(char *filename)
{
return getcrcsignatureuntil(filename, 0);
}

/*
 * Signature of the leading part of a file: forwards to
 * getcrcsignatureuntil() with max_read == PARTIAL_MD5_SIZE (defined as 4096
 * earlier in this file). Presumably this caps the amount read at that many
 * bytes -- the capping code itself is collapsed in this view; confirm there.
 *
 * Returns whatever getcrcsignatureuntil() returns (NULL on its error paths).
 *
 * NOTE(review): this is an unmarked diff; the first signature line appears to
 * be the removed MD5 version and the second the added XXH64 version.
 */
md5_byte_t *getcrcpartialsignature(char *filename)
XXH64_hash_t *getcrcpartialsignature(char *filename)
{
return getcrcsignatureuntil(filename, PARTIAL_MD5_SIZE);
}

/*
 * Three-way comparison of two XXH64 digests.
 *
 * a, b: pointers to the 64-bit hash values to compare; both must be non-NULL.
 *
 * Returns -1 if *a < *b, 1 if *a > *b and 0 if the two hashes are equal,
 * i.e. the same -1/0/1 contract as the byte-wise MD5 comparison it replaces.
 *
 * The values are compared explicitly rather than returning `*a - *b`:
 * XXH64_hash_t is an unsigned 64-bit type (per xxhash.h), so the difference
 * would be converted to int on return. That conversion drops the upper
 * 32 bits, so two different hashes that share their low 32 bits would compare
 * as equal (a false duplicate match), and the sign of the result would not
 * reliably reflect the ordering of the operands.
 */
int md5cmp(const XXH64_hash_t *a, const XXH64_hash_t *b)
{
  if (*a < *b)
    return -1;
  if (*a > *b)
    return 1;

  return 0;
}

/*
 * Copy a digest from `from` into `to`.
 *
 * NOTE(review): this is an unmarked diff. The first signature, the `x`
 * counter and the MD5_DIGEST_LENGTH loop appear to be the removed MD5 lines;
 * the second signature and the single assignment appear to be the added
 * XXH64 lines.
 */
void md5copy(md5_byte_t *to, const md5_byte_t *from)
void md5copy(XXH64_hash_t *to, const XXH64_hash_t *from)
{
int x;

/* Removed version: copies the digest element by element. */
for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
to[x] = from[x];
/* Added version: copies the single value `from` points at. */
*to = *from;
}

void purgetree(filetree_t *checktree)
Expand Down Expand Up @@ -520,7 +502,7 @@ int is_hardlink(filetree_t *checktree, file_t *file)
file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
{
int cmpresult;
md5_byte_t *crcsignature;
XXH64_hash_t *crcsignature;
off_t fsize;

/* If device and inode fields are equal one of the files is a
Expand Down Expand Up @@ -550,7 +532,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
return NULL;
}

checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
checktree->file->crcpartial = (XXH64_hash_t*) malloc(sizeof(XXH64_hash_t));
if (checktree->file->crcpartial == NULL) {
errormsg("out of memory\n");
exit(1);
Expand All @@ -565,7 +547,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
return NULL;
}

file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
file->crcpartial = (XXH64_hash_t*) malloc(sizeof(XXH64_hash_t));
if (file->crcpartial == NULL) {
errormsg("out of memory\n");
exit(1);
Expand All @@ -581,7 +563,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
crcsignature = getcrcsignature(checktree->file->d_name);
if (crcsignature == NULL) return NULL;

checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
checktree->file->crcsignature = (XXH64_hash_t*) malloc(sizeof(XXH64_hash_t));
if (checktree->file->crcsignature == NULL) {
errormsg("out of memory\n");
exit(1);
Expand All @@ -593,7 +575,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
crcsignature = getcrcsignature(file->d_name);
if (crcsignature == NULL) return NULL;

file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
file->crcsignature = (XXH64_hash_t*) malloc(sizeof(XXH64_hash_t));
if (file->crcsignature == NULL) {
errormsg("out of memory\n");
exit(1);
Expand Down
4 changes: 0 additions & 4 deletions md5/README

This file was deleted.

Loading