Skip to content

Commit

Permalink
Fix SHA1 digest calculation for large files
Browse files Browse the repository at this point in the history
 * Read in segments to avoid using all of memory.
  • Loading branch information
jtniehof committed May 20, 2021
1 parent 049e99a commit ff072db
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions dbprocessing/Diskfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,13 @@ def calcDigest(infile):
     m = hashlib.sha1()
     try:
         with open(infile, 'rb') as f:
-            m.update(f.read())
+            for d in iter(lambda: f.read(1048576), b''):
+                m.update(d)
     except IOError:
         raise DigestError("File not found: {0}".format(infile))

-    DBlogging.dblogger.debug("digest calculated: {0}, file: {1} ".format(m.hexdigest(), infile))
-
-    return m.hexdigest()
+    res = m.hexdigest()
+    DBlogging.dblogger.debug("digest calculated: {0}, file: {1} ".format(
+        res, infile))
+
+    return res

0 comments on commit ff072db

Please sign in to comment.