-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathB-search-files.sh
103 lines (89 loc) · 2.94 KB
/
B-search-files.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/bash
#
# Select a series of file types from some specified directories and copy
# those that have matching contents to one output folder - prefixing them
# with a unique number, based on the original file's inode, and logging
# this number and the original file path to a logfile. The logfile is
# named by the selection script, and numbered with the PID of the process
# that created it.
#
# Layout used below (all relative to ~/ClientA):
#   ../ClientA-results/files         copies of the selected files
#   ../ClientA-results/log/run-PID.csv   one CSV row per copied file

# "include" the definition of the search strings $ss
#
source B-search-terms.inc

# "include" function for searching inside PDFs with OCR
#
source B-PDF-scan.inc

# Stop early if the include did not provide a pattern: further down $ss is
# handed to grep, and an empty pattern would not select what was intended.
if [ -z "${ss:-}" ]; then
  echo "error: no search pattern in \$ss (is B-search-terms.inc present?)" >&2
  exit 1
fi
# echo $ss

# IFS business, because we have filenames
# with spaces in them...
# (the previous value is kept in OLDIFS so it can be put back at the end)
OLDIFS=$IFS
IFS=$'\n'

# Get to the source directory (~/ClientA). Give up if it is not there:
# every path used from here on is relative to it, so carrying on would
# create the results tree and run the search in the wrong place.
cd || exit 1
cd ClientA || exit 1

# Some directories to put the results in:...
#
mkdir -p ../ClientA-results/log || exit 1
mkdir -p ../ClientA-results/files || exit 1

# and headers for log...
#
echo "Date, Index, Hash, Full path" > ../ClientA-results/log/run-$$.csv || exit 1
# collect_file LABEL PATH
#   Copy PATH into ../ClientA-results/files as "<inode>-<basename>" and
#   append one row (date , inode , md5 , "path") to this run's CSV log.
#   LABEL ("Image", "PDF" or "Matching") is only used for the progress
#   line printed on stdout.
#   Returns non-zero, without logging, when the file cannot be stat'ed or
#   copied, so the log only lists files that really are in the results.
#
# NOTE: where we copy and the log name need changing!
collect_file() {
  local label=$1 src=$2
  local name=${src##*/}          # base filename
  local inode mdate md5hash

  # Ask stat for exactly the fields we need instead of grepping its
  # human-readable output (which also contains the file name).
  if ! inode=$(stat -c %i -- "$src"); then
    echo "warning: cannot stat $src; skipped" >&2
    return 1
  fi
  mdate=$(stat -c %y -- "$src")
  mdate=${mdate%% *}             # keep the date, drop time and zone
  # Hash via stdin so md5sum never has to print (and escape) the file name.
  md5hash=$(md5sum < "$src" | cut -d' ' -f1)

  printf '%s: %s\n' "$label" "$name"
  if ! cp -- "$src" "../ClientA-results/files/$inode-$name"; then
    echo "warning: could not copy $src; not logged" >&2
    return 1
  fi
  printf '%s , %s , %s , "%s"\n' "$mdate" "$inode" "$md5hash" "$src" \
    >> "../ClientA-results/log/run-$$.csv"
}

# scan_disk DIR
#   Walk DIR and collect every regular file that is not a mailbox (*.pst,
#   any case) and that is either an image, a PDF, or contains a match for
#   the extended regular expression in $ss (case-insensitive).
scan_disk() {
  local disk=$1
  local f desc

  # Specific files (ie anything NOT email)
  #
  # The pattern is quoted so the shell cannot expand it against the current
  # directory, and names are passed NUL-delimited on fd 3 so spaces,
  # newlines and glob characters in file names survive, whatever IFS is.
  while IFS= read -r -d '' -u 3 f; do
    # -b: description only, so a ':' or a keyword in the path cannot
    # influence the type checks below.
    desc=$(file -b -- "$f")

    if grep -iq image <<<"$desc"; then
      # Images we copy across...
      collect_file Image "$f"
    elif grep -iq PDF <<<"$desc"; then
      # insert a test for matching content later
      # but for now we snaffle them all...
      collect_file PDF "$f"
    elif [[ -n ${ss:-} ]] && grep -E -q -i -e "$ss" -- "$f"; then
      # ...anything else is copied and logged only if we get a match for
      # the search strings provided. An empty $ss counts as "no match".
      collect_file Matching "$f"
    fi
  done 3< <(find "$disk" -type f ! -iname '*.pst' -print0)
}

if [[ -z ${ss:-} ]]; then
  echo "warning: \$ss is empty; only images and PDFs will be collected" >&2
fi

for disk in "USB-1" "USB-2" "USB-3"; do
  folder=$disk
  echo "$folder"
  scan_disk "$folder"
done
# Put back the field separator that was saved in OLDIFS before the
# newline-only IFS was installed for the file loop.
IFS=$OLDIFS