This repository has been archived by the owner on Nov 4, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
AA_pbcorescript_with_checks
95 lines (86 loc) · 5.21 KB
/
AA_pbcorescript_with_checks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# a version of the AA_PBCorescript that automatically checks each drive for derivatives and known failed files using stored csv reference files, and isolates them without processing them
# manual data input for metadata creation
#######################################
# Ask the operator for the five values used by the rest of the script and make
# sure the folder for the instantiation records exists.
# Globals written: drive, dir, AAPB, barcode, LTO
# Input:   five answers on stdin, one per line (typed or dragged in)
# Returns: 0 on success; 1 when the save directory is not an existing
#          directory (nothing is created in that case)
#######################################
collect_inputs() {
  # `read` is deliberately used WITHOUT -r here. A path dropped onto a terminal
  # window typically arrives with its spaces backslash-escaped and a trailing
  # blank; plain `read` strips those backslashes and trims the blank, which is
  # what turns the dropped text into a usable path.
  echo "Drag and drop the drive to run the operation on"
  read drive
  echo "Drag and drop the directory where you would like to save your checksum files and instantiation records"
  read dir
  echo "Drag and drop the CSV file containing the AAPB GUIDs"
  read AAPB
  echo "Type in the barcode number of the drive, in the format barcode######"
  read barcode
  echo "Type in the barcode number of the designated LTO tape, in the format barcode######"
  read LTO

  # Refuse to invent a save location: with -p below, a mistyped path would
  # otherwise be created silently.
  if [[ ! -d "$dir" ]]; then
    printf 'Save directory not found: %s\n' "$dir" >&2
    return 1
  fi

  # -p keeps a second run against the same save directory from erroring out.
  mkdir -p -- "$dir"/PBCoreInstantiations
}
collect_inputs
#######################################
# Log the checksum of one media file and build its PBCore instantiation record.
#
# Call it with the working directory set to the folder holding the file: the
# drive name and the location written into the record are derived from `pwd`
# (path field 3 and everything after the first nine characters respectively).
#
# Globals read:
#   i        name of the media file in the current directory
#   c        MD5 checksum of that file
#   artguid  Artesia GUID; looked up in $AAPB and used in the record name
#   AAPB     CSV file; field 2 of each line containing $artguid becomes the
#            AAPB GUID
#   dir      save directory (must already contain PBCoreInstantiations/)
#   barcode  drive barcode, LTO - tape barcode
# Files written:
#   $dir/<barcode>_MD5_Source.csv and the shared aapb_md5_total.csv (appended)
#   $dir/PBCoreInstantiations/pbcore_instantiation_<artguid>_<name>.xml
#   a copy of that record appended to a same-named file in the current directory
#######################################
catfun ()
{ # create PBCore XML document for AAPB asset
  local guid driveid rel_path mi_xml record
  mi_xml="$dir"/mediainfo.xml
  record="$dir"/PBCoreInstantiations/pbcore_instantiation_"${artguid}"_"${i%.*}".xml

  printf '%s\n' "MD5 ($i) = $c" >> "$dir"/"$barcode"_MD5_Source.csv
  printf '%s\n' "MD5 ($i) = $c" >> /Volumes/dept/MLA/Archives/LTO_Tape_Preservation/MD5/aapb_md5_total.csv

  # Quoted, fixed-string lookup: the CSV path may contain spaces and the GUID
  # must not be interpreted as a regular expression. An empty GUID would match
  # every line, so it is skipped.
  guid=""
  if [[ -n "$artguid" ]]; then
    guid=$( grep -F -- "$artguid" "$AAPB" | cut -d , -f 2 )
  fi

  # Truncate rather than append so a scratch file left behind by an interrupted
  # run cannot leak into this record.
  mediainfo --Output=PBCore2 "$i" > "$mi_xml"
  driveid=$( pwd | cut -d / -f 3 )
  rel_path=$( pwd | cut -c 10- )

  {
    printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
    grep '<pbcoreInstantiationDocument\|Generated' "$mi_xml"
    printf '\t%s\n' "<instantiationIdentifier source=\"AAPB GUID\">$guid</instantiationIdentifier>"
    grep 'instantiationDate\|instantiationDigital' "$mi_xml"
    printf '\t%s\n' "<instantiationLocation>$LTO/$rel_path/$i</instantiationLocation>"
    grep 'instantiationMediaType\|instantiationFileSize\|instantiationTimeStart\|instantiationDuration\|instantiationDataRate\|instantiationTracks' "$mi_xml"
    printf '\t%s\n' "<instantiationGenerations source=\"PBCore Controlled Vocabulary\">Master: preservation</instantiationGenerations>"
    # Essence tracks: print from the first track up to the first annotation
    # line, then drop that closing line (annotations are added just below).
    # Trimming inside the pipe means nothing already written to the record is
    # removed when the file has no essence track at all.
    sed -n '/<instantiationEssenceTrack>/,/<instantiationAnnotation/p' "$mi_xml" | sed '$d'
    grep 'instantiationAnnotation' "$mi_xml"
    # Every expansion stays inside the quotes so names containing spaces are
    # not split into extra printf arguments.
    printf '\t%s\n' \
      "<instantiationAnnotation annotationType=\"Drive\">Preservation copy on hard drive $driveid, $barcode</instantiationAnnotation>" \
      "<instantiationAnnotation annotationType=\"File Name\">$i</instantiationAnnotation>" \
      "<instantiationAnnotation annotationType=\"Artesia GUID\">$artguid</instantiationAnnotation>" \
      "<instantiationAnnotation annotationType=\"MD5 Checksum\">$c</instantiationAnnotation>"
    printf '%s' "</pbcoreInstantiationDocument>"
  } >> "$record"

  # keep a copy of the record next to the media file
  cat "$record" >> pbcore_instantiation_"${artguid}"_"${i%.*}".xml
  rm -- "$mi_xml"
}
# restructure drive for human-readability
#######################################
# Move every visible top-level folder of the drive into files_needing_qc/ and
# create the three sorting folders next to it.
# Arguments: $1 - root folder of the drive
# Effects:   leaves the working directory at the drive root
# Returns:   0 on success; 1 (without touching anything) when $1 is not a
#            directory or files_needing_qc cannot be created
#######################################
restructure_drive() {
  local root=$1 entry
  local -a top_dirs=()

  # Without this check a mistyped path would make the mkdir calls below run in
  # whatever directory the script was started from.
  if [[ -z "$root" || ! -d "$root" ]]; then
    printf 'Drive folder not found: %s\n' "$root" >&2
    return 1
  fi
  cd "$root" || return 1
  mkdir -p files_needing_qc || return 1

  # Collect the folders first (NUL-delimited, so names with spaces survive),
  # then move them. Hidden folders and anything whose name ends in
  # files_needing_qc stay where they are. -mindepth/-maxdepth 1 selects the
  # same entries as the BSD-only "-depth 1".
  while IFS= read -r -d '' entry; do
    top_dirs+=("${entry#./}")
  done < <(find . -mindepth 1 -maxdepth 1 -type d \
             ! -name '.*' ! -name '*files_needing_qc' -print0)

  for entry in "${top_dirs[@]}"; do
    mv -- "$root/$entry" "$root"/files_needing_qc/
  done

  mkdir -p confirmed_good_files bad_files derivatives
}
restructure_drive "$drive"
# check for derivatives and failed files before creating metadata

# Step out to drive root $1, then move asset folder $2 into its bucket folder $3.
_file_asset_dir() {
  cd "$1" || return 1
  mv -- "$2" "$1/$3/"
}

#######################################
# Classify every visible second-level folder of the drive and create metadata
# for the files that pass:
#   * file name listed in derivatives.csv            -> derivatives/
#   * file name listed in md5_original_values.csv
#       - checksum differs from the stored one       -> bad_files/, and a line
#                                                       is added to corrupted_files.csv
#       - checksum already in aapb_md5_total.csv     -> derivatives/
#       - otherwise                                  -> confirmed_good_files/, catfun
#   * file name not listed anywhere
#       - checksum already in aapb_md5_total.csv     -> derivatives/
#       - otherwise                                  -> catfun, folder stays put
# Folders that do not hold exactly one visible non-XML file, or whose file
# cannot be checksummed, are reported on stderr and left untouched.
#
# Arguments:       $1 - root folder of the drive
# Globals written: artguid, i, c (read by catfun)
# Calls:           catfun, md5
# Returns:         1 when $1 is not a directory, otherwise 0
#######################################
sort_asset_dirs() {
  local root=$1
  local ref_dir=/Volumes/dept/MLA/Archives/LTO_Tape_Preservation/AA_Batch_IDs
  local md5_total=/Volumes/dept/MLA/Archives/LTO_Tape_Preservation/MD5/aapb_md5_total.csv
  local d rel found drive_label
  local -a asset_dirs=() media=()

  if [[ -z "$root" || ! -d "$root" ]]; then
    printf 'Drive folder not found: %s\n' "$root" >&2
    return 1
  fi
  cd "$root" || return 1

  # Take a snapshot of the folder list before anything is moved: folders are
  # moved to other second-level locations and must not be visited twice.
  # Searching relative to the root keeps the hidden-path filter from reacting
  # to the drive path itself.
  while IFS= read -r -d '' rel; do
    asset_dirs+=("$root/${rel#./}")
  done < <(find . -mindepth 2 -maxdepth 2 -type d ! -regex '.*/\..*' -print0)

  for d in "${asset_dirs[@]}"; do
    if ! cd "$d"; then
      printf 'Skipping %s: cannot enter folder\n' "$d" >&2
      continue
    fi
    # The folder name is the Artesia GUID (same value the old path-field
    # lookup produced for drives mounted directly under /Volumes).
    artguid=${d##*/}

    media=()
    while IFS= read -r -d '' found; do
      media+=("${found#./}")
    done < <(find . -mindepth 1 -maxdepth 1 -type f ! -name '.*' ! -name '*.xml' -print0)
    # An empty file name used to make grep wait on the terminal, and several
    # names were mashed into one value; both cases are left for a human.
    if (( ${#media[@]} != 1 )); then
      printf 'Skipping %s: expected exactly one media file, found %d\n' "$d" "${#media[@]}" >&2
      continue
    fi
    i=${media[0]}

    if grep -qF -- "$i" "$ref_dir"/derivatives.csv; then
      printf '%s: listed as derivative\n' "$artguid"
      _file_asset_dir "$root" "$d" derivatives
      continue
    fi

    c=$(md5 -q "./$i")
    # An empty checksum would match every line of the reference files and get
    # the folder misfiled as a derivative.
    if [[ -z "$c" ]]; then
      printf 'Skipping %s: could not checksum %s\n' "$d" "$i" >&2
      continue
    fi

    if grep -qF -- "$i" "$ref_dir"/md5_original_values.csv; then
      if grep -F -- "$i" "$ref_dir"/md5_original_values.csv | grep -qF -- "$c"; then
        if grep -qF -- "$c" "$md5_total"; then
          printf '%s: checksum already recorded\n' "$artguid"
          _file_asset_dir "$root" "$d" derivatives
        elif _file_asset_dir "$root" "$d" confirmed_good_files \
            && cd "$root"/confirmed_good_files/"$artguid"; then
          printf '%s: checksum confirmed\n' "$artguid"
          catfun
        else
          printf 'Skipping %s: could not move folder to confirmed_good_files\n' "$d" >&2
        fi
      else
        printf '%s: checksum mismatch\n' "$artguid"
        drive_label=$( printf '%s\n' "$root" | cut -d / -f 3 )
        _file_asset_dir "$root" "$d" bad_files
        printf '\n%s' "$drive_label,$artguid,$i" >> "$ref_dir"/corrupted_files.csv
      fi
    else
      if grep -qF -- "$c" "$md5_total"; then
        printf '%s: checksum already recorded\n' "$artguid"
        _file_asset_dir "$root" "$d" derivatives
      else
        # no stored checksum to compare with: record it where it is
        printf '%s: no reference checksum\n' "$artguid"
        catfun
      fi
    fi
  done

  cd "$root" || return 1
}
sort_asset_dirs "$drive"