Skip to content

Commit

Permalink
Make mapping for 856 less strict #2070
Browse files Browse the repository at this point in the history
Because the first indicator was not always set to 4, some links were not created. Since an 856 field with an empty first indicator (meaning "no information provided") can also link to an HTTP page, the path was changed from 8564? to 856??
  • Loading branch information
TobiasNx committed Sep 11, 2024
1 parent 62b82df commit 7d85320
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 11 deletions.
6 changes: 3 additions & 3 deletions src/main/resources/alma/fix/identifiers.fix
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ end
# 1. Indicator: 4 = HTTP
set_array("@urnLinks")

do list(path:"8564?", "var":"$i")
do list(path:"856??", "var":"$i")
if all_match("$i.u", "^http.*(urn=|\\.(org|de)/)urn:.+$") # This should ignore repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
copy_field("$i.u", "urn[].$append")
copy_field("$i.u", "@urnLinks.$append")
Expand Down Expand Up @@ -100,12 +100,12 @@ end
# Sometimes dois are not set in 024 then we could pick up the missing from 856.
# 856 - Electronic Location and Access (R) - Subfield: $u (R) $3 (NR)
# 1. Indicator: 4 = HTTP
do list(path:"8564?", "var":"$i")
do list(path:"856??", "var":"$i")
if all_match("$i.u", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*") # Volltext
copy_field("$i.u", "doi[].$append")
replace_all("doi[].$last", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*", "$1")
end
end
replace_all("doi[].*", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*", "$1")
uniq("doi[]")

# 035 - System Control Number (R) - Subfield: $a (NR)
Expand Down
16 changes: 8 additions & 8 deletions src/main/resources/alma/fix/relatedRessourcesAndLinks.fix
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ do list(path: "rpbId", "var": "$i")
end

# DBIS
do list(path: "8564?", "var":"$i")
do list(path: "856??", "var":"$i")
if exists("$i.u")
if all_match("$i.x", ".*DBIS.*")
copy_field("$i.u", "sameAs[].$append.id")
Expand Down Expand Up @@ -254,7 +254,7 @@ replace_all("secondaryForm[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/reso

set_array("tableOfContents[]")

do list(path: "8564?", "var":"$i")
do list(path: "856??", "var":"$i")
if all_match("$i.3", "^[Ii][Nn][Hh][aA][lL][tT][sS][vV].*") # Inhaltsverzeichnis
copy_field("$i.3", "tableOfContents[].$append.label")
copy_field("$i.u", "tableOfContents[].$last.id")
Expand All @@ -263,7 +263,7 @@ end

set_array("description[]")

do list(path: "8564?", "var":"$i")
do list(path: "856??", "var":"$i")
if all_match("$i.3", "^[Ii][Nn][Hh][aA][lL][tT][sS][tT].*") # Inhaltstext
copy_field("$i.3", "description[].$append.label")
copy_field("$i.u", "description[].$last.id")
Expand All @@ -272,7 +272,7 @@ end

set_array("seeAlso[]")

do list(path: "8564?", "var":"$i")
do list(path: "856??", "var":"$i")
if all_match("$i.3", "^[zZ][uU][sS].*") # Zusätzliche Angaben
copy_field("$i.3", "seeAlso[].$append.label")
copy_field("$i.u", "seeAlso[].$last.id")
Expand All @@ -281,7 +281,7 @@ end

set_array("fulltextOnline[]")

do list(path: "8564?", "var":"$i")
do list(path: "856??", "var":"$i")
if exists("$i.u")
unless any_match("$i.u",".*(doi.org|urn=urn:|\\.(org|de)/urn:).*") # This should not skip repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
if all_equal("$i.z", "kostenfrei") # kostenfrei, added Digitalisierung not only Verlag or Agentur as filter
Expand Down Expand Up @@ -439,7 +439,7 @@ end

# 856 - Electronic Location and Access (R) - Subfield: $x - Nonpublic note (R)
# TODO: Check if src/test/resources/alma-fix/(CKB)5280000000199164.xml is also an EZB title even when it has no 865.
do list(path:"8564?", "var":"$i")
do list(path:"856??", "var":"$i")
if any_equal("$i.x","EZB") # can test x and x.*
add_field("inCollection[].$append.id", "http://lobid.org/resources/HT016356466#!")
add_field("inCollection[].$last.label", "Elektronische Zeitschriftenbibliothek (EZB)")
Expand All @@ -452,7 +452,7 @@ end

# edoweb

do list(path:"8564?", "var":"$i")
do list(path:"856??", "var":"$i")
if any_match("$i.u","^.*edoweb.*") # can test x and x.*
add_field("inCollection[].$append.id", "http://lobid.org/resources/HT016925914#!")
add_field("inCollection[].$last.label", "Edoweb Rheinland-Pfalz")
Expand All @@ -463,7 +463,7 @@ end


# TODO: AlephMorph checked for ellinet in "078r1.a", but publisso is also stated in the link URI. Is that enough?
do list(path:"8564?", "var":"$i")
do list(path:"856??", "var":"$i")
if any_match("$i.u","^.*publisso.*") # can test x and x.*
add_field("inCollection[].$append.id", "http://repository.publisso.de")
add_field("inCollection[].$last.label", "Fachrepositorium Lebenswissenschaften")
Expand Down
16 changes: 16 additions & 0 deletions src/test/resources/alma-fix/990177418660206441.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,22 @@
"id" : "http://worldcat.org/oclc/838434577",
"label" : "OCLC Ressource"
} ],
"fulltextOnline" : [ {
"label" : "Volltext",
"id" : "http://www.gbv.de/dms/belser/aszese/74817-1.pdf"
}, {
"label" : "Volltext",
"id" : "http://www.gbv.de/dms/belser/aszese/74817-2.pdf"
}, {
"label" : "Volltext",
"id" : "http://www.gbv.de/dms/belser/aszese/74817-3.pdf"
}, {
"label" : "Volltext",
"id" : "http://www.gbv.de/dms/belser/aszese/74817-4.pdf"
}, {
"label" : "Volltext",
"id" : "http://www.gbv.de/dms/belser/aszese/74817-5.pdf"
} ],
"inCollection" : [ {
"id" : "https://nrw.digibib.net/search/hbzvk/",
"label" : "DigiBib hbz Verbundkatalog",
Expand Down
8 changes: 8 additions & 0 deletions src/test/resources/alma-fix/990184766040206441.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"almaMmsId" : "990184766040206441",
"hbzId" : "HT016770284",
"deprecatedUri" : "http://lobid.org/resources/HT016770284#!",
"doi" : [ "10.48644/mpirg_sisis_125001" ],
"oclcNumber" : [ "179716832" ],
"publication" : [ {
"startDate" : "1883",
Expand Down Expand Up @@ -73,12 +74,19 @@
}, {
"id" : "http://worldcat.org/oclc/179716832",
"label" : "OCLC Ressource"
}, {
"id" : "https://doi.org/10.48644/mpirg_sisis_125001",
"label" : "mpirg_sisis_125001"
} ],
"primaryForm" : [ {
"id" : "http://lobid.org/resources/HT000522767#!",
"label" : "Elektronische Reproduktion von HT000522767",
"note" : [ "Elektronische Reproduktion von" ]
} ],
"fulltextOnline" : [ {
"id" : "https://doi.org/10.48644/mpirg_sisis_125001",
"label" : "DOI-Link"
} ],
"inCollection" : [ {
"id" : "https://nrw.digibib.net/search/hbzvk/",
"label" : "DigiBib hbz Verbundkatalog",
Expand Down

0 comments on commit 7d85320

Please sign in to comment.