From 68014633250e965a66748cad0ef7b7523892c96a Mon Sep 17 00:00:00 2001
From: mshroom <32199029+mshroom@users.noreply.github.com>
Date: Mon, 10 Jun 2024 14:05:35 +0300
Subject: [PATCH] [FINNA-2110] QDC: Add option to set preferred dc.type fields
when indexing format (#158)
---
src/RecordManager/Base/Record/Qdc.php | 32 ++++++++++++++++-
.../RecordManagerTest/Base/Record/QdcTest.php | 35 +++++++++++++++++++
tests/fixtures/Base/record/qdc1.xml | 3 ++
3 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/src/RecordManager/Base/Record/Qdc.php b/src/RecordManager/Base/Record/Qdc.php
index f56e308a7..6a4d00cee 100644
--- a/src/RecordManager/Base/Record/Qdc.php
+++ b/src/RecordManager/Base/Record/Qdc.php
@@ -34,6 +34,8 @@
use RecordManager\Base\Utils\Logger;
use RecordManager\Base\Utils\MetadataUtils;
+use function in_array;
+
/**
* Qdc record class
*
@@ -73,6 +75,13 @@ class Qdc extends AbstractRecord
*/
protected $recordNs = 'http://www.openarchives.org/OAI/2.0/oai_dc/';
+ /**
+ * Values of the type element's 'type' attribute that are skipped when determining the format.
+ *
+ * @var array
+ */
+ protected $excludedFormatTypes = [];
+
/**
* Constructor
*
@@ -328,7 +337,28 @@ public function getSeriesNumbering()
*/
public function getFormat()
{
- return $this->doc->type ? trim((string)$this->doc->type) : 'Unknown';
+ $param = $this->getDriverParam('preferredFormatTypes', ''); // comma-separated 'type' attribute values, in priority order
+ $preferredTypes = $param ? array_map('trim', explode(',', $param)) : []; // tolerate whitespace around the commas
+ $collectedTypes = []; // 'type' attribute value => first non-empty element value seen for it
+ $first = ''; // attribute key of the first accepted element, used as the fallback
+ foreach ($this->doc->type ?? [] as $node) {
+ if ($value = trim((string)$node)) {
+ $typeAttr = trim((string)($node->attributes()->type ?? '')) ?: 'no_type';
+ if (!in_array($typeAttr, $this->excludedFormatTypes, true) && !($collectedTypes[$typeAttr] ?? '')) {
+ $collectedTypes[$typeAttr] = $value;
+ $first = $first ?: $typeAttr;
+ }
+ }
+ }
+ if ($collectedTypes) {
+ foreach ($preferredTypes as $pref) {
+ if ($collectedTypes[$pref] ?? '') {
+ return $collectedTypes[$pref];
+ }
+ }
+ return $collectedTypes[$first]; // none of the preferred types is present: keep document order
+ }
+ return 'Unknown'; // no non-empty, non-excluded type element
}
/**
diff --git a/tests/RecordManagerTest/Base/Record/QdcTest.php b/tests/RecordManagerTest/Base/Record/QdcTest.php
index 07b2390ae..b8be685e0 100644
--- a/tests/RecordManagerTest/Base/Record/QdcTest.php
+++ b/tests/RecordManagerTest/Base/Record/QdcTest.php
@@ -80,6 +80,9 @@ public function testQdc1()
'2021-06-16T06:31:44Z',
'2021',
'Article',
+ 'okm_type',
+ 'okm_type_2',
+ 'other_type',
'Eeva-Liisa Viskari, Suvi Lehtoranta, Riikka Malila. Urine : The'
. ' potential, value chain and its sustainable management. '
. 'Sanitation Value Chain (2021) 5, 1, pages 10-12. '
@@ -192,4 +195,36 @@ public function testQdc1()
$this->compareArray($expected, $keys, 'getWorkIdentificationData');
}
+
+ /**
+ * Test format selection with different preferredFormatTypes driver params
+ *
+ * @return void
+ */
+ public function testFormat()
+ {
+ $expected = [
+ 'okm' => 'okm_type',
+ 'okm,other' => 'okm_type',
+ 'finna,other' => 'other_type',
+ 'finna' => 'Article',
+ ];
+ foreach ($expected as $preferredTypes => $format) {
+ $record = $this->createRecord(
+ Qdc::class,
+ 'qdc1.xml',
+ [
+ '__unit_test_no_source__' => [
+ 'driverParams' => [
+ "preferredFormatTypes=$preferredTypes",
+ ],
+ ],
+ ],
+ 'Base',
+ [$this->createMock(\RecordManager\Base\Http\ClientManager::class)]
+ );
+ $fields = $record->toSolrArray();
+ $this->assertEquals($format, $fields['format'], "preferredFormatTypes=$preferredTypes"); // name the failing case
+ }
+ }
}
diff --git a/tests/fixtures/Base/record/qdc1.xml b/tests/fixtures/Base/record/qdc1.xml
index 95de39764..095232f16 100644
--- a/tests/fixtures/Base/record/qdc1.xml
+++ b/tests/fixtures/Base/record/qdc1.xml
@@ -17,6 +17,9 @@
2021-06-16T06:31:44Z
2021
Article
+ okm_type
+ okm_type_2
+ other_type
Eeva-Liisa Viskari, Suvi Lehtoranta, Riikka Malila. Urine : The potential, value chain and its sustainable management. Sanitation Value Chain (2021) 5, 1, pages 10-12. https://doi.org/10.34416/svc.00029
2432-5058
http://hdl.handle.net/10138/331330