Skip to content

Commit

Permalink
[FINNA-1729] LIDO: Drop support for splitting titles (#160)
Browse files Browse the repository at this point in the history
  • Loading branch information
mshroom authored Jun 26, 2024
1 parent 04573c7 commit 828192b
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 81 deletions.
20 changes: 1 addition & 19 deletions src/RecordManager/Base/Record/Lido.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,29 +146,11 @@ public function toSolrArray(Database $db = null)

$data['record_format'] = 'lido';
$title = $this->getTitle(false);
if ($this->getDriverParam('splitTitles', false)) {
$titlePart = $this->metadataUtils->splitTitle($title);
if ($titlePart) {
$data['description'] = $title;
$title = $titlePart;
}
}
$data['title'] = $data['title_short'] = $data['title_full'] = $title;
// Create sort title from the title that may have been split above:
$data['title_sort'] = $this->metadataUtils->createSortTitle($title);
$data['title_alt'] = $this->getAltTitles();

$description = $this->getDescription();
if ($description) {
if (
!empty($data['description'])
&& !str_starts_with($description, $data['description'])
) {
$data['description'] .= " -- $description";
} else {
$data['description'] = $description;
}
}
$data['description'] = $this->getDescription();

$data['format'] = $this->getObjectWorkType();
$data['identifier'] = $this->getIdentifier();
Expand Down
62 changes: 0 additions & 62 deletions src/RecordManager/Base/Utils/MetadataUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -935,68 +935,6 @@ function (&$val, $key, $chars) {
return $array;
}

/**
 * Split title to main title and description. Tries to find the first sentence
 * break where the title can be split.
 *
 * A split is only considered after a word that ends with a period while no
 * parentheses or square brackets are open, and only when the following word
 * does not start with a lowercase letter.
 *
 * @param string $title Title to split
 *
 * @return null|string Null if title was not split, otherwise the initial
 * title part
 */
public function splitTitle($title)
{
    $parenLevel = 0;
    $bracketLevel = 0;
    // Make sure the title has single spaces for whitespace
    $title = preg_replace('/\s+/', ' ', $title);
    $titleWords = explode(' ', $title);
    foreach ($titleWords as $index => $word) {
        // Number of words consumed so far; also the index of the next word.
        $wordCount = $index + 1;
        $parenLevel += substr_count($word, '(');
        $parenLevel -= substr_count($word, ')');
        $bracketLevel += substr_count($word, '[');
        $bracketLevel -= substr_count($word, ']');
        // Never split inside parentheses or brackets.
        if ($parenLevel != 0 || $bracketLevel != 0) {
            continue;
        }
        // Try to avoid splitting at short words or the very beginning.
        // Note: the lengths here are byte lengths.
        if (
            !str_ends_with($word, '.')
            || strlen($word) <= 2
            || ($wordCount === 1 && strlen($word) <= 4)
        ) {
            continue;
        }
        // Verify that the word is strippable (not abbreviation etc.)
        $leadStripped = $this->stripLeadingPunctuation($word);
        $stripped = $this->stripTrailingPunctuation($leadStripped);
        $nextWord = $titleWords[$wordCount] ?? '';
        // 1.) There has to be something following this word. Compare with ''
        //     explicitly so that a following word such as "0" is not treated
        //     as missing.
        // 2.) The trailing period must be strippable or end with a year.
        //     NOTE(review): the loose comparison is kept as is; PHP compares
        //     two numeric strings numerically, so e.g. "10." and "10" count
        //     as equal here -- confirm whether that is intended.
        // 3.) Next word has to start with a capital or digit, i.e. it must
        //     not start with a lowercase letter. A Unicode-aware match is
        //     used so that multibyte lowercase letters are recognised too.
        // 4.) Not something like 12-p.
        // 5.) Not initials like A.N.
        if (
            $nextWord !== ''
            && ($leadStripped != $stripped
            || preg_match('/^\d{4}\.$/', $word))
            && !preg_match('/^\p{Ll}/u', $nextWord)
            && !preg_match('/.+\-\w{1,2}\.$/', $word)
            && !preg_match('/^\w\.\w\.$/', $word) // initials
        ) {
            return $this->stripTrailingPunctuation(
                implode(' ', array_slice($titleWords, 0, $wordCount))
            );
        }
    }
    return null;
}

/**
* Determine if a record is a hidden component part
*
Expand Down
2 changes: 2 additions & 0 deletions tests/RecordManagerTest/Base/Record/LidoTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public function testLido1()
'title_sort' => 'luonnonsuojelusäädökset toimittanut raimo luhtanen'
. ' säädökset',
'title_alt' => [],
'description' => '',
'format' => 'Kirja',
'identifier' => '26054',
'institution' => 'Test Institution',
Expand Down Expand Up @@ -179,6 +180,7 @@ public function testLido1NonMergedTitle()
'title_alt' => [
'Säädökset',
],
'description' => '',
'format' => 'Kirja',
'identifier' => '26054',
'institution' => 'Test Institution',
Expand Down

0 comments on commit 828192b

Please sign in to comment.