Skip to content

Commit

Permalink
Merge pull request #35 from slub/32-optimize-elasticsearch-mapping
Browse files Browse the repository at this point in the history
32 optimize elasticsearch mapping
  • Loading branch information
dikastes authored Nov 27, 2024
2 parents 1461895 + 28ce97e commit f1e7bc5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 4 deletions.
14 changes: 10 additions & 4 deletions Classes/Command/IndexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Psr\Log\LoggerInterface;
use Slub\LisztCommon\Common\ElasticClientBuilder;
use Slub\LisztBibliography\Processing\BibEntryProcessor;
use Slub\LisztBibliography\Processing\BibElasticMapping;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputOption;
Expand Down Expand Up @@ -73,6 +74,8 @@ protected function getRequest(): ServerRequestInterface
return $GLOBALS['TYPO3_REQUEST'];
}


// Example: `ddev typo3 liszt-bibliography:index -t 100` indexes only 100 documents (useful for testing and development).
protected function configure(): void
{
$this->setDescription('Create elasticsearch index from zotero bibliography')->
Expand Down Expand Up @@ -141,17 +144,19 @@ protected function fullSync(InputInterface $input): void
$this->bibliographyItems = $collection->pluck('data');
$cursor = 0; // set Cursor to 0, not to bulk size
$index = $this->extConf['elasticIndexName'];
$mappingParams = BibElasticMapping::getMappingParams($index);

try {
// in older Elasticsearch versions (until 7) exists returns a bool
if ($this->client->indices()->exists(['index' => $index])) {
$this->client->indices()->delete(['index' => $index]);
$this->client->indices()->create(['index' => $index]);
$this->client->indices()->create($mappingParams);
}
} catch (\Exception $e) {
// other versions return a Message object
if ($e->getCode() === 404) {
$this->io->note("Index: " . $index . " does not exist. Trying to create new index.");
$this->client->indices()->create(['index' => $index]);
$this->client->indices()->create($mappingParams);
} else {
$this->io->error("Exception: " . $e->getMessage());
$this->logger->error('Bibliography sync unsuccessful. Error creating elasticsearch index.');
Expand Down Expand Up @@ -348,6 +353,7 @@ protected function commitBibliography(): void
$index = $this->extConf['elasticIndexName'];

$params = [ 'body' => [] ];

$bulkCount = 0;
foreach ($this->dataSets as $document) {
$params['body'][] = [ 'index' =>
Expand All @@ -366,7 +372,7 @@ protected function commitBibliography(): void
$this->client->bulk($params);
}

protected function commitLocales(): void
/* protected function commitLocales(): void
{
$localeIndex = $this->extConf['elasticLocaleIndexName'];
$this->io->text('Committing the ' . $localeIndex . ' index');
Expand All @@ -390,5 +396,5 @@ protected function commitLocales(): void
$this->client->bulk($params);
$this->io->text('done');
}
}*/
}
67 changes: 67 additions & 0 deletions Classes/Processing/BibElasticMapping.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php
declare(strict_types=1);

namespace Slub\LisztBibliography\Processing;

// Explicit Elasticsearch mapping for the bibliography index. It defines a "fulltext" field
// and copies the content of "tx_lisztcommon_searchable" into it (via "copy_to").

/**
 * Provides the explicit Elasticsearch field mapping used when the bibliography index is created.
 */
class BibElasticMapping
{
    /**
     * Builds the index-creation parameters (index name plus mapping body).
     *
     * The mapping sets "dynamic" to false and lists every mapped field explicitly.
     *
     * @param string $index name of the Elasticsearch index the mapping is built for
     * @return array<string, mixed> array with the keys "index" and "body"
     */
    public static function getMappingParams(string $index): array
    {
        // Reusable field templates; PHP copies arrays by value, so sharing them is safe.
        $text = ['type' => 'text'];
        $keyword = ['type' => 'keyword'];
        $textWithKeyword = [
            'type' => 'text',
            'fields' => [
                'keyword' => ['type' => 'keyword', 'ignore_above' => 256],
            ],
        ];

        // First and last name are both copied into creators.fullName.
        $namePart = $textWithKeyword + ['copy_to' => 'creators.fullName'];

        $creators = [
            'type' => 'nested',
            'properties' => [
                'creatorType' => $keyword,
                'firstName' => $namePart,
                'lastName' => $namePart,
                // The keyword sub-field of fullName carries no "ignore_above" limit.
                'fullName' => [
                    'type' => 'text',
                    'fields' => ['keyword' => $keyword],
                ],
            ],
        ];

        $properties = [
            'version' => ['type' => 'long'],
            'title' => $text,
            'university' => $text,
            'bookTitle' => $text,
            'series' => $textWithKeyword,
            'publicationTitle' => $textWithKeyword,
            'place' => $textWithKeyword,
            'date' => $textWithKeyword,
            'archiveLocation' => $textWithKeyword,
            'itemType' => $keyword,
            'journalTitle' => $keyword,
            'creators' => $creators,
            'fulltext' => $text,
            'tx_lisztcommon_header' => $text,
            'tx_lisztcommon_body' => $text,
            'tx_lisztcommon_footer' => $text,
            // The searchable content is additionally copied into the "fulltext" field.
            'tx_lisztcommon_searchable' => $text + ['copy_to' => 'fulltext'],
            'tx_lisztcommon_boosted' => $text,
        ];

        return [
            'index' => $index,
            'body' => [
                'mappings' => [
                    'dynamic' => false,
                    'properties' => $properties,
                ],
            ],
        ];
    }
}

0 comments on commit f1e7bc5

Please sign in to comment.