From c069a40527730f0336be0d375f8feca366087d9a Mon Sep 17 00:00:00 2001 From: thomas-sc Date: Tue, 3 Dec 2024 17:44:05 +0100 Subject: [PATCH 1/6] add alias handling and atomic swap and remove of old indexes --- Classes/Command/IndexCommand.php | 163 ++++++++++++++++++++++++++----- 1 file changed, 140 insertions(+), 23 deletions(-) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index a05e4dc..e357435 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -143,33 +143,46 @@ protected function fullSync(InputInterface $input): void $collection = new Collection($response->getBody()); $this->bibliographyItems = $collection->pluck('data'); $cursor = 0; // set Cursor to 0, not to bulk size - $index = $this->extConf['elasticIndexName']; - $mappingParams = BibElasticMapping::getMappingParams($index); + // we are working with alias names to swap indexes from zotero_temp to zotero after successfully indexing + $tempIndexAlias = $this->extConf['elasticIndexName'].'_temp'; + $indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); + $tempIndexParams = BibElasticMapping::getMappingParams($indexName); + + // add alias name 'zotero_temp to this index + // and add a wildcard alias to find all zotero_* indices with the alias zotero-index + $aliasParams = [ + 'body' => [ + 'actions' => [ + [ + 'add' => [ + 'index' => $indexName, + 'alias' => $tempIndexAlias, + ], + ], + [ + 'add' => [ + 'index' => $this->extConf['elasticIndexName'].'_*', + 'alias' => $this->extConf['elasticIndexName'].'-index', + ], + ] + ] + ] + ]; try { - // in older Elasticsearch versions (until 7) exists returns a bool - if ($this->client->indices()->exists(['index' => $index])) { - $this->client->indices()->delete(['index' => $index]); - $this->client->indices()->create($mappingParams); - } + $this->client->indices()->create($tempIndexParams); + $this->client->indices()->updateAliases($aliasParams); } catch (\Exception $e) { - // other versions return a Message object - if ($e->getCode() === 404) { - $this->io->note("Index: " . $index . " does not exist. Trying to create new index."); - $this->client->indices()->create($mappingParams); - } else { $this->io->error("Exception: " . $e->getMessage()); $this->logger->error('Bibliography sync unsuccessful. Error creating elasticsearch index.'); throw new \Exception('Bibliography sync unsuccessful.'); - } } $apiCounter = self::API_TRIALS; while ($cursor < $this->total) { try { - $this->sync($cursor, 0); - + $this->sync($indexName, $cursor, 0); $apiCounter = self::API_TRIALS; $remainingItems = $this->total - $cursor; $advanceBy = min($remainingItems, $this->bulkSize); @@ -188,6 +201,11 @@ protected function fullSync(InputInterface $input): void } } } + + // swap alias for index from zotero_temp to zotero and remove old indexes (keep the last one) + $this->swapIndexAliases($indexName, $tempIndexAlias); + //delete old indexes + $this->deleteOldIndexes($indexName); $this->io->progressFinish(); } @@ -196,7 +214,7 @@ protected function versionedSync(int $version): void $apiCounter = self::API_TRIALS; while (true) { try { - $this->sync(0, $version); + $this->sync( $this->extConf['elasticIndexName'], 0, $version); $this->io->text('done'); return; } catch (\Exception $e) { @@ -214,13 +232,13 @@ protected function versionedSync(int $version): void } } - protected function sync(int $cursor = 0, int $version = 0): void + protected function sync(string $indexName, int $cursor = 0, int $version = 0,): void { $this->fetchBibliography($cursor, $version); $this->fetchCitations($cursor, $version); $this->fetchTeiData($cursor, $version); $this->buildDataSets(); - $this->commitBibliography(); + $this->commitBibliography($indexName); } protected function getVersion(InputInterface $input): int @@ -344,21 +362,18 @@ protected function buildDataSets(): void }); } - protected function commitBibliography(): void + protected function commitBibliography(string $indexName): void { if ($this->dataSets->count() == 0) { $this->io->text('no new bibliographic entries'); return; } - $index = $this->extConf['elasticIndexName']; - $params = [ 'body' => [] ]; - $bulkCount = 0; foreach ($this->dataSets as $document) { $params['body'][] = [ 'index' => [ - '_index' => $index, + '_index' => $indexName, '_id' => $document['key'] ] ]; @@ -372,6 +387,108 @@ protected function commitBibliography(): void $this->client->bulk($params); } + protected function swapIndexAliases(string $indexName, string $tempIndexAlias): void + { + // get index with alias = zotero + try { + $aliasesRequest = $this->client->indices()->getAlias(['name' => $this->extConf['elasticIndexName']]); + $aliasesArray = $aliasesRequest->asArray(); + + foreach ($aliasesArray as $index => $aliasArray) { + $this->io->note('Remove alias "' .$this->extConf['elasticIndexName']. '" from index '. $index . 'and add it to ' . $indexName ); + // get index name with alias 'zotero' + if (array_key_exists($this->extConf['elasticIndexName'], $aliasArray['aliases'])) { + //swap alias from old to new index + $aliasParams = [ + 'body' => [ + 'actions' => [ + [ + 'remove' => [ + 'index' => $index, + 'alias' => $this->extConf['elasticIndexName'], + ], + ], + [ + 'add' => [ + 'index' => $indexName, + 'alias' => $this->extConf['elasticIndexName'], + ], + ] + ] + ] + ]; + $this->client->indices()->updateAliases($aliasParams); + } + } + } + catch (\Exception $e) { + // other versions return a Message object + if ($e->getCode() === 404) { + $this->io->note("Alias: " . $this->extConf['elasticIndexName'] . " does not exist. Move alias to ".$indexName); + // rename alias name from temp index to zotero + $aliasParams = [ + 'body' => [ + 'actions' => [ + [ + 'remove' => [ + 'index' => $indexName, + 'alias' => $tempIndexAlias, + ], + ], + [ + 'add' => [ + 'index' => $indexName, + 'alias' => $this->extConf['elasticIndexName'], + ], + ] + ] + ] + ]; + $this->client->indices()->updateAliases($aliasParams); + + } else { + $this->io->error("Exception: " . $e->getMessage()); + $this->logger->error('Bibliography sync unsuccessful. Error getting alias: ' . $this->extConf['elasticIndexName']); + throw new \Exception('Bibliography sync unsuccessful.', 0, $e); + } + } + } + + protected function deleteOldIndexes($indexName): void + { + try { + $aliasesRequest = $this->client->indices()->getAlias(['name' => $this->extConf['elasticIndexName'].'_*']); + $aliasesArray = $aliasesRequest->asArray(); + + // sort $aliasesArray by key name + ksort($aliasesArray); + + // remove current key $indexName from array + unset($aliasesArray[$indexName]); + + // remove the last key (we keep the last two indexes) + array_pop($aliasesArray); + + foreach ($aliasesArray as $index => $aliasArray) { + $this->io->note("Delete index " . $index); + $this->client->indices()->delete(['index' => $index]); + } + + } + catch (\Exception $e) { + // other versions return a Message object + if ($e->getCode() === 404) { + $this->io->note("Nothing to remove, there are no indexes with alias " . $this->extConf['elasticIndexName'].'_*'); + } else { + $this->io->error("Exception: " . $e->getMessage()); + $this->logger->error('Bibliography sync unsuccessful. Error getting alias: ' . $this->extConf['elasticIndexName'].'_*'); + throw new \Exception('Bibliography sync unsuccessful.', 0, $e); + } + } + + + } + /* protected function commitLocales(): void { $localeIndex = $this->extConf['elasticLocaleIndexName']; From 001ee8b79f93cfa8a1062ca079e85396a2de21c3 Mon Sep 17 00:00:00 2001 From: thomas-sc Date: Wed, 4 Dec 2024 09:42:22 +0100 Subject: [PATCH 2/6] change selection of indexes to delete from wildcard alias to "zotero-index" alias --- Classes/Command/IndexCommand.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index e357435..0488941 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -457,7 +457,7 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): protected function deleteOldIndexes($indexName): void { try { - $aliasesRequest = $this->client->indices()->getAlias(['name' => $this->extConf['elasticIndexName'].'_*']); + $aliasesRequest = $this->client->indices()->getAlias(['name' => $this->extConf['elasticIndexName'].'-index']); $aliasesArray = $aliasesRequest->asArray(); // sort $aliasesArray by key name @@ -478,10 +478,10 @@ protected function deleteOldIndexes($indexName): void catch (\Exception $e) { // other versions return a Message object if ($e->getCode() === 404) { - $this->io->note("Nothing to remove, there are no indexes with alias " . $this->extConf['elasticIndexName'].'_*'); + $this->io->note("Nothing to remove, there are no indexes with alias " . $this->extConf['elasticIndexName'].'-index'); } else { $this->io->error("Exception: " . $e->getMessage()); - $this->logger->error('Bibliography sync unsuccessful. Error getting alias: ' . $this->extConf['elasticIndexName'].'_*'); + $this->logger->error('Bibliography sync unsuccessful. Error getting alias: ' . $this->extConf['elasticIndexName'].'-index'); throw new \Exception('Bibliography sync unsuccessful.', 0, $e); } } From 117161035cbd4eb90efa990ad1692d9292a04989 Mon Sep 17 00:00:00 2001 From: thomas-sc Date: Wed, 4 Dec 2024 10:35:24 +0100 Subject: [PATCH 3/6] fix remove _temp Alias on old indexes --- Classes/Command/IndexCommand.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index 0488941..f59310e 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -413,6 +413,12 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): 'index' => $indexName, 'alias' => $this->extConf['elasticIndexName'], ], + ], + [ + 'remove' => [ + 'index' => $indexName, + 'alias' => $tempIndexAlias, + ], ] ] ] From 1e385fb143988be6dfd6ac3eabc9caaff37b55c2 Mon Sep 17 00:00:00 2001 From: Matthias Richter Date: Wed, 4 Dec 2024 11:22:48 +0100 Subject: [PATCH 4/6] Promote indexName to property --- Classes/Command/IndexCommand.php | 51 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index f59310e..491b3db 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -37,21 +37,22 @@ class IndexCommand extends Command { - protected ZoteroApi $bibApi; const API_TRIALS = 3; protected string $apiKey; + protected ZoteroApi $bibApi; protected Collection $bibliographyItems; - protected Collection $deletedItems; - protected Collection $teiDataSets; - protected Collection $dataSets; + protected int $bulkSize; protected Client $client; + protected Collection $dataSets; + protected Collection $deletedItems; protected array $extConf; + readonly string $indexName; protected SymfonyStyle $io; - protected int $bulkSize; - protected int $total; protected Collection $locales; protected Collection $localizedCitations; + protected Collection $teiDataSets; + protected int $total; public function __construct( private readonly SiteFinder $siteFinder, @@ -101,6 +102,7 @@ protected function configure(): void protected function initialize(InputInterface $input, OutputInterface $output): void { $this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography'); + $this->indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); $this->client = ElasticClientBuilder::getClient(); $this->apiKey = $this->extConf['zoteroApiKey']; $this->io = new SymfonyStyle($input, $output); @@ -145,8 +147,7 @@ protected function fullSync(InputInterface $input): void $cursor = 0; // set Cursor to 0, not to bulk size // we are working with alias names to swap indexes from zotero_temp to zotero after successfully indexing $tempIndexAlias = $this->extConf['elasticIndexName'].'_temp'; - $indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); - $tempIndexParams = BibElasticMapping::getMappingParams($indexName); + $tempIndexParams = BibElasticMapping::getMappingParams($this->indexName); // add alias name 'zotero_temp to this index // and add a wildcard alias to find all zotero_* indices with the alias zotero-index @@ -155,7 +156,7 @@ protected function fullSync(InputInterface $input): void 'actions' => [ [ 'add' => [ - 'index' => $indexName, + 'index' => $this->indexName, 'alias' => $tempIndexAlias, ], ], @@ -182,7 +183,7 @@ protected function fullSync(InputInterface $input): void while ($cursor < $this->total) { try { - $this->sync($indexName, $cursor, 0); + $this->sync($cursor, 0); $apiCounter = self::API_TRIALS; $remainingItems = $this->total - $cursor; $advanceBy = min($remainingItems, $this->bulkSize); @@ -203,9 +204,9 @@ protected function fullSync(InputInterface $input): void } // swap alias for index from zotero_temp to zotero and remove old indexes (keep the last one) - $this->swapIndexAliases($indexName, $tempIndexAlias); + $this->swapIndexAliases($tempIndexAlias); //delete old indexes - $this->deleteOldIndexes($indexName); + $this->deleteOldIndexes(); $this->io->progressFinish(); } @@ -214,7 +215,7 @@ protected function versionedSync(int $version): void $apiCounter = self::API_TRIALS; while (true) { try { - $this->sync( $this->extConf['elasticIndexName'], 0, $version); + $this->sync(0, $version); $this->io->text('done'); return; } catch (\Exception $e) { @@ -232,13 +233,13 @@ protected function versionedSync(int $version): void } } - protected function sync(string $indexName, int $cursor = 0, int $version = 0,): void + protected function sync(int $cursor = 0, int $version = 0,): void { $this->fetchBibliography($cursor, $version); $this->fetchCitations($cursor, $version); $this->fetchTeiData($cursor, $version); $this->buildDataSets(); - $this->commitBibliography($indexName); + $this->commitBibliography(); } protected function getVersion(InputInterface $input): int @@ -362,7 +363,7 @@ protected function buildDataSets(): void }); } - protected function commitBibliography(string $indexName): void + protected function commitBibliography(): void { if ($this->dataSets->count() == 0) { $this->io->text('no new bibliographic entries'); @@ -373,7 +374,7 @@ protected function commitBibliography(string $indexName): void foreach ($this->dataSets as $document) { $params['body'][] = [ 'index' => [ - '_index' => $indexName, + '_index' => $this->indexName, '_id' => $document['key'] ] ]; @@ -387,7 +388,7 @@ protected function commitBibliography(string $indexName): void $this->client->bulk($params); } - protected function swapIndexAliases(string $indexName, string $tempIndexAlias): void + protected function swapIndexAliases(string $tempIndexAlias): void { // get index with alias = zotero try { @@ -395,7 +396,7 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): $aliasesArray = $aliasesRequest->asArray(); foreach ($aliasesArray as $index => $aliasArray) { - $this->io->note('Remove alias "' .$this->extConf['elasticIndexName']. '" from index '. $index . 'and add it to ' . $indexName ); + $this->io->note('Remove alias "' .$this->extConf['elasticIndexName']. '" from index '. $index . ' and add it to ' . $this->indexName ); // get index name with alias 'zotero' if (array_key_exists($this->extConf['elasticIndexName'], $aliasArray['aliases'])) { //swap alias from old to new index @@ -410,7 +411,7 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): ], [ 'add' => [ - 'index' => $indexName, + 'index' => $this->indexName, 'alias' => $this->extConf['elasticIndexName'], ], ], @@ -430,20 +431,20 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): catch (\Exception $e) { // other versions return a Message object if ($e->getCode() === 404) { - $this->io->note("Alias: " . $this->extConf['elasticIndexName'] . " does not exist. Move alias to ".$indexName); + $this->io->note("Alias: " . $this->extConf['elasticIndexName'] . " does not exist. Move alias to ".$this->indexName); // rename alias name from temp index to zotero $aliasParams = [ 'body' => [ 'actions' => [ [ 'remove' => [ - 'index' => $indexName, + 'index' => $this->indexName, 'alias' => $tempIndexAlias, ], ], [ 'add' => [ - 'index' => $indexName, + 'index' => $this->indexName, 'alias' => $this->extConf['elasticIndexName'], ], ] @@ -460,7 +461,7 @@ protected function swapIndexAliases(string $indexName, string $tempIndexAlias): } } - protected function deleteOldIndexes($indexName): void + protected function deleteOldIndexes(): void { try { $aliasesRequest = $this->client->indices()->getAlias(['name' => $this->extConf['elasticIndexName'].'-index']); @@ -470,7 +471,7 @@ protected function deleteOldIndexes($indexName): void ksort($aliasesArray); // remove current key $indexName from array - unset($aliasesArray[$indexName]); + unset($aliasesArray[$this->indexName]); // remove the last key (we keep the last two indexes) array_pop($aliasesArray); From ea96685ad7b034ad92f64d9f2021de135d15a873 Mon Sep 17 00:00:00 2001 From: Matthias Richter Date: Wed, 4 Dec 2024 15:45:19 +0100 Subject: [PATCH 5/6] Promote several variables to properties --- Classes/Command/IndexCommand.php | 40 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index 491b3db..50cf66b 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -39,8 +39,7 @@ class IndexCommand extends Command const API_TRIALS = 3; - protected string $apiKey; - protected ZoteroApi $bibApi; + protected string $zoteroApiKey; protected Collection $bibliographyItems; protected int $bulkSize; protected Client $client; @@ -48,9 +47,11 @@ class IndexCommand extends Command protected Collection $deletedItems; protected array $extConf; readonly string $indexName; + protected InputInterface $input; protected SymfonyStyle $io; protected Collection $locales; protected Collection $localizedCitations; + protected OutputInterface $output; protected Collection $teiDataSets; protected int $total; @@ -101,21 +102,23 @@ protected function configure(): void protected function initialize(InputInterface $input, OutputInterface $output): void { + $this->input = $input; + $this->output = $output; $this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography'); $this->indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); $this->client = ElasticClientBuilder::getClient(); - $this->apiKey = $this->extConf['zoteroApiKey']; - $this->io = new SymfonyStyle($input, $output); + $this->zoteroApiKey = $this->extConf['zoteroApiKey']; + $this->io = GeneralUtility::makeInstance(SymfonyStyle::class, $this->input, $this->output); $this->io->title($this->getDescription()); } protected function execute(InputInterface $input, OutputInterface $output): int { $this->bulkSize = (int) $this->extConf['zoteroBulkSize']; - $version = $this->getVersion($input); + $version = $this->getVersion(); if ($version == 0) { $this->io->text('Full data synchronization requested.'); - $this->fullSync($input); + $this->fullSync(); $this->logger->info('Full data synchronization successful.'); } else { $this->io->text('Synchronizing all data from version ' . $version); @@ -125,17 +128,17 @@ protected function execute(InputInterface $input, OutputInterface $output): int return Command::SUCCESS; } - protected function fullSync(InputInterface $input): void + protected function fullSync(): void { - $client = new ZoteroApi($this->extConf['zoteroApiKey']); + $client = GeneralUtility::makeInstance(ZoteroApi::class, $this->zoteroApiKey); $response = $client-> group($this->extConf['zoteroGroupId'])-> items()-> top()-> limit(1)-> send(); - if ($input->getOption('total')) { - $this->total = (int) $input->getOption('total'); + if ($this->input->getOption('total')) { + $this->total = (int) $this->input->getOption('total'); } else { $this->total = (int) $response->getHeaders()['Total-Results'][0]; } @@ -233,7 +236,7 @@ protected function versionedSync(int $version): void } } - protected function sync(int $cursor = 0, int $version = 0,): void + protected function sync(int $cursor = 0, int $version = 0): void { $this->fetchBibliography($cursor, $version); $this->fetchCitations($cursor, $version); @@ -242,22 +245,22 @@ protected function sync(int $cursor = 0, int $version = 0,): void $this->commitBibliography(); } - protected function getVersion(InputInterface $input): int + protected function getVersion(): int { // if -a is specified, perfom a full update - if ($input->getOption('all')) { + if ($this->input->getOption('all')) { return 0; } // also set version to 0 for dev tests if the total results are limited - if ($input->getOption('total')) { - $this->io->text('Total results limited to: '. $input->getOption('total')); + if ($this->input->getOption('total')) { + $this->io->text('Total results limited to: '. $this->input->getOption('total')); return 0; } // if a version is manually specified, perform sync from this version - $argumentVersion = $input->getArgument('version'); + $argumentVersion = $this->input->getArgument('version'); if ($argumentVersion > 0) { return (int) $argumentVersion; } @@ -294,7 +297,7 @@ protected function getVersion(InputInterface $input): int protected function fetchBibliography(int $cursor, int $version): void { - $client = new ZoteroApi($this->extConf['zoteroApiKey']); + $client = GeneralUtility::makeInstance(ZoteroApi::class, $this->zoteroApiKey); $response = $client-> group($this->extConf['zoteroGroupId'])-> items()-> @@ -310,7 +313,6 @@ protected function fetchBibliography(int $cursor, int $version): void protected function fetchCitations(int $cursor, int $version): void { - $this->localizedCitations = new Collection(); $this->locales->each(function($locale) use($cursor, $version) { $this->fetchCitationLocale($locale, $cursor, $version); }); } @@ -339,7 +341,7 @@ protected function fetchCitationLocale(string $locale, int $cursor, int $version protected function fetchTeiData(int $cursor, int $version): void { - $client = new ZoteroApi($this->extConf['zoteroApiKey']); + $client = GeneralUtility::makeInstance(ZoteroApi::class, $this->zoteroApiKey); $response = $client-> group($this->extConf['zoteroGroupId'])-> items()-> From 653712389338ca63ecb6cb32eaad21457d83aa33 Mon Sep 17 00:00:00 2001 From: Matthias Richter Date: Wed, 4 Dec 2024 15:47:29 +0100 Subject: [PATCH 6/6] Fixes --- Classes/Command/IndexCommand.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Classes/Command/IndexCommand.php b/Classes/Command/IndexCommand.php index 50cf66b..3a9ab55 100644 --- a/Classes/Command/IndexCommand.php +++ b/Classes/Command/IndexCommand.php @@ -60,6 +60,8 @@ public function __construct( private readonly LoggerInterface $logger ) { parent::__construct(); + $this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography'); + $this->indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); $this->initLocales(); } @@ -104,8 +106,6 @@ protected function initialize(InputInterface $input, OutputInterface $output): v { $this->input = $input; $this->output = $output; - $this->extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('liszt_bibliography'); - $this->indexName = $this->extConf['elasticIndexName'] . '_' . date('Ymd_His'); $this->client = ElasticClientBuilder::getClient(); $this->zoteroApiKey = $this->extConf['zoteroApiKey']; $this->io = GeneralUtility::makeInstance(SymfonyStyle::class, $this->input, $this->output); @@ -419,7 +419,7 @@ protected function swapIndexAliases(string $tempIndexAlias): void ], [ 'remove' => [ - 'index' => $indexName, + 'index' => $this->indexName, 'alias' => $tempIndexAlias, ], ]