From c415ffd937260747d5662d2542006847b136724b Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Tue, 22 Oct 2024 10:43:07 +0100 Subject: [PATCH] HPCC-32845 Guard against KJ reading TLKs as regular index parts Check that the last part in an index/subindex is a TLK if the meta data is missing. Also remove some redundant 'delayed' functionality. Signed-off-by: Jake Smith --- .../activities/hashdistrib/thhashdistrib.cpp | 3 +- .../activities/indexwrite/thindexwrite.cpp | 4 +-- thorlcr/activities/keydiff/thkeydiff.cpp | 5 +-- thorlcr/activities/keyedjoin/thkeyedjoin.cpp | 31 ++++++++--------- .../activities/keyedjoin/thkeyedjoinslave.cpp | 27 +++++---------- thorlcr/activities/keypatch/thkeypatch.cpp | 6 +--- thorlcr/thorutil/thormisc.cpp | 34 +++++++++++++++++++ thorlcr/thorutil/thormisc.hpp | 2 ++ 8 files changed, 65 insertions(+), 47 deletions(-) diff --git a/thorlcr/activities/hashdistrib/thhashdistrib.cpp b/thorlcr/activities/hashdistrib/thhashdistrib.cpp index b2c8a42e50f..36c2cba1194 100644 --- a/thorlcr/activities/hashdistrib/thhashdistrib.cpp +++ b/thorlcr/activities/hashdistrib/thhashdistrib.cpp @@ -26,6 +26,7 @@ #include "thorport.hpp" #include "thbufdef.hpp" #include "thexception.hpp" +#include "thormisc.hpp" #define NUMINPARALLEL 16 @@ -115,7 +116,7 @@ class IndexDistributeActivityMaster : public HashDistributeMasterBase checkFormatCrc(this, file, helper->getFormatCrc(), nullptr, helper->getFormatCrc(), nullptr, true); Owned fileDesc = file->getFileDescriptor(); Owned tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); - if (!tlkDesc->queryProperties().hasProp("@kind") || 0 != stricmp("topLevelKey", tlkDesc->queryProperties().queryProp("@kind"))) + if (!hasTLK(*file, this)) throw MakeActivityException(this, 0, "Cannot distribute using a non-distributed key: '%s'", scoped.str()); unsigned location; OwnedIFile iFile; diff --git a/thorlcr/activities/indexwrite/thindexwrite.cpp b/thorlcr/activities/indexwrite/thindexwrite.cpp index 1a9f5894a87..b9c2963ff8a 100644 --- a/thorlcr/activities/indexwrite/thindexwrite.cpp +++ b/thorlcr/activities/indexwrite/thindexwrite.cpp @@ -159,9 +159,9 @@ class IndexWriteActivityMaster : public CMasterActivity checkFormatCrc(this, _f, helper->getFormatCrc(), nullptr, helper->getFormatCrc(), nullptr, true); IDistributedFile *f = _f->querySuperFile(); if (!f) f = _f; - Owned existingTlk = f->getPart(f->numParts()-1); - if (!existingTlk->queryAttributes().hasProp("@kind") || 0 != stricmp("topLevelKey", existingTlk->queryAttributes().queryProp("@kind"))) + if (!hasTLK(*f, this)) throw MakeActivityException(this, 0, "Cannot build new key '%s' based on non-distributed key '%s'", fileName.get(), diName.get()); + Owned existingTlk = f->getPart(f->numParts()-1); IPartDescriptor *tlkDesc = fileDesc->queryPart(fileDesc->numParts()-1); IPropertyTree &props = tlkDesc->queryProperties(); if (existingTlk->queryAttributes().hasProp("@size")) diff --git a/thorlcr/activities/keydiff/thkeydiff.cpp b/thorlcr/activities/keydiff/thkeydiff.cpp index 8f688ee8abd..394e6b787f5 100644 --- a/thorlcr/activities/keydiff/thkeydiff.cpp +++ b/thorlcr/activities/keydiff/thkeydiff.cpp @@ -71,10 +71,7 @@ class CKeyDiffMaster : public CMasterActivity originalDesc.setown(originalIndexFile->getFileDescriptor()); newIndexDesc.setown(newIndexFile->getFileDescriptor()); - Owned tlkDesc = originalDesc->getPart(originalDesc->numParts()-1); - const char *kind = tlkDesc->queryProperties().queryProp("@kind"); - local = NULL == kind || 0 != stricmp("topLevelKey", kind); - + local = !hasTLK(*originalIndexFile, this); if (!local) width--; // 1 part == No n distributed / Monolithic key if (width > container.queryJob().querySlaves()) diff --git a/thorlcr/activities/keyedjoin/thkeyedjoin.cpp b/thorlcr/activities/keyedjoin/thkeyedjoin.cpp index 38fabfca861..af5d197721f 100644 --- a/thorlcr/activities/keyedjoin/thkeyedjoin.cpp +++ b/thorlcr/activities/keyedjoin/thkeyedjoin.cpp @@ -112,20 +112,21 @@ class CKeyedJoinMaster : public CMasterActivity unsigned numParts = fileDesc->numParts(); unsigned nextGroupStartPos = 0; + IDistributedFile *subFile = file; for (unsigned p=0; pqueryPart(p); - const char *kind = isIndexWithTlk ? part->queryProperties().queryProp("@kind") : nullptr; - if (!kind || !strsame("topLevelKey", kind)) + unsigned partIdx = part->queryPartIndex(); + unsigned subFileNum = NotFound; + unsigned subPartIdx = partIdx; + if (superFileDesc) + { + superFileDesc->mapSubPart(partIdx, subFileNum, subPartIdx); + partIdx = superWidth*subFileNum+subPartIdx; + subFile = &super->querySubFile(subFileNum, true); + } + if (!isIndexWithTlk || (1 == numParts) || (subPartIdx < (subFile->numParts()-1)) || !hasTLK(*subFile, nullptr)) { - unsigned partIdx = part->queryPartIndex(); - unsigned subfile = NotFound; - unsigned subPartIdx = partIdx; - if (superFileDesc) - { - superFileDesc->mapSubPart(partIdx, subfile, subPartIdx); - partIdx = superWidth*subfile+subPartIdx; - } if (activity.local) { if (activity.queryContainer().queryLocalData()) @@ -234,7 +235,7 @@ class CKeyedJoinMaster : public CMasterActivity slaveParts.push_back(p); } if (superFileDesc) - partIdx = superWidth*subfile+subPartIdx; + partIdx = superWidth*subFileNum+subPartIdx; partsByPartIdx.push_back(partIdx); assertex(partIdx < totalParts); partToSlave[partIdx] = mappedPos; @@ -387,10 +388,7 @@ class CKeyedJoinMaster : public CMasterActivity ForEach(*iter) { IDistributedFile &f = iter->query(); - unsigned np = f.numParts()-1; - IDistributedFilePart &part = f.queryPart(np); - const char *kind = part.queryAttributes().queryProp("@kind"); - bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey) + bool hasTlk = hasTLK(f, this); if (first) { first = false; @@ -419,8 +417,7 @@ class CKeyedJoinMaster : public CMasterActivity totalIndexParts = indexFile->numParts(); if (totalIndexParts) { - const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind"); - keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); + keyHasTlk = hasTLK(*indexFile, this); if (keyHasTlk) --totalIndexParts; } diff --git a/thorlcr/activities/keyedjoin/thkeyedjoinslave.cpp b/thorlcr/activities/keyedjoin/thkeyedjoinslave.cpp index 8844a99b7f3..e3710511abe 100644 --- a/thorlcr/activities/keyedjoin/thkeyedjoinslave.cpp +++ b/thorlcr/activities/keyedjoin/thkeyedjoinslave.cpp @@ -1445,7 +1445,7 @@ class CKeyedJoinSlave : public CSlaveActivity, implements IJoinProcessor, implem { unsigned partNo = partCopy & partMask; unsigned copy = partCopy >> partBits; - Owned keyIndex = activity.createPartKeyIndex(partNo, copy, false); + Owned keyIndex = activity.createPartKeyIndex(partNo, copy); partKeySet->addIndex(keyIndex.getClear()); } keyManager = createKeyMerger(helper->queryIndexRecordSize()->queryRecordAccessor(true), partKeySet, 0, &contextLogger, helper->hasNewSegmentMonitors(), false); @@ -2454,7 +2454,7 @@ class CKeyedJoinSlave : public CSlaveActivity, implements IJoinProcessor, implem } return tlkKeyIndexes.ordinality(); } - IKeyIndex *createPartKeyIndex(unsigned partNo, unsigned copy, bool delayed) + IKeyIndex *createPartKeyIndex(unsigned partNo, unsigned copy) { IPartDescriptor &filePart = allIndexParts.item(partNo); unsigned crc=0; @@ -2464,25 +2464,16 @@ class CKeyedJoinSlave : public CSlaveActivity, implements IJoinProcessor, implem StringBuffer filename; rfn.getPath(filename); - if (delayed) - { - Owned lazyIFileIO = queryThor().queryFileCache().lookupIFileIO(*this, indexName, filePart, nullptr); - Owned delayedFile = createDelayedFile(lazyIFileIO); - return createKeyIndex(filename, crc, *delayedFile, (unsigned) -1, false, 0); - } - else - { - /* NB: createKeyIndex here, will load the key immediately - * But that's okay, because we are only here on demand. - * The underlying IFileIO can later be closed by fhe file caching mechanism. - */ - Owned lazyIFileIO = queryThor().queryFileCache().lookupIFileIO(*this, indexName, filePart, nullptr); - return createKeyIndex(filename, crc, *lazyIFileIO, (unsigned) -1, false, 0); - } + /* NB: createKeyIndex here, will load the key immediately + * But that's okay, because we are only here on demand. + * The underlying IFileIO can later be closed by fhe file caching mechanism. + */ + Owned lazyIFileIO = queryThor().queryFileCache().lookupIFileIO(*this, indexName, filePart, nullptr); + return createKeyIndex(filename, crc, *lazyIFileIO, (unsigned) -1, false, 0); } IKeyManager *createPartKeyManager(unsigned partNo, unsigned copy, IContextLogger *ctx) { - Owned keyIndex = createPartKeyIndex(partNo, copy, false); + Owned keyIndex = createPartKeyIndex(partNo, copy); return createLocalKeyManager(helper->queryIndexRecordSize()->queryRecordAccessor(true), keyIndex, ctx, helper->hasNewSegmentMonitors(), false); } const void *preparePendingLookupRow(void *row, size32_t maxSz, const void *lhsRow, size32_t keySz) diff --git a/thorlcr/activities/keypatch/thkeypatch.cpp b/thorlcr/activities/keypatch/thkeypatch.cpp index 3f279cbe82c..50cb4ec0354 100644 --- a/thorlcr/activities/keypatch/thkeypatch.cpp +++ b/thorlcr/activities/keypatch/thkeypatch.cpp @@ -71,11 +71,7 @@ class CKeyPatchMaster : public CMasterActivity originalDesc.setown(originalIndexFile->getFileDescriptor()); patchDesc.setown(patchFile->getFileDescriptor()); - - Owned tlkDesc = originalDesc->getPart(originalDesc->numParts()-1); - const char *kind = tlkDesc->queryProperties().queryProp("@kind"); - local = NULL == kind || 0 != stricmp("topLevelKey", kind); - + local = !hasTLK(*originalIndexFile, this); if (!local && width > 1) width--; // 1 part == No n distributed / Monolithic key if (width > container.queryJob().querySlaves()) diff --git a/thorlcr/thorutil/thormisc.cpp b/thorlcr/thorutil/thormisc.cpp index 395b0905131..12926c92956 100644 --- a/thorlcr/thorutil/thormisc.cpp +++ b/thorlcr/thorutil/thormisc.cpp @@ -29,6 +29,8 @@ #include "jsocket.hpp" #include "jmutex.hpp" +#include "jhtree.hpp" + #include "commonext.hpp" #include "dadfs.hpp" #include "dasds.hpp" @@ -1703,3 +1705,35 @@ void saveWuidToFile(const char *wuid) wuidFileIO->write(0, strlen(wuid), wuid); wuidFileIO->close(); } + +bool hasTLK(IDistributedFile &file, CActivityBase *activity) +{ + unsigned np = file.numParts(); + if (np<=1) // NB: a better test would be to only continue if this is a width that is +1 of group it's based on, but not worth checking + return false; + IDistributedFilePart &part = file.queryPart(np-1); + bool keyHasTlk = strisame("topLevelKey", part.queryAttributes().queryProp("@kind")); + if (!keyHasTlk) + { + // See HPCC-32845 - check if TLK flag is missing from TLK part + // It is very likely the last part should be a TLK. Even a local key (>1 parts) has a TLK by default (see buildLocalTlks) + RemoteFilename rfn; + part.getFilename(rfn); + StringBuffer filename; + rfn.getPath(filename); + Owned index = createKeyIndex(filename, 0, false, 0); + dbgassertex(index); + if (index->isTopLevelKey()) + { + if (activity) + { + Owned e = MakeActivityException(activity, 0, "TLK file part of file %s is missing kind=\"topLevelKey\" flag. The meta data should be fixed!", file.queryLogicalName()); + reportExceptionToWorkunitCheckIgnore(activity->queryJob().queryWorkUnit(), e, SeverityWarning); + StringBuffer errMsg; + UWARNLOG("%s", e->errorMessage(errMsg).str()); + } + keyHasTlk = true; + } + } + return keyHasTlk; +} diff --git a/thorlcr/thorutil/thormisc.hpp b/thorlcr/thorutil/thormisc.hpp index 59d1797d1e8..ed04efc98a8 100644 --- a/thorlcr/thorutil/thormisc.hpp +++ b/thorlcr/thorutil/thormisc.hpp @@ -723,4 +723,6 @@ class graph_decl CThorPerfTracer : protected PerfTracer extern graph_decl void saveWuidToFile(const char *wuid); +extern graph_decl bool hasTLK(IDistributedFile &file, CActivityBase *activity); + #endif