Skip to content

Commit

Permalink
Merge pull request #19286 from shamser/issue32880
Browse files Browse the repository at this point in the history
HPCC-32880 Ensure that legacy write costs are not lost after file read operation

Reviewed-by: Jake Smith <[email protected]>
Reviewed-by: Gavin Halliday <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Jan 6, 2025
2 parents 88caa3f + e96d6d2 commit d8a4cb3
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 93 deletions.
78 changes: 32 additions & 46 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,27 @@ extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_ty
return writeCost;
}


extern da_decl cost_type updateCostAndNumReads(IDistributedFile *file, stat_type numDiskReads, cost_type curReadCost)
{
const IPropertyTree & fileAttr = file->queryAttributes();

if (!curReadCost)
curReadCost = calcFileAccessCost(file, 0, numDiskReads);
cost_type legacyReadCost = 0;
if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)))
{
if (!isFileKey(fileAttr))
{
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
legacyReadCost = calcFileAccessCost(file, 0, prevDiskReads);
}
}
file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), numDiskReads);
return curReadCost;
}

// JCSMORE - I suspect this function should be removed/deprecated. It does not deal with dirPerPart or striping.
// makePhysicalPartName supports both, but does not deal with groups/endpoints)
RemoteFilename &constructPartFilename(IGroup *grp,unsigned partno,unsigned partmax,const char *name,const char *partmask,const char *partdir,unsigned copy,ClusterPartDiskMapSpec &mspec,RemoteFilename &rfn)
Expand Down Expand Up @@ -5026,49 +5047,29 @@ protected: friend class CDistributedFilePart;
}
virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) override
{
atRestCost = 0;
accessCost = 0;
CDateTime dt;
getModificationTime(dt);
double fileAgeDays = difftime(time(nullptr), dt.getSimple())/(24*60*60);
double sizeGB = getDiskSize(true, false) / ((double)1024 * 1024 * 1024);
const IPropertyTree *attrs = root->queryPropTree("Attr");
bool doLegacyAccessCostCalc = false;
__int64 numDiskWrites = 0, numDiskReads = 0;
if (attrs)
{
if (hasReadWriteCostFields(*attrs))
{
// Newer files have readCost and writeCost attributes
accessCost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
}
else
{
// Costs need to be calculated from numDiskReads and numDiskWrites for legacy files
numDiskWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
doLegacyAccessCostCalc = true;
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!isFileKey(*attrs))
numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
}
}

if (isEmptyString(cluster))
{
StringArray clusterNames;
unsigned countClusters = getClusterNames(clusterNames);
for (unsigned i = 0; i < countClusters; i++)
atRestCost += calcFileAtRestCost(clusterNames[i], sizeGB, fileAgeDays);
if (countClusters && doLegacyAccessCostCalc)
{
// NB: numDiskReads/numDiskWrites are stored at the file level, not per cluster.
// So cannot calculate accessCost per cluster, assume cost is based on 1st.
accessCost = calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads);
}
if (countClusters)
cluster = clusterNames[0];
}
else
{
atRestCost += calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
if (doLegacyAccessCostCalc)
accessCost = calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
atRestCost = calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
}
if (attrs)
accessCost = getReadCost(*attrs, cluster) + getWriteCost(*attrs, cluster);
}
};

Expand Down Expand Up @@ -13498,25 +13499,10 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu
cost_type atRestCost = calcFileAtRestCost(nodeGroup, sizeGB, fileAgeDays);
file->setPropInt64(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);

// Dyamically calc and set the access cost field and for legacy files set read/write cost fields
cost_type accessCost = 0;
if (hasReadWriteCostFields(*file))
{
accessCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
}
else // Calc access cost from numDiskRead & numDiskWrites for Legacy files
{
cost_type legacyReadCost = getLegacyReadCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost);

cost_type legacyWriteCost = getLegacyWriteCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), legacyWriteCost);

accessCost = legacyReadCost + legacyWriteCost;
}
cost_type readCost = getReadCost(*file, nodeGroup, true);
cost_type writeCost = getWriteCost(*file, nodeGroup, true);
cost_type accessCost = readCost + writeCost;
file->setPropInt64(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);

// Dymically calc and set the total cost field
file->setPropInt64(getDFUQResultFieldName(DFUQRFcost), atRestCost + accessCost);
}

Expand Down
88 changes: 71 additions & 17 deletions dali/base/dadfs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -890,40 +890,94 @@ extern da_decl cost_type calcFileAtRestCost(const char * cluster, double sizeGB,
extern da_decl cost_type calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads);
extern da_decl cost_type calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads);
extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_type numDiskWrites);
extern da_decl cost_type updateCostAndNumReads(IDistributedFile *file, stat_type numDiskReads, cost_type curReadCost=0); // Update readCost and numDiskReads - return calculated read cost
constexpr bool defaultPrivilegedUser = true;
constexpr bool defaultNonPrivilegedUser = false;

extern da_decl void configurePreferredPlanes();
inline bool hasReadWriteCostFields(const IPropertyTree & fileAttr)

// Get read cost from readCost field or calculate legacy read cost
// - migrateLegacyCost: if true, update readCost field with legacy read cost
template<typename Source>
inline cost_type getReadCost(IPropertyTree & fileAttr, Source source, bool migrateLegacyCost = false)
{
return fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)) || fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost));
if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)))
return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0);
else
{
// Calculate legacy read cost from numDiskReads
// (However, it is not possible to accurately calculate read cost for key
// files, as the reads may have been from page cache and not from disk.)
if (!isFileKey(fileAttr) && source)
{
stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
cost_type readCost = calcFileAccessCost(source, 0, numDiskReads);
if (migrateLegacyCost)
fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), readCost);
return readCost;
}
}
return 0;
}

// Get read cost from readCost field or calculate legacy read cost
template<typename Source>
inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source)
inline cost_type getReadCost(const IPropertyTree & fileAttr, Source source)
{
// Legacy files do not have @readCost attribute, so calculate from numDiskRead
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))
&& !isFileKey(fileAttr))
if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)))
return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0);
else
{
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
return calcFileAccessCost(source, 0, prevDiskReads);
// Calculate legacy read cost from numDiskReads
// (However, it is not possible to accurately calculate read cost for key
// files, as the reads may have been from page cache and not from disk.)
if (!isFileKey(fileAttr) && source)
{
stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
return calcFileAccessCost(source, 0, numDiskReads);
}
}
else
return 0;
return 0;
}

// Get write cost from writeCost field or calculate legacy write cost
// - migrateLegacyCost: if true, update writeCost field with legacy write cost
template<typename Source>
inline cost_type getLegacyWriteCost(const IPropertyTree & fileAttr, Source source)
inline cost_type getWriteCost(IPropertyTree & fileAttr, Source source, bool migrateLegacyCost = false)
{
// Legacy files do not have @writeCost attribute, so calculate from numDiskWrites
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost)))
return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0);
else
{
stat_type prevDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
return calcFileAccessCost(source, prevDiskWrites, 0);
// Calculate legacy write cost from numDiskWrites
if (source)
{
stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
cost_type writeCost = calcFileAccessCost(source, numDiskWrites, 0);
if (migrateLegacyCost)
fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost);
return writeCost;
}
}
return 0;
}

// Get write cost from writeCost field or calculate legacy write cost
template<typename Source>
inline cost_type getWriteCost(const IPropertyTree & fileAttr, Source source)
{
if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost)))
return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0);
else
return 0;
{
// Calculate legacy write cost from numDiskWrites
if (source)
{
stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
return calcFileAccessCost(source, numDiskWrites, 0);
}
}
return 0;
}

extern da_decl bool doesPhysicalMatchMeta(IPartDescriptor &partDesc, IFile &iFile, offset_t &expectedSize, offset_t &actualSize);
Expand Down
15 changes: 4 additions & 11 deletions dali/ft/filecopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3851,10 +3851,10 @@ cost_type FileSprayer::updateSourceProperties()
// Update file readCost and numReads in file properties and do the same for subfiles
if (distributedSource)
{
cost_type totalReadCost = 0;
IDistributedSuperFile * superSrc = distributedSource->querySuperFile();
if (superSrc && superSrc->numSubFiles() > 0)
{
cost_type totalReadCost = 0;
Owned<IFileDescriptor> fDesc = superSrc->getFileDescriptor();
ISuperFileDescriptor *superFDesc = fDesc->querySuperFileDescriptor();
ForEachItemIn(idx, partition)
Expand All @@ -3876,27 +3876,20 @@ cost_type FileSprayer::updateSourceProperties()
// so query the first (and only) subfile
subfile = &superSrc->querySubFile(0);
}
cost_type curReadCost = calcFileAccessCost(subfile, 0, curProgress.numReads);
subfile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curProgress.numReads);
cost_type legacyReadCost = getLegacyReadCost(subfile->queryAttributes(), subfile);
subfile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
totalReadCost += curReadCost;
totalReadCost += updateCostAndNumReads(subfile, curProgress.numReads);
}
else
{
// not sure if src superfile can have whichInput==-1 (but if so, this is best effort to calc cost)
totalReadCost += calcFileAccessCost(distributedSource, 0, curProgress.numReads);
}
}
return updateCostAndNumReads(distributedSource, totalNumReads, totalReadCost);
}
else
{
totalReadCost = calcFileAccessCost(distributedSource, 0, totalNumReads);
return updateCostAndNumReads(distributedSource, totalNumReads);
}
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), totalNumReads);
cost_type legacyReadCost = getLegacyReadCost(distributedSource->queryAttributes(), distributedSource);
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + totalReadCost);
return totalReadCost; // return the total cost of this file operation (exclude previous and legacy read costs)
}
return 0;
}
Expand Down
7 changes: 1 addition & 6 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8547,12 +8547,7 @@ void CHThorDiskReadBaseActivity::closepart()
dFile = &(super->querySubFile(subfile, true));
}
}
IPropertyTree & fileAttr = dFile->queryAttributes();
cost_type legacyReadCost = getLegacyReadCost(fileAttr, dFile);
cost_type curReadCost = calcFileAccessCost(dFile, 0, curDiskReads);

dFile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
dFile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads);
updateCostAndNumReads(dFile, curDiskReads);
}
numDiskReads += curDiskReads;
}
Expand Down
14 changes: 1 addition & 13 deletions thorlcr/graph/thgraphmaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,6 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps)
{
StringBuffer clusterName;
file->getClusterName(0, clusterName);
IPropertyTree & fileAttr = file->queryAttributes();
cost_type curReadCost = 0;
stat_type curDiskReads = stats.getStatisticSum(StNumDiskReads);
if(useJhtreeCacheStats)
Expand All @@ -671,18 +670,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps)
curReadCost = calcFileAccessCost(clusterName, 0, curDiskReads);

if (updateFileProps)
{
cost_type legacyReadCost = 0;
// Legacy files will not have the readCost stored as an attribute
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)))
{
// Legacy file: calculate readCost using prev disk reads and new disk reads
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
legacyReadCost = calcFileAccessCost(clusterName, 0, prevDiskReads);
}
file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads);
}
updateCostAndNumReads(file, curDiskReads);
return curReadCost;
};
cost_type readCost = 0;
Expand Down

0 comments on commit d8a4cb3

Please sign in to comment.