diff --git a/ImageD11/sinograms/dataset.py b/ImageD11/sinograms/dataset.py
index 4814d87f..832020a3 100644
--- a/ImageD11/sinograms/dataset.py
+++ b/ImageD11/sinograms/dataset.py
@@ -1,10 +1,9 @@
-
 from __future__ import print_function, division
 
 import os, h5py, numpy as np
 import fast_histogram
 import logging
 
-
+
 """
 TO DO:
@@ -21,105 +20,121 @@
 """
 
+
 # POSSIBLE_DETECTOR_NAMES = ("frelon3", "eiger")
-
+
 
 def guess_chunks(name, shape):
     if name == 'omega':
-        return ( shape[0], 1 )
+        return (shape[0], 1)
     if name == 'dty':
-        return ( 1, shape[1] )
+        return (1, shape[1])
     return shape
-
-
+
+
 class DataSet:
     """One DataSet instance per detector!"""
-
+
     # simple strings or ints
-    ATTRNAMES = ( "dataroot", "analysisroot", "sample", "dset", "shape", "dsname",
-                  "datapath", "analysispath", "masterfile", "limapath",
-                  "detector", "omegamotor", "dtymotor",
-                  "pksfile", "sparsefile", "parfile"
-                )
-    STRINGLISTS = ( "scans", "imagefiles", "sparsefiles" )
+    ATTRNAMES = ("dataroot", "analysisroot", "sample", "dset", "shape", "dsname",
+                 "datapath", "analysispath", "masterfile", "limapath",
+                 "detector", "omegamotor", "dtymotor",
+                 "pksfile", "sparsefile", "parfile"
+                 )
+    STRINGLISTS = ("scans", "imagefiles", "sparsefiles")
     # sinograms
-    NDNAMES = ( "omega", "dty", "nnz", "frames_per_file", "nlm", "frames_per_scan" )
-
+    NDNAMES = ("omega", "dty", "nnz", "frames_per_file", "nlm", "frames_per_scan")
 
     def __init__(self,
-                 dataroot = ".",
-                 analysisroot = ".",
-                 sample = "sample",
-                 dset = "dataset",
-                 detector = "eiger",
-                 omegamotor = "rot_center",
-                 dtymotor = "dty"):
+                 dataroot=".",
+                 analysisroot=".",
+                 sample="sample",
+                 dset="dataset",
+                 detector="eiger",
+                 omegamotor="rot_center",
+                 dtymotor="dty"):
         """ The things we need to know to process data """
-
+
         # defaults to eiger and nanoscope station, can be overwritten with init parameters detector, omegamotor and dtymotor
-        self.detector = detector # frelon3
-        self.limapath = None # where is the data in the Lima files
-
-        self.omegamotor = omegamotor # diffrz
-        self.dtymotor = dtymotor # diffty
-
-        self.dataroot = dataroot # folder to find {sample}/{sample}_{dset}
-        self.analysisroot = analysisroot # where to write or find sparse data
-        self.sample = sample # from bliss path
-        self.dset = dset # from bliss path
-
-        self.dsname = "_".join( (self.sample, self.dset ) )
-
-        self.datapath = os.path.join( self.dataroot, self.sample, self.dsname )
-        self.analysispath = os.path.join( self.analysisroot, self.sample, self.dsname )
-        self.masterfile = os.path.join( self.datapath, self.dsname + '.h5' )
-
+        self.detector = detector  # frelon3
+        self.limapath = None  # where is the data in the Lima files
+
+        self.omegamotor = omegamotor  # diffrz
+        self.dtymotor = dtymotor  # diffty
+
+        self.dataroot = dataroot  # folder to find {sample}/{sample}_{dset}
+        self.analysisroot = analysisroot  # where to write or find sparse data
+        self.sample = sample  # from bliss path
+        self.dset = dset  # from bliss path
+
+        self.dsname = "_".join((self.sample, self.dset))
+
+        # paths for raw data
+
+        self.datapath = os.path.join(self.dataroot, self.sample, self.dsname)
+        self.masterfile = os.path.join(self.datapath, self.dsname + '.h5')
+
+        # paths for processed data
+        self.update_paths()
+
         # These are in order ! The order of the lists is important - all things should match.
-        self.scans = None # read from master or load from analysis
-        self.frames_per_scan = None # how many frames (and motor positions) in each scan row.
-        self.imagefiles = None # List of strings. w.r.t self.datapath
-        self.frames_per_file = None # how many frames in this file (Lima files)
-        self.sparsefiles = None # maps sparse files to self.imagefiles
-
+        self.scans = None  # read from master or load from analysis
+        self.frames_per_scan = None  # how many frames (and motor positions) in each scan row.
+        self.imagefiles = None  # List of strings. w.r.t self.datapath
+        self.frames_per_file = None  # how many frames in this file (Lima files)
+        self.sparsefiles = None  # maps sparse files to self.imagefiles
+
         self.shape = (0, 0)
         self.omega = None
         self.dty = None
-
-
+
+    def update_paths(self):
+        # paths for processed data
+        # root of analysis for this dataset for this sample:
+        self.analysispath = os.path.join(self.analysisroot, self.sample, self.dsname)
+
+        self.dsfile_default = os.path.join(self.analysispath, self.dsname + '_dataset.h5')
+        # at the moment, set self.dsfile to be the default
+        # if save or load is ever called, this will be replaced
+        self.dsfile = self.dsfile_default
+        self.pksfile = os.path.join(self.analysispath, self.dsname + '_peaks_table.h5')
+        self.col4dfile = os.path.join(self.analysispath, self.dsname + '_peaks_4d.h5')
+        self.col2dfile = os.path.join(self.analysispath, self.dsname + '_peaks_2d.h5')
+        self.grainsfile = os.path.join(self.analysispath, self.dsname + '_grains.h5')
+        self.sparsefile = os.path.join(self.analysispath, self.dsname + '_sparse.h5')
+
     def __repr__(self):
         r = []
-        for name in "dataroot analysisroot sample dset".split():
-            r.append('%s = "%s"'%( name, getattr(self, name) ) )
-        r.append( 'shape = ( %d, %d)'%tuple(self.shape) )
+        for name in "dataroot analysisroot sample dset".split():
+            r.append('%s = "%s"' % (name, getattr(self, name)))
+        r.append('shape = ( %d, %d)' % tuple(self.shape))
         if self.scans is not None:
-            r.append( '# scans %d from %s to %s'%(
-                len(self.scans), self.scans[0], self.scans[-1] ))
-        return "\n".join(r)
-
-
+            r.append('# scans %d from %s to %s' % (
+                len(self.scans), self.scans[0], self.scans[-1]))
+        return "\n".join(r)
+
     def compare(self, other):
         '''Try to see if the load/save is working'''
         from types import FunctionType
-        sattrs = set( [ name for name in vars(self) if name[0] != '_' ] )
-        oattrs = set( [ name for name in vars(self) if name[0] != '_' ] )
+        sattrs = set([name for name in vars(self) if name[0] != '_'])
+        oattrs = set([name for name in vars(other) if name[0] != '_'])  # was vars(self): compare against other
         if sattrs != oattrs:
-            logging.info('Attribute mismatch '+str(sattrs)+' != '+str(oattrs))
+            logging.info('Attribute mismatch ' + str(sattrs) + ' != ' + str(oattrs))
             return False
         for a in sattrs:
-            s = getattr( self, a )
+            s = getattr(self, a)
             if isinstance(s, FunctionType):
                 continue
-            o = getattr( other, a )
+            o = getattr(other, a)
             t = type(s)
             if type(o) != type(s):
-                logging.info('Type mismatch %s %s'%(str(t),str(a)))
+                logging.info('Type mismatch %s %s' % (str(t), str(a)))
                 return False
             if t == np.ndarray:
                 if s.shape != o.shape:
-                    logging.info('Shape mismatch %s %s'%(str(s.shape), str(o.shape)))
+                    logging.info('Shape mismatch %s %s' % (str(s.shape), str(o.shape)))
                     return False
-                if ( s != o ).all():
-                    logging.info('Data mismatch '+str(a))
+                if (s != o).any():  # was .all(): any differing element is a mismatch
+                    logging.info('Data mismatch ' + str(a))
                     return False
             else:
                 if s != o:
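The new `update_paths()` above centralises the derived analysis-file names, and `__init__` now calls it, so the defaults exist as soon as the object is built. A minimal sketch of the resulting naming convention (the visitor paths below are made up for illustration):

```python
from ImageD11.sinograms.dataset import DataSet

# Hypothetical experiment layout - adjust to your own dataroot/analysisroot.
ds = DataSet(dataroot="/data/visitor/ma1234/id11",
             analysisroot="/data/visitor/ma1234/id11/analysis",
             sample="sample", dset="dataset")

# update_paths() has already run inside __init__, so the derived names exist:
print(ds.dsfile_default)
# -> .../analysis/sample/sample_dataset/sample_dataset_dataset.h5
print(ds.sparsefile)
# -> .../analysis/sample/sample_dataset/sample_dataset_sparse.h5
```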
@@ -127,18 +142,15 @@ def compare(self, other):
                     return False
         logging.info('Dataset objects seem to match!')
         return True
-
-
-
+
     def report(self):
         print(self)
-        print("# Collected %d missing %d"%(self.check_images()))
-        print("# Segmented %d missing %d"%(self.check_sparse()))
+        print("# Collected %d missing %d" % (self.check_images()))
+        print("# Segmented %d missing %d" % (self.check_sparse()))
-
     def import_all(self, scans=None, shape=None):
         # collect the data
-        self.import_scans( scans=scans )
+        self.import_scans(scans=scans)
         # lima frames
         self.import_imagefiles()
         # motor positions
@@ -150,26 +162,25 @@ def import_all(self, scans=None, shape=None):
             self.import_nnz()
         except:
             logging.info("nnz not available. Segmentation done?")
-
-
-    def import_scans(self, scans=None, hname = None):
+
+    def import_scans(self, scans=None, hname=None):
         """ Reads in the scans from the bliss master file """
         # we need to work out what detector we have at this point
         # self.guess_detector()
         if hname is None:
             hname = self.masterfile
         frames_per_scan = []
-        with h5py.File( hname, 'r' ) as hin:
+        with h5py.File(hname, 'r') as hin:
             if scans is None:
                 scans = [scan for scan in list(hin['/']) if
-                         (scan.endswith('.1') and
-                          ('measurement' in hin[scan]) and
-                          (self.detector in hin[scan]['measurement'])) ]
+                         (scan.endswith('.1') and
+                          ('measurement' in hin[scan]) and
+                          (self.detector in hin[scan]['measurement']))]
             goodscans = []
             for scan in scans:
                 # Make sure that this scan has a measurement from our detector
                 if self.detector not in hin[scan]['measurement']:
-                    print('Bad scan', scan)
+                    print('Bad scan', scan)
                 else:
                     try:
                         frames = hin[scan]['measurement'][self.detector]
@@ -177,28 +188,27 @@ def import_scans(self, scans=None, hname = None):
                         print('Bad scan', scan, ', h5py error follows:')
                         print(e)
                         continue
-                    if len(frames.shape)==3: # need 1D series of frames
+                    if len(frames.shape) == 3:  # need 1D series of frames
                         goodscans.append(scan)
-                        frames_per_scan.append( frames.shape[0] )
+                        frames_per_scan.append(frames.shape[0])
                     else:
                         print('Bad scan', scan)
         self.scans = goodscans
         self.frames_per_scan = frames_per_scan
-        logging.info( 'imported %d scans from %s'%(len(self.scans),hname))
+        logging.info('imported %d scans from %s' % (len(self.scans), hname))
         return self.scans
-
-
+
     def import_imagefiles(self):
         """ Get the Lima file names from the bliss master file, also scan_npoints """
         # self.import_scans() should always be called before this function, so we know the detector
         npts = None
         self.imagefiles = []
         self.frames_per_file = []
-        with h5py.File( self.masterfile, 'r' ) as hin:
-            bad = [ ]
-            for i, scan in enumerate( self.scans ):
+        with h5py.File(self.masterfile, 'r') as hin:
+            bad = []
+            for i, scan in enumerate(self.scans):
                 if ('measurement' not in hin[scan]) or (self.detector not in hin[scan]['measurement']):
                     print('Bad scan', scan)
                    bad.append(scan)
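The scan-selection rule in `import_scans` is worth spelling out: keep scans named `<n>.1` whose `measurement` group holds a 3D frame series for the chosen detector. A rough standalone equivalent, assuming a bliss master file laid out the way the code above expects (`good_scans` is a hypothetical helper, not part of the module):

```python
import h5py

def good_scans(masterfile, detector="eiger"):
    # Keep "<n>.1" scans that hold a 3D image series for this detector.
    keep = []
    with h5py.File(masterfile, "r") as hin:
        for scan in hin["/"]:
            if not scan.endswith(".1"):
                continue  # other suffixes are not the primary scan entries
            meas = hin[scan].get("measurement")
            if meas is not None and detector in meas and meas[detector].ndim == 3:
                keep.append(scan)
    return keep
```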
@@ -206,74 +216,72 @@ def import_imagefiles(self):
                 frames = hin[scan]['measurement'][self.detector]
                 self.imageshape = frames.shape[1:]
                 for vsrc in frames.virtual_sources():
-                    self.imagefiles.append( vsrc.file_name )
-                    self.frames_per_file.append( vsrc.src_space.shape[0] ) # not sure about this
+                    self.imagefiles.append(vsrc.file_name)
+                    self.frames_per_file.append(vsrc.src_space.shape[0])  # not sure about this
                     # check limapath
                     if self.limapath is None:
                         self.limapath = vsrc.dset_name
                     assert self.limapath == vsrc.dset_name
-        self.frames_per_file = np.array( self.frames_per_file, int )
-        self.sparsefiles = [ name.replace( '/', '_' ).replace( '.h5', '_sparse.h5' ) for name in
-                             self.imagefiles ]
-        logging.info( 'imported %d lima filenames'%( np.sum(self.frames_per_file) ) )
-
-
-    def import_motors_from_master(self): # could also get these from sparse files if saved
+        self.frames_per_file = np.array(self.frames_per_file, int)
+        self.sparsefiles = [name.replace('/', '_').replace('.h5', '_sparse.h5') for name in
+                            self.imagefiles]
+        logging.info('imported %d lima filenames' % (np.sum(self.frames_per_file)))
+
+    def import_motors_from_master(self):  # could also get these from sparse files if saved
        """ read the motors from the lima file
        you need to import the imagefiles first
        these will be the motor positions to accompany the images
        """
        # self.guess_motornames()
-        self.omega = [None,] * len(self.scans)
-        self.dty = [None,] * len(self.scans)
-        with h5py.File( self.masterfile, 'r' ) as hin:
+        self.omega = [None, ] * len(self.scans)
+        self.dty = [None, ] * len(self.scans)
+        with h5py.File(self.masterfile, 'r') as hin:
             bad = []
-            for i, scan in enumerate( self.scans ):
+            for i, scan in enumerate(self.scans):
                 # Should always be there, if not, filter scans before you get to here
-                om = hin[scan][ 'measurement' ][ self.omegamotor ][()]
+                om = hin[scan]['measurement'][self.omegamotor][()]
                 if len(om) == self.frames_per_scan[i]:
-                    self.omega[i] = om
-                else: # hope the first point was good ? Probably corrupted MUSST data.
-                    self.omega[i] = [om[0],]
+                    self.omega[i] = om
+                else:  # hope the first point was good ? Probably corrupted MUSST data.
+                    self.omega[i] = [om[0], ]
                     bad.append(i)
                 # this can be an array or a scalar
-                dty = hin[scan][ 'instrument/positioners' ][ self.dtymotor ]
+                dty = hin[scan]['instrument/positioners'][self.dtymotor]
                 if len(dty.shape) == 0:
-                    self.dty[i] = np.full( self.frames_per_scan[i], dty[()] )
+                    self.dty[i] = np.full(self.frames_per_scan[i], dty[()])
                 elif dty.shape[0] == self.frames_per_scan[i]:
                     self.dty[i] = dty[:]
                 else:
                     # corrupted MUSST?
-                    self.dty[i] = np.full( self.frames_per_scan[i], dty[0] )
+                    self.dty[i] = np.full(self.frames_per_scan[i], dty[0])
         for b in bad:
-            dom = [ (abs( self.omega[i][0] - self.omega[b] ), i) for i in range(len(self.scans))
-                    if i not in bad ]
-            if len(dom)>0:
-                j = np.argmin( dom[0][1] )
-                self.omega[b] = self.omega[j] # best match
-                print("replace bad scan omega",b, self.scans[b],"with",j, self.scans[j])
-        logging.info( 'imported omega/dty' )
-
-
+            dom = [(abs(self.omega[i][0] - self.omega[b][0]), i) for i in range(len(self.scans))
+                   if i not in bad]
+            if len(dom) > 0:
+                j = min(dom)[1]  # closest starting omega; was np.argmin(dom[0][1]), which always gave 0
+                self.omega[b] = self.omega[j]  # best match
+                print("replace bad scan omega", b, self.scans[b], "with", j, self.scans[j])
+        logging.info('imported omega/dty')
+
     def guess_shape(self):
         """Guess the shape if it was not given """
-        npts = np.sum( self.frames_per_scan )
-        if len(self.scans) == 1: # probably fscan2d or f2scan
-            with h5py.File(self.masterfile,'r') as hin:
+        npts = np.sum(self.frames_per_scan)
+        if len(self.scans) == 1:  # probably fscan2d or f2scan
+            with h5py.File(self.masterfile, 'r') as hin:
                 s = hin[self.scans[0]]
                 title = s['title'].asstr()[()]
-                print('Scan title',title)
+                print('Scan title', title)
                 if title.split()[0] == 'fscan2d':
                     s0 = s['instrument/fscan_parameters/slow_npoints'][()]
                     s1 = s['instrument/fscan_parameters/fast_npoints'][()]
-                    file_nums = np.arange( s0 * s1 ).reshape( (s0,s1) )
+                    file_nums = np.arange(s0 * s1).reshape((s0, s1))
                     # slice notation means last frame+1 to be inclusive
-                    self.scans = [ "%s::[%d:%d]"%( self.scans[0], row[0], row[-1]+1 )
-                                   for row in file_nums ]
+                    self.scans = ["%s::[%d:%d]" % (self.scans[0], row[0], row[-1] + 1)
+                                  for row in file_nums]
                 elif title.split()[0] == 'f2scan':
                     # good luck ? Assuming rotation was the inner loop here:
                     step = s['instrument/fscan_parameters/step_size'][()]
-                    s1 = int( np.round( 360 / step ) )
+                    s1 = int(np.round(360 / step))
                     s0 = npts // s1
                     logging.warning("Dataset might need to be reshaped")
                 else:
@@ -283,20 +291,19 @@ def guess_shape(self):
             s0 = len(self.scans)
             s1 = npts // s0
         self.shape = s0, s1
-        if np.prod( self.shape ) != npts:
+        if np.prod(self.shape) != npts:
             print("Warning: irregular scan - might be bugs in here")
             print(npts, len(self.scans))
-        self.omega = np.array( self.omega ).reshape( self.shape )
-        self.dty = np.array( self.dty ).reshape( self.shape )
-        logging.info( 'sinogram shape = ( %d , %d ) imageshape = ( %d , %d)'%(
-            self.shape[0], self.shape[1], self.imageshape[0], self.imageshape[1] ) )
-
+        self.omega = np.array(self.omega).reshape(self.shape)
+        self.dty = np.array(self.dty).reshape(self.shape)
+        logging.info('sinogram shape = ( %d , %d ) imageshape = ( %d , %d)' % (
+            self.shape[0], self.shape[1], self.imageshape[0], self.imageshape[1]))
 
     def guessbins(self):
         ny, nomega = self.shape
         self.omin = self.omega.min()
         self.omax = self.omega.max()
-        if (self.omax - self.omin)>360:
+        if (self.omax - self.omin) > 360:
             # multi-turn scan...
             self.omin = 0.
             self.omax = 360.
@@ -313,69 +320,68 @@ def guessbins(self):
             self.ystep = (self.ymax - self.ymin) / (ny - 1)
         else:
             self.ystep = 1
-        self.obincens = np.linspace( self.omin, self.omax, nomega )
-        self.ybincens = np.linspace( self.ymin, self.ymax, ny )
-        self.obinedges = np.linspace( self.omin-self.ostep/2, self.omax + self.ostep/2, nomega + 1 )
-        self.ybinedges = np.linspace( self.ymin-self.ystep/2, self.ymax + self.ystep/2, ny + 1 )
-
+        self.obincens = np.linspace(self.omin, self.omax, nomega)
+        self.ybincens = np.linspace(self.ymin, self.ymax, ny)
+        self.obinedges = np.linspace(self.omin - self.ostep / 2, self.omax + self.ostep / 2, nomega + 1)
+        self.ybinedges = np.linspace(self.ymin - self.ystep / 2, self.ymax + self.ystep / 2, ny + 1)
+
     def guess_detector(self):
         '''Guess which detector we are using from the masterfile'''
-
+
         # open the masterfile
         hname = self.masterfile
         detectors_seen = []
         scan = "1.1"
-        with h5py.File( hname, 'r' ) as hin:
+        with h5py.File(hname, 'r') as hin:
             # go through the first scan, and see what detectors we can see
             for measurement in list(hin[scan]['measurement']):
                 if measurement.attrs.get("interpretation") == "image":
                     detectors_seen.append(measurement)
-
+
         if len(detectors_seen) != 1:
             raise ValueError("More than one detector seen! Can't work out which one to process.")
         else:
             self.detector = detectors_seen[0]
-
-# def guess_motornames(self):
-#     '''Guess which station we were using (Nanoscope or 3DXRD) from which motors are in instrument/positioners'''
-#     from ImageD11.sinograms.assemble_label import HEADERMOTORS_NSCOPE, HEADERMOTORS_TDXRD, HEADERMOTORS
-#     # open the masterfile
-#     hname = self.masterfile
-#     motors_seen = []
-#     scan = "1.1"
-
-#     with h5py.File( hname, 'r' ) as hin:
-#         # go through the first scan, and see what motors we can see
-#         for positioner in list(hin[scan]['instrument/positioners']):
-#             if positioner in HEADERMOTORS:
-#                 motors_seen.append(positioner)
-
-#     using_nscope = False
-#     using_tdxrd = False
-#     for motor in motors_seen:
-#         if motor in HEADERMOTORS_NSCOPE:
-#             using_nscope = True
-#         elif motor in HEADERMOTORS_TDXRD:
-#             using_tdxrd = True
-
-#     if using_nscope and using_tdxrd:
-#         raise ValueError("Found both nscope and tdxrd motors in positioners, not sure which one we were using!")
-
-#     if using_nscope:
-#         self.omegamotor = 'rot_center'
-#         self.dtymotor = 'dty'
-#     elif using_tdxrd:
-#         self.omegamotor = 'diffrz'
-#         self.dtymotor = 'diffty'
-
-
+
+    # def guess_motornames(self):
+    #     '''Guess which station we were using (Nanoscope or 3DXRD) from which motors are in instrument/positioners'''
+    #     from ImageD11.sinograms.assemble_label import HEADERMOTORS_NSCOPE, HEADERMOTORS_TDXRD, HEADERMOTORS
+    #     # open the masterfile
+    #     hname = self.masterfile
+    #     motors_seen = []
+    #     scan = "1.1"
+
+    #     with h5py.File( hname, 'r' ) as hin:
+    #         # go through the first scan, and see what motors we can see
+    #         for positioner in list(hin[scan]['instrument/positioners']):
+    #             if positioner in HEADERMOTORS:
+    #                 motors_seen.append(positioner)
+
+    #     using_nscope = False
+    #     using_tdxrd = False
+    #     for motor in motors_seen:
+    #         if motor in HEADERMOTORS_NSCOPE:
+    #             using_nscope = True
+    #         elif motor in HEADERMOTORS_TDXRD:
+    #             using_tdxrd = True
+
+    #     if using_nscope and using_tdxrd:
+    #         raise ValueError("Found both nscope and tdxrd motors in positioners, not sure which one we were using!")
+
+    #     if using_nscope:
+    #         self.omegamotor = 'rot_center'
+    #         self.dtymotor = 'dty'
+    #     elif using_tdxrd:
+    #         self.omegamotor = 'diffrz'
+    #         self.dtymotor = 'diffty'
+
    def sinohist(self, weights=None, omega=None, dty=None, method='fast'):
        """ Bin some data onto the sinogram histogram """
        bins = len(self.obincens), len(self.ybincens)
-        rng = ( (self.obinedges[0], self.obinedges[-1]),
-                (self.ybinedges[0], self.ybinedges[-1]) )
-        if isinstance( weights, np.ndarray):
+        rng = ((self.obinedges[0], self.obinedges[-1]),
+               (self.ybinedges[0], self.ybinedges[-1]))
+        if isinstance(weights, np.ndarray):
            wt = weights.ravel()
        else:
            wt = weights
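`guessbins()` places bin centres on the motor positions themselves and pushes the edges half a step outside the scanned range, so there is always one more edge than centre. A tiny self-check of that construction, with made-up scan numbers:

```python
import numpy as np

# Illustrative values only: a 0-180 degree scan in 1-degree steps.
omin, omax, nomega = 0.0, 180.0, 181
ostep = (omax - omin) / (nomega - 1)

obincens = np.linspace(omin, omax, nomega)
obinedges = np.linspace(omin - ostep / 2, omax + ostep / 2, nomega + 1)

assert len(obinedges) == len(obincens) + 1
# each centre sits midway between its two edges
assert np.allclose((obinedges[:-1] + obinedges[1:]) / 2, obincens)
```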
@@ -384,109 +390,110 @@ def sinohist(self, weights=None, omega=None, dty=None, method='fast'):
         if omega is None:
             omega = self.omega
         if dty is None:
             dty = self.dty
         if method == 'numpy':
-            ret = np.histogram2d( omega.ravel(), dty.ravel(),
-                                  weights = wt, bins = bins, range=rng )
+            ret = np.histogram2d(omega.ravel(), dty.ravel(),
+                                 weights=wt, bins=bins, range=rng)
             histo = ret[0]
-        elif method == 'fast':
-            histo = fast_histogram.histogram2d( omega.ravel(), dty.ravel(),
-                                                weights = wt, bins = bins, range=rng )
+        elif method == 'fast':
+            histo = fast_histogram.histogram2d(omega.ravel(), dty.ravel(),
+                                               weights=wt, bins=bins, range=rng)
         return histo
-
     def import_nnz(self):
         """ Read the nnz arrays from the scans """
         nnz = []
         for spname in self.sparsefiles:
-            with h5py.File( os.path.join( self.analysispath, spname ), "r" ) as hin:
-                nnz.append( hin[self.limapath]['nnz'][:] )
-        self.nnz = np.concatenate( nnz ).reshape( self.shape ).astype( np.int32 )
-        logging.info('imported nnz, average %f'%(self.nnz.mean())) # expensive if you are not logging it.
-
-
-# def compute_pixel_labels(self):
-#     this should instead from from the pk2d file generated by sinograms/properties.py
-#     nlm = []
-#     for spname in self.sparsefiles:
-#         n, l = peaklabel.add_localmax_peaklabel( os.path.join( self.analysispath, spname ),
-#                                                  self.limapath )
-#         nlm.append(n)
-#     self.nlm = np.concatenate( nlm ).reshape( self.shape )
-
-
-# def import_nlm(self):
-#     this should instead from from the pk2d file generated by sinograms/properties.py
-#     """ Read the Nlmlabels
-#     These are the number of localmax peaks per frame
-#     """
-#     nlm = []
-#     for spname in self.sparsefiles:
-#         with h5py.File( os.path.join( self.analysispath, spname ), "r" ) as hin:
-#             nlm.append( hin[self.limapath]['Nlmlabel'][:] )
-#     self.nlm = np.concatenate( nlm ).reshape( self.shape )
-#     logging.info('imported nlm, max %d'%(self.nlm.max()))
-
-
-    def check_files(self, path, filenames, verbose = 0):
+            with h5py.File(os.path.join(self.analysispath, spname), "r") as hin:
+                nnz.append(hin[self.limapath]['nnz'][:])
+        self.nnz = np.concatenate(nnz).reshape(self.shape).astype(np.int32)
+        logging.info('imported nnz, average %f' % (self.nnz.mean()))  # expensive if you are not logging it.
+
+    # def compute_pixel_labels(self):
+    #     this should instead come from the pk2d file generated by sinograms/properties.py
+    #     nlm = []
+    #     for spname in self.sparsefiles:
+    #         n, l = peaklabel.add_localmax_peaklabel( os.path.join( self.analysispath, spname ),
+    #                                                  self.limapath )
+    #         nlm.append(n)
+    #     self.nlm = np.concatenate( nlm ).reshape( self.shape )
+
+    # def import_nlm(self):
+    #     this should instead come from the pk2d file generated by sinograms/properties.py
+    #     """ Read the Nlmlabels
+    #     These are the number of localmax peaks per frame
+    #     """
+    #     nlm = []
+    #     for spname in self.sparsefiles:
+    #         with h5py.File( os.path.join( self.analysispath, spname ), "r" ) as hin:
+    #             nlm.append( hin[self.limapath]['Nlmlabel'][:] )
+    #     self.nlm = np.concatenate( nlm ).reshape( self.shape )
+    #     logging.info('imported nlm, max %d'%(self.nlm.max()))
+
+    def check_files(self, path, filenames, verbose=0):
         """ See whether files are created or not """
         # images collected
         done = 0
         missing = 0
         for fname in filenames:
-            fullname = os.path.join( path, fname )
-            if os.path.exists( fullname ):
+            fullname = os.path.join(path, fname)
+            if os.path.exists(fullname):
                 done += 1
             else:
                 missing += 1
-                if verbose>0:
-                    print("missing", fullname )
+                if verbose > 0:
+                    print("missing", fullname)
                     verbose -= 1
         return done, missing
-
-
+
     def check_images(self):
         """ Is the experiment finished ? """
-        return self.check_files( self.datapath, self.imagefiles )
-
-
+        return self.check_files(self.datapath, self.imagefiles)
+
     def check_sparse(self):
         """ Has the segmentation been done ? """
-        return self.check_files( self.analysispath, self.sparsefiles, verbose=2 )
-
-
-    def save( self, h5name, h5group = '/' ):
-
-        ZIP = { 'compression': 'gzip' }
-
-        with h5py.File( h5name, "a") as hout:
-            grp = hout[ h5group ]
+        return self.check_files(self.analysispath, self.sparsefiles, verbose=2)
+
+    def save(self, h5name=None, h5group='/'):
+        if h5name is None:
+            # none supplied, so use default path
+            h5name = self.dsfile_default
+        # make sure the parent analysis directories exist
+        dsfile_folder = os.path.dirname(self.dsfile_default)
+        if not os.path.exists(dsfile_folder):
+            os.makedirs(dsfile_folder)
+
+        ZIP = {'compression': 'gzip'}
+
+        with h5py.File(h5name, "a") as hout:
+            grp = hout[h5group]
             # Simple small objects
             for name in self.ATTRNAMES:
-                data = getattr( self, name, None )
+                data = getattr(self, name, None)
                 if data is not None:
-                    grp.attrs[ name ] = data
-            # The string lists
+                    grp.attrs[name] = data
+            # The string lists
             for name in self.STRINGLISTS:
-                data = getattr( self, name, None )
+                data = getattr(self, name, None)
                 if data is not None and len(data):
-                    sdata = np.array( data, "S" )
-                    ds = grp.require_dataset( name,
-                                              shape = sdata.shape,
-                                              chunks = sdata.shape,
-                                              dtype = h5py.string_dtype(),
-                                              **ZIP )
+                    sdata = np.array(data, "S")
+                    ds = grp.require_dataset(name,
+                                             shape=sdata.shape,
+                                             chunks=sdata.shape,
+                                             dtype=h5py.string_dtype(),
+                                             **ZIP)
                     ds[:] = sdata
             #
             for name in self.NDNAMES:
                 data = getattr(self, name, None)
                 if data is not None:
-                    data = np.asarray( data )
+                    data = np.asarray(data)
                     try:
                         chunks = guess_chunks(name, data.shape)
-                        ds = grp.require_dataset( name,
-                                                  shape = data.shape,
-                                                  chunks = chunks,
-                                                  dtype = data.dtype,
-                                                  **ZIP )
+                        ds = grp.require_dataset(name,
+                                                 shape=data.shape,
+                                                 chunks=chunks,
+                                                 dtype=data.dtype,
+                                                 **ZIP)
                         ds[:] = data
                     except:
                         print(name)
@@ -494,20 +501,26 @@ def save( self, h5name, h5group = '/' ):
                         print(data.shape)
                         print(chunks)
                         raise
-
-    def load( self, h5name, h5group = '/' ):
+        # if we got here, we saved the file successfully
+        self.dsfile = h5name
+
+    def load(self, h5name=None, h5group='/'):
         """ Recover this from a hdf5 file """
-        with h5py.File( h5name, "r") as hin:
-            grp = hin[ h5group ]
+        if h5name is None:
+            # none supplied, so use default path
+            h5name = self.dsfile_default
+
+        with h5py.File(h5name, "r") as hin:
+            grp = hin[h5group]
             for name in self.ATTRNAMES:
                 if name in grp.attrs:
-                    setattr( self, name, grp.attrs.get(name) )
-            self.shape = tuple( self.shape ) # hum
+                    setattr(self, name, grp.attrs.get(name))
+            self.shape = tuple(self.shape)  # hum
             for name in self.NDNAMES:
                 if name in grp:
                     data = grp[name][()]
-                    setattr( self, name, data )
+                    setattr(self, name, data)
             for name in self.STRINGLISTS:
                 if name in grp:
                     stringlist = list(grp[name][()])
@@ -515,48 +528,55 @@ def load( self, h5name, h5group = '/' ):
                         data = [s.decode() for s in stringlist]
                     else:
                         data = stringlist
-                    setattr( self, name, data )
+                    setattr(self, name, data)
         self.guessbins()
+
+        # analysis paths can only be calculated once
+        self.update_paths()
+        # if we got here, we loaded the file successfully
+        self.dsfile = h5name
+
         return self
 
 
-def load( h5name, h5group = '/' ):
-    ds_obj = DataSet().load( h5name, h5group )
-
+def load(h5name, h5group='/'):
+    ds_obj = DataSet().load(h5name, h5group)
+
     return ds_obj
 
 
-def import_from_sparse( hname,
+def import_from_sparse(hname,
                        omegamotor='instrument/positioners/rot',
-                       dtymotor= 'instrument/positioners/dty',
-                       ):
+                       dtymotor='instrument/positioners/dty',
+                       ):
     ds = DataSet()
-    with h5py.File(hname,'r') as hin:
h5py.File(hname,'r') as hin: + with h5py.File(hname, 'r') as hin: scans = list(hin['/']) - order = np.argsort( [ float(v) for v in scans if v.endswith('.1')] ) - scans = [ scans[i] for i in order ] - dty = [ hin[scan][dtymotor][()] for scan in scans ] - omega = [ hin[scan][omegamotor][()] for scan in scans ] + order = np.argsort([float(v) for v in scans if v.endswith('.1')]) + scans = [scans[i] for i in order] + dty = [hin[scan][dtymotor][()] for scan in scans] + omega = [hin[scan][omegamotor][()] for scan in scans] nnz = [hin[scan]['nnz'][()] for scan in scans] -# nlm = [hin[scan]['Nlmlabel'][()] for scan in scans] + # nlm = [hin[scan]['Nlmlabel'][()] for scan in scans] ds.scans = scans ds.nnz = nnz ds.nnz = np.array(nnz) ds.shape = ds.nnz.shape ds.omega = np.zeros(ds.nnz.shape, float) - for i,o in enumerate( omega ): - if isinstance( o, float ) or (len(o) == len(ds.nnz[i])): + for i, o in enumerate(omega): + if isinstance(o, float) or (len(o) == len(ds.nnz[i])): ds.omega[i] = o if len(o) > len(ds.nnz[i]): - ds.omega[i] = ds.omega[i-2] # guess zig zag + ds.omega[i] = ds.omega[i - 2] # guess zig zag # warning here - - ds.dty = np.zeros(ds.nnz.shape, float) - for i,o in enumerate( dty ): - if isinstance( o, float ) or (len(o) == len(ds.nnz[i])): + + ds.dty = np.zeros(ds.nnz.shape, float) + for i, o in enumerate(dty): + if isinstance(o, float) or (len(o) == len(ds.nnz[i])): ds.dty[i] = o else: - raise Exception('Cannot read %d dty %s %s'%(i, str(o), str(o.shape) )) -# assert ds.nlm.shape == ds.shape + raise Exception('Cannot read %d dty %s %s' % (i, str(o), str(o.shape))) + # assert ds.nlm.shape == ds.shape try: ds.guess_scans() except: @@ -567,6 +587,7 @@ def import_from_sparse( hname, print("warning, guessbins failed") return ds + # Example # s = dataset( # dataroot = "/data/visitor/ma5415/id11/20221027", @@ -576,25 +597,26 @@ def import_from_sparse( hname, # s.import_all() -def check( dataroot, analysisroot, sample, dset, destination, scans=None ): - - h5o = DataSet( dataroot = dataroot, analysisroot = analysisroot, sample = sample, dset = dset ) +def check(dataroot, analysisroot, sample, dset, destination, scans=None): + h5o = DataSet(dataroot=dataroot, analysisroot=analysisroot, sample=sample, dset=dset) h5o.import_all(scans=scans) - h5o.save( destination ) - + h5o.save(destination) + print("Checking: Read back from hdf5") - t = load( destination ) + t = load(destination) t.report() return t.compare(h5o) - -if __name__=="__main__": + + +if __name__ == "__main__": import sys + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) - + dataroot = sys.argv[1] analysisroot = sys.argv[2] sample = sys.argv[3] dset = sys.argv[4] destination = sys.argv[5] - - check( dataroot, analysisroot, sample, dset, destination ) + + check(dataroot, analysisroot, sample, dset, destination) diff --git a/ImageD11/sinograms/roi_iradon.py b/ImageD11/sinograms/roi_iradon.py index 32d3c7f6..cda88dd3 100644 --- a/ImageD11/sinograms/roi_iradon.py +++ b/ImageD11/sinograms/roi_iradon.py @@ -1,5 +1,4 @@ - -"""This code came from skimage.transform and then was modified +"""This code came from skimage.transform and then was modified - Added mask ROI for back projection. Optimisation for small grains in big maps. 
diff --git a/ImageD11/sinograms/roi_iradon.py b/ImageD11/sinograms/roi_iradon.py
index 32d3c7f6..cda88dd3 100644
--- a/ImageD11/sinograms/roi_iradon.py
+++ b/ImageD11/sinograms/roi_iradon.py
@@ -1,5 +1,4 @@
-
-"""This code came from skimage.transform and then was modified
+"""This code came from skimage.transform and then was modified
 - Added mask ROI for back projection. Optimisation for small grains in big maps.
   Should allow threading (one thread per tile)
@@ -39,7 +38,6 @@
 """
 
-
 from scipy.fft import fft, ifft, fftfreq, fftshift
 from scipy.interpolate import interp1d
 import numpy as np
@@ -48,6 +46,7 @@
 import numba
 from ImageD11 import cImageD11
 
+
 def _sinogram_pad(n, o=None):
     if o is None:
         diagonal = int(np.ceil(np.sqrt(2) * n))
@@ -60,6 +59,7 @@ def _sinogram_pad(n, o=None):
     pad_width = ((pad_before, pad - pad_before), (0, 0))
     return pad_width
 
+
 def _get_fourier_filter(size, filter_name):
     """Construct the Fourier filter.
     """
@@ -72,7 +72,7 @@ def _get_fourier_filter(size, filter_name):
     # Computing the ramp filter from the fourier transform of its
     # frequency domain representation lessens artifacts and removes a
     # small bias as explained in [1], Chap 3. Equation 61
-    fourier_filter = 2 * np.real(fft(f))         # ramp filter
+    fourier_filter = 2 * np.real(fft(f))  # ramp filter
     if filter_name == "ramp":
         pass
     elif filter_name == "shepp-logan":
@@ -203,7 +203,7 @@ def run_interp(i):
     return reconstructed
 
 
-#TODO : fixme
+# TODO : fixme
 # It would be 'nice' for the radon transform to be closer to iradon
 # in using the same xpr/ypr/shifts formula. That will give pixel co-ords
 # for the 1D projections. The problem is to then 'cut up' the pixels on
@@ -215,11 +215,11 @@ def run_interp(i):
 # First triangle. Middle Part. Last triangle.
 #
-def radon( image, theta,
-           output_size=None, # sinogram width
-           projection_shifts=None,
-           mask = None,
-           workers = 1,
+def radon(image, theta,
+          output_size=None,  # sinogram width
+          projection_shifts=None,
+          mask=None,
+          workers=1,
           ):
     """
     From skimage.transform. Modified to have projection shifts and roi
@@ -264,15 +264,15 @@ def radon( image, theta,
     assert image.dtype == np.float32
     assert len(image.shape) == 2
     assert image.shape[0] == image.shape[1]
-
+
     if output_size is None:
         output_size = image.shape[0]
-
+
     if projection_shifts is not None:
-        assert projection_shifts.shape[1] == len( theta )
+        assert projection_shifts.shape[1] == len(theta)
         assert projection_shifts.shape[0] == output_size
-
-    radius = output_size // 2
+
+    radius = output_size // 2
     # padding the image. Shall we bother? Apparently yes.
     pad = [int(np.ceil(output_size - s)) for s in image.shape]
     new_center = [(s + p) // 2 for s, p in zip(image.shape, pad)]
@@ -280,7 +280,7 @@ def radon( image, theta,
     pad_before = [nc - oc for oc, nc in zip(old_center, new_center)]
     pad_width = [(pb, p - pb) for pb, p in zip(pad_before, pad)]
     padded_image = np.pad(image, pad_width, mode='constant',
-                          constant_values=0)
+                          constant_values=0)
     # padded_image is always square
     if padded_image.shape[0] != padded_image.shape[1]:
         raise ValueError('padded_image must be a square')
@@ -289,20 +289,20 @@ def radon( image, theta,
                            dtype=image.dtype).T
     angles_count = len(theta)
-    rtheta = np.deg2rad( theta )
-
+    rtheta = np.deg2rad(theta)
+
     def roti(i):
         if projection_shifts is not None:
-            dx = projection_shifts.T[i] # measured positions are shifted
+            dx = projection_shifts.T[i]  # measured positions are shifted
         else:
             dx = None
-        rotated = skimage.transform.radon_transform.warp( padded_image, fxyrot,
-                                                          map_args={'angle': rtheta[i],
-                                                                    'center': center,
-                                                                    'projection_shifts': dx },
+        rotated = skimage.transform.radon_transform.warp(padded_image, fxyrot,
+                                                         map_args={'angle': rtheta[i],
+                                                                   'center': center,
+                                                                   'projection_shifts': dx},
                                                          clip=False)
         radon_image[:, i] = rotated.sum(0)
-
+
     slices = list(range(angles_count))
     if workers == 1:
         for i in slices:
@@ -310,13 +310,13 @@ def roti(i):
         return radon_image
     if workers is None or workers < 1:
         workers = cImageD11.cores_available()
-    with concurrent.futures.ThreadPoolExecutor(max_workers = workers) as pool:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
         for _ in pool.map(roti, slices):
-            pass
+            pass
     return radon_image
 
 
-def fxyrot( colrow, angle=0, center=0, projection_shifts = None ):
+def fxyrot(colrow, angle=0, center=0, projection_shifts=None):
     # used in radon above
     # apply the projection shifts in reverse
     #  t = ypr * np.cos(rtheta[i]) - xpr * np.sin(rtheta[i])
@@ -326,23 +326,22 @@ def fxyrot( colrow, angle=0, center=0, projection_shifts = None ):
     #  xi = x
     col, row = colrow.T - center
     n = int(np.sqrt(col.shape[0]))
-    assert n*n == col.shape[0]
-    col.shape = n,n
-    row.shape = n,n
+    assert n * n == col.shape[0]
+    col.shape = n, n
+    row.shape = n, n
     cos_a, sin_a = np.cos(angle), np.sin(angle)
     if projection_shifts is not None:
         ct = col.T
         ct += projection_shifts
-    x = cos_a*col + sin_a*row
-    y = -sin_a*col + cos_a*row
-    colrow[:,0] = x.ravel()
-    colrow[:,1] = y.ravel()
+    x = cos_a * col + sin_a * row
+    y = -sin_a * col + cos_a * row
+    colrow[:, 0] = x.ravel()
+    colrow[:, 1] = y.ravel()
     return colrow + center
 
-
 @numba.njit(boundscheck=False)
-def recon_cens( omega, dty, ybins, imsize, wt, y0 = 0.0 ):
+def recon_cens(omega, dty, ybins, imsize, wt, y0=0.0):
     """
     Back project the peak centers into a map
     omega, dty = peak co-ordinates in the sinogram
@@ -350,49 +349,46 @@ def recon_cens( omega, dty, ybins, imsize, wt, y0 = 0.0 ):
     imsize = probably len(ybins)+1
     wt = intensity, probably ones()
     """
-    r = np.zeros( (imsize, imsize), dtype=np.float32 )
+    r = np.zeros((imsize, imsize), dtype=np.float32)
     rc = imsize // 2
     for i in range(len(omega)):
         s, c = np.sin(omega[i]), np.cos(omega[i])
-        yv = (dty[i] - y0) / (ybins[1]-ybins[0])
+        yv = (dty[i] - y0) / (ybins[1] - ybins[0])
         if abs(c) > abs(s):  # going across image, beam is along x
             for p in range(imsize):
                 k = p - rc  # -rc -> rc
-                v = ((-yv - k * s)/ c ) + rc
-                j = int(np.floor( v ))
-                f = (j+1)-v
-                if ( j >= 0 ) & ( j < imsize ):
-                    r[ j, p ] += wt[i]*f
+                v = ((-yv - k * s) / c) + rc
+                j = int(np.floor(v))
+                f = (j + 1) - v
+                if (j >= 0) & (j < imsize):
+                    r[j, p] += wt[i] * f
                 f = v - j
-                if ( (j+1) >= 0 ) & ( (j+1) < imsize ):
-                    r[ j+1, p ] += wt[i]*f
+                if ((j + 1) >= 0) & ((j + 1) < imsize):
+                    r[j + 1, p] += wt[i] * f
         else:  # going along image
             for p in range(imsize):
                 j = p - rc  # -rc -> rc
-                v = ((-yv - j * c) / s ) + rc
-                k = int(np.floor( v ))
-                f = (k+1)-v
-                if ( k>=0 ) & ( k<imsize ):
-                    r[ p, k ] += wt[i]*f
+                v = ((-yv - j * c) / s) + rc
+                k = int(np.floor(v))
+                f = (k + 1) - v
+                if (k >= 0) & (k < imsize):
+                    r[p, k] += wt[i] * f
                 f = v - k
-                if ( (k+1)>=0 ) & ( (k+1)<imsize ):
-                    r[ p, k+1 ] += wt[i]*f
+                if ((k + 1) >= 0) & ((k + 1) < imsize):
+                    r[p, k + 1] += wt[i] * f
     return r
-
-
-
-
-def mlem( sino, theta,
-          startvalue = 1,
-          projection_shifts = None,
-          mask = None,
-          workers = 1,
-          pad=0, niter=50,
-          divtol=1e-5, ):
+
+
+def mlem(sino,
+         theta,
+         startvalue=1,
+         projection_shifts=None,
+         mask=None,
+         workers=1,
+         niter=50,
+         divtol=1e-5):
     """
     # Also called "MART" for Multiplicative ART
     # This keeps a positivity constraint for both the data and reconstruction
@@ -404,35 +400,37 @@ def mlem( sino, theta,
     # ToDo : implement a mask
     # ToDo : check about the corners / circle=False aspects
     #
-
+
     An "MLEM" algorithm from XRDUA was used in this paper:
-    "Impurity precipitation in atomized particles evidenced by nano x-ray diffraction computed tomography"
-    Anne Bonnin; Jonathan P. Wright; Rémi Tucoulou; Hervé Palancher
+    "Impurity precipitation in atomized particles evidenced by nano x-ray diffraction computed tomography"
+    Anne Bonnin; Jonathan P. Wright; Rémi Tucoulou; Hervé Palancher
     Appl. Phys. Lett. 105, 084103 (2014) https://doi.org/10.1063/1.4894009
 
     This python code implements something similar based on a youtube video
     (https://www.youtube.com/watch?v=IhETD4nSJec)
-    There are lots of papers from mathematicians in the literature about MART (multiplicative ART).
-    The conversion of latex algebra back and forth into computer code seems to be a bit of a
+    There are lots of papers from mathematicians in the literature about MART (multiplicative ART).
+    The conversion of latex algebra back and forth into computer code seems to be a bit of a
     problem for me (Jon Wright - Nov 2023).
     """
-    def backproject( sino, theta, projection_shifts = None ):
+    def backproject(sino, theta, mask=mask, projection_shifts=None):
         """ project the sinogram into the sample """
-        return iradon( sino, theta,
-                       filter_name=None,
-                       mask = mask,
-                       workers = workers,
-                       projection_shifts = projection_shifts )
-
-    def forwardproject( sample, theta, projection_shifts = None ):
+        return iradon(sino,
+                      theta,
+                      filter_name=None,
+                      mask=mask,
+                      workers=workers,
+                      projection_shifts=projection_shifts)
+
+    def forwardproject(sample, theta, projection_shifts=None):
         """ project the sample into the experiment (sinogram) """
-        return radon( sample, theta,
-                      mask = mask,
-                      workers = workers,
-                      projection_shifts = projection_shifts )
-    #
+        return radon(sample,
+                     theta,
+                     workers=workers,
+                     projection_shifts=projection_shifts)
+
+    #
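Between these two hunks it helps to see the multiplicative update that the docstring describes, stripped of the tomography: the same loop on a toy linear system `Ax = b`, with matrix products standing in for `forwardproject`/`backproject` (everything here is illustrative, not part of the module):

```python
import numpy as np

# Toy multiplicative (MLEM/MART-style) update, mirroring the loop in mlem().
A = np.array([[1., 1., 0.],
              [0., 1., 1.],
              [1., 0., 1.]])
b = A @ np.array([1., 2., 3.])   # synthetic "measured sinogram"

x = np.ones(3)                   # startvalue = 1
sens = A.T @ np.ones_like(b)     # sensitivity: backprojection of ones
for _ in range(200):
    ratio = b / (A @ x + 1e-5)   # measured / forward-projected (divtol guards /0)
    x *= (A.T @ ratio) / sens    # multiplicative correction
print(np.round(x, 3))            # approaches [1. 2. 3.]
```

The update leaves `x` unchanged exactly when the forward projection matches the data (ratio of ones), and positivity of `x` is preserved automatically, which is the property the docstring emphasises.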
@@ -440,20 +438,29 @@ def forwardproject(sample, theta, projection_shifts=None):
     # Also called "MART" for Multiplicative ART
     # This keeps a positivity constraint for both the data and reconstruction
     #
     # Made while watching:
     # https://www.youtube.com/watch?v=IhETD4nSJec
     # by Andrew Reader
     #
-    # ToDo : implement a mask
+    # ToDo : check about the corners / circle=False aspects
     #
     # Number of pixels hitting each output in the sample:
-    sensitivity_image = backproject( np.ones_like(sino), theta,
-                                     projection_shifts = projection_shifts )
-    recip_sensitivity_image = 1./sensitivity_image
+    sensitivity_image = backproject(np.ones_like(sino),
+                                    theta,
+                                    mask=None,
+                                    projection_shifts=projection_shifts)
+    recip_sensitivity_image = 1. / sensitivity_image
     # The image reconstruction:
-    mlem_rec = np.empty( sensitivity_image.shape, np.float32)
+    mlem_rec = np.empty(sensitivity_image.shape, np.float32)
     mlem_rec[:] = startvalue
     for i in range(niter):
-        calc_sino = forwardproject( mlem_rec, theta, projection_shifts = projection_shifts )
-        ratio = sino / (calc_sino + divtol )
-        correction = recip_sensitivity_image * backproject( ratio, theta,
-                                                            projection_shifts = projection_shifts )
+        calc_sino = forwardproject(mlem_rec,
+                                   theta,
+                                   projection_shifts=projection_shifts)
+        ratio = sino / (calc_sino + divtol)
+        correction = recip_sensitivity_image * backproject(ratio,
+                                                           theta,
+                                                           projection_shifts=projection_shifts)
         mlem_rec *= correction
-    return mlem_rec
\ No newline at end of file
+
+    if mask is not None:
+        mlem_rec[~mask] = 0.
+
+    return mlem_rec
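Finally, a short usage sketch for the reworked `mlem()`, assuming a square float32 phantom and using the module's own `radon()` to synthesise the sinogram; the sizes, angles, and iteration count are arbitrary choices for illustration:

```python
import numpy as np
from ImageD11.sinograms.roi_iradon import radon, mlem

# Square float32 phantom, as radon() asserts it must be
img = np.zeros((64, 64), np.float32)
img[24:40, 28:36] = 1.0

theta = np.arange(0., 180., 1.)           # projection angles in degrees
sino = radon(img, theta)                  # forward project to a sinogram
mask = np.ones(img.shape, bool)           # trivial all-True ROI mask
rec = mlem(sino, theta, mask=mask, niter=20)
print(rec.shape, rec.max())
```

With a real ROI, the mask restricts the back projection to the pixels of interest (the optimisation for small grains in big maps mentioned in the module docstring), and the new final block zeroes everything outside it.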