-
Notifications
You must be signed in to change notification settings - Fork 0
/
simple_access.py
133 lines (120 loc) · 3.64 KB
/
simple_access.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from glob import iglob
import ROOT
import rootpy
from rootpy.tree import Cut
from root_numpy import tree2rec
import pandas as pd
from time import time
def expander(
        dtype='cf',
        tree='rrqDir/calibzip1',
        base='/tera2/data3/cdmsbatsProd/R133/dataReleases/Prodv5-3_June2013/merged/',
        data='all',
        productions=['all'],
        cut='',
        cutrev=''):
    """Return the sorted list of ``<file path>/<tree>`` strings for one tree.

    The directory searched is ``base + data + '/' + cutrev + dtype + '/' + cut``
    (plain string concatenation, so ``base``, ``cutrev`` and ``cut`` must carry
    their own trailing ``/`` when non-empty).

    The file-name template is chosen from ``tree``:

    * contains ``'rrqDir/calib'`` -> ``calib_Prodv5-3_{}_??.root``
    * contains ``'rqDir'``        -> ``merge_Prodv5-3_{}_??.root``
    * contains ``'cutDir'``       -> ``<cut without trailing '/'>_{}_??.root``

    The first two templates are prefixed with ``blinded_`` when ``dtype``
    contains ``'bg_permitted'``.

    Parameters:
        dtype: data-type directory name.
        tree: path of the tree inside each ROOT file; appended to every result.
        base: root directory of the data release.
        data: sub-directory of ``base``.
        productions: ``['all']`` to glob the filesystem for every production
            matching ``0[0-9][0-9][0-9][0-9][0-9]?``, otherwise an iterable of
            production identifiers substituted into the template directly.
            In the explicit case nothing is globbed, so the trailing ``??``
            wildcard stays in the returned strings.
        cut: cut directory name including trailing ``/`` (used for cut trees).
        cutrev: cut revision directory including trailing ``/``.

    Returns:
        A sorted list of strings.

    Raises:
        ValueError: if ``tree`` matches none of the known tree families.
    """
    directory = base + data + '/' + cutrev + dtype + '/' + cut
    prefix = 'blinded_' if 'bg_permitted' in dtype else ''

    # Order matters: 'rrqDir/calib...' also contains the substring 'rqDir'.
    if 'rrqDir/calib' in tree:
        fname = prefix + 'calib_Prodv5-3_{}_??.root'
    elif 'rqDir' in tree:
        fname = prefix + 'merge_Prodv5-3_{}_??.root'
    elif 'cutDir' in tree:
        fname = cut.rstrip('/') + '_{}_??.root'
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `fname`; fail early with a clear message.
        raise ValueError(
            "unrecognised tree %r: expected a path containing "
            "'rrqDir/calib', 'rqDir' or 'cutDir'" % (tree,))

    # The default list is only compared, never mutated, so sharing it across
    # calls is harmless.
    if productions == ['all']:
        all_productions = '0[0-9][0-9][0-9][0-9][0-9]?'
        files = iglob(directory + fname.format(all_productions))
    else:
        files = (directory + fname.format(prod) for prod in productions)

    return sorted(path + '/' + tree for path in files)
def chainer(
        dtype='cf',
        izip=1,
        base='/tera2/data3/cdmsbatsProd/R133/dataReleases/Prodv5-3_June2013/merged/',
        data='all',
        productions=['all'],
        rqs=[],
        eventrqs=[],
        rrqs=[],
        eventrrqs=[],
        cuts=[],
        eventcuts=[],
        selections=[],
        cutrev='current',
        load_cut_to_ram=False):
    """Chain the requested ROOT trees, friend them together and load a DataFrame.

    A main ``TChain`` is built from the ``rrqDir/calibevent`` trees. One extra
    chain is built and attached as a friend for each non-empty request among
    ``rrqs`` (``rrqDir/calibzip<izip>``), ``rqs`` (``rqDir/zip<izip>``) and
    ``eventrqs`` (``rqDir/eventTree``). Each name in ``cuts``
    (``cutDir/cutzip<izip>``) and ``eventcuts`` (``cutDir/cutevent``) gets its
    own chain, attached as a friend aliased by the cut name. The chain is
    then read with ``tree2rec`` and pivoted on ``SeriesNumber``/``EventNumber``.

    Parameters:
        dtype: data-type directory name, forwarded to ``expander``.
        izip: value formatted into the ``calibzip{}``, ``zip{}`` and
            ``cutzip{}`` tree names.
        base: root directory of the data release, forwarded to ``expander``.
        data: data sub-directory used for the main chain and for the friend
            data chains.
        productions: forwarded to ``expander``.
        rqs, eventrqs, rrqs, eventrrqs: lists of branch names to load.
            ``eventrrqs`` adds branches only; it triggers no extra chain.
        cuts, eventcuts: lists of cut names to attach as friend chains.
        selections: list of selection expressions; each is wrapped in
            ``rootpy.tree.Cut`` and all are combined with ``&``.
        cutrev: cut revision directory (with or without a trailing ``/``).
        load_cut_to_ram: when true, the cut names are also requested as
            branches.

    Returns:
        The ``pandas`` object returned by ``pd.pivot_table``.
    """
    t_start = time()

    # Seed the main chain with the calibevent trees (noted as small by the
    # original author).
    main_chain = ROOT.TChain()
    event_paths = expander(
        dtype=dtype,
        tree='rrqDir/calibevent',
        base=base,
        data=data,
        productions=productions)
    for path in event_paths:
        main_chain.Add(path)

    # One friend chain per requested family of data trees. Explicit loops are
    # used instead of map(): map() is lazy on Python 3 and would add nothing.
    data_requests = (
        ('rrqDir/calibzip{}', rrqs),
        ('rqDir/zip{}', rqs),
        ('rqDir/eventTree', eventrqs),
    )
    friend_chains = []  # kept so the Python wrappers outlive the read below
    for tree_template, wanted in data_requests:
        if len(wanted) == 0:
            continue
        friend = ROOT.TChain()
        friend_paths = expander(
            dtype=dtype,
            tree=tree_template.format(izip),
            base=base,
            # Fix: `data` was not forwarded here, so the friends always came
            # from the default 'all' directory even when the main chain did not.
            data=data,
            productions=productions)
        for path in friend_paths:
            friend.Add(path)
        friend_chains.append(friend)
    for friend in friend_chains:
        main_chain.AddFriend(friend)

    # Fix: `cutrev` used to be ignored in favour of a hard-coded 'current/'.
    # expander() concatenates it verbatim, so normalise to one trailing slash.
    cut_revision = cutrev.rstrip('/') + '/' if cutrev else ''

    cut_requests = (
        ('cutDir/cutzip{}', cuts),
        ('cutDir/cutevent', eventcuts),
    )
    cut_chains = {}
    for tree_template, cut_names in cut_requests:
        for cut_name in cut_names:
            cut_paths = expander(
                data='cuts',
                dtype=dtype,
                tree=tree_template.format(izip),
                base=base,
                productions=productions,
                cut=cut_name + '/',
                cutrev=cut_revision)
            cut_chain = ROOT.TChain()
            for path in cut_paths:
                cut_chain.Add(path)
            # A name present in both lists keeps only the later chain.
            cut_chains[cut_name] = cut_chain
    for cut_name, cut_chain in cut_chains.items():
        main_chain.AddFriend(cut_chain, cut_name)

    # AND all selections together, left to right; None means no selection.
    cut_string = None
    for expression in selections:
        term = Cut(expression)
        cut_string = term if cut_string is None else cut_string & term

    # Extract the requested branches and turn them into a DataFrame.
    rows = ['SeriesNumber', 'EventNumber']
    branches = rrqs + rqs + eventrqs + eventrrqs + rows
    if load_cut_to_ram:
        branches += cuts + eventcuts
    records = tree2rec(
        main_chain,
        branches=list(set(branches)),  # de-duplicate the branch names
        selection=cut_string)
    # NOTE(review): `rows=` is the keyword older pandas releases used for what
    # is now `index=`; left unchanged to preserve behaviour on the pandas
    # version this file was written for -- confirm the target version.
    df = pd.pivot_table(pd.DataFrame(records), rows=rows)

    t_end = time()
    # Same text as the old Python 2 print statement, valid on Python 2 and 3.
    print("Load time:  %s s" % (t_end - t_start))
    return df