
Commit

Merge pull request #26 from cms-analysis/Blinding
MSSM updates
Rebecca Lane committed Feb 9, 2016
2 parents 1a2768c + 609cf56 commit 6625d24
Showing 4 changed files with 347 additions and 52 deletions.
29 changes: 21 additions & 8 deletions HIG16006/bin/MorphingMSSMRun2.cpp
@@ -7,6 +7,7 @@
 #include <cstdlib>
 #include "boost/algorithm/string/predicate.hpp"
 #include "boost/program_options.hpp"
+#include "boost/lexical_cast.hpp"
 #include "CombineHarvester/CombineTools/interface/CombineHarvester.h"
 #include "CombineHarvester/CombineTools/interface/Observation.h"
 #include "CombineHarvester/CombineTools/interface/Process.h"
@@ -216,24 +217,36 @@ int main(int argc, char** argv) {
   }
   demo.Close();
   cb.AddWorkspace(ws);
   //cb.cp().signals().ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph");
   cb.cp().process(ch::JoinStr({signal_types["ggH"], signal_types["bbH"]})).ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph");
   cb.PrintAll();
 
   string folder = "output/"+output_folder+"/cmb";
   boost::filesystem::create_directories(folder);
+
+
+
+  //Write out datacards. Naming convention important for rest of workflow. We
+  //make one directory per chn-cat, one per chn and cmb. In this code we only
+  //store the complete combined datacard for each directory, but we also
+  //introduce the label _mssm to allow to distinguish from individual cards if
+  //people want to store those too for some reason.
   cout << "Writing datacards ...";
-  TFile output((folder + "/htt_mssm_input.root").c_str(), "RECREATE");
+  TFile output((folder + "/htt_cmb_mssm_input.root").c_str(), "RECREATE");
+  cb.cp().mass({"*"}).WriteDatacard(folder + "/htt_cmb_mssm.txt", output);
+  output.Close();
+
+  //Combined based on bin_id - i.e. make combined b-tag and no b-tag
-  for (string chn : chns) {
-    auto bins = cb.cp().channel({chn}).bin_set();
-    for (auto b : bins) {
-      cb.cp().channel({chn}).bin({b}).mass({"*"}).WriteDatacard(folder + "/" + b + ".txt", output);
+  auto bin_ids = cb.cp().bin_id_set();
+  for (auto b : bin_ids) {
+    string folder_cat = "output/"+output_folder+"/htt_cmb_"+boost::lexical_cast<string>(b)+"_13TeV";
+    boost::filesystem::create_directories(folder_cat);
+    TFile output((folder_cat + "/htt_cmb_"+boost::lexical_cast<string>(b)+"_13TeV_mssm_input.root").c_str(), "RECREATE");
+    cb.cp().mass({"*"}).bin_id({b}).WriteDatacard(folder_cat + "/htt_cmb_"+boost::lexical_cast<string>(b)+"_13TeV_mssm.txt", output);
+    output.Close();
   }
-  }
-  cb.cp().mass({"*"}).WriteDatacard(folder + "/htt_mssm.txt", output);
-  output.Close();
 
   //Individual channel-cats
   for (string chn : chns) {
     string folderchn = "output/"+output_folder+"/"+chn;
     boost::filesystem::create_directories(folderchn);
83 changes: 83 additions & 0 deletions HIG16006/input/mssm_protocol.txt
@@ -0,0 +1,83 @@
#This file collects up-to-date instructions for running the 13 TeV MSSM statistical results. For more detailed information on CombineHarvester please see the documentation at
http://cms-analysis.github.io/CombineHarvester/

1a) Setting up datacards: for model independent limits

MorphingMSSMRun2 --output_folder="mssm_250116_svfit_modelindep" -m MH

This will set up a subdirectory for each channel-cat, as well as combined channel and full combination directories. Each directory contains a single combined card as generated by WriteCards.
Note that this uses the new RooMorphingPdf and includes bin-by-bin uncertainties, which are merged with merging parameter 0.4. Optional extras are controlled by --auto_rebin for automatic rebinning and --manual_rebin for choosing a rebinning manually.
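The idea behind the automatic rebinning can be sketched in a few lines. This is a hypothetical standalone illustration, not the actual CombineHarvester implementation: bins with no background content are merged with a neighbour so that every bin entering the fit has a valid background expectation.

```python
# Hypothetical illustration of automatic rebinning (not the real
# CombineHarvester code): bins with non-positive background content
# are merged with a neighbouring bin.
def auto_rebin(edges, bkg):
    """edges: n+1 bin edges; bkg: n background yields.
    Returns (new_edges, new_bkg) with empty bins merged away."""
    new_edges = [edges[0]]
    new_bkg = []
    acc = 0.0
    for i, y in enumerate(bkg):
        acc += y
        if acc > 0.0:  # close the merged bin once it has content
            new_edges.append(edges[i + 1])
            new_bkg.append(acc)
            acc = 0.0
    if new_bkg and new_edges[-1] != edges[-1]:
        # trailing empty bins are absorbed into the last kept bin
        new_bkg[-1] += acc
        new_edges[-1] = edges[-1]
    return new_edges, new_bkg

# e.g. the two empty bins get absorbed by their neighbours
edges, yields = auto_rebin([0, 100, 200, 300, 400], [5.0, 0.0, 2.0, 0.0])
```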

1b) Setting up datacards: for model dependent limits

MorphingMSSMRun2 --output_folder="mssm_250116_svfit_mhmodp"

If the argument -m MH is not given, the code will set up datacards for all 3 Higgs bosons, as required for model dependent limits.

2a) Setting up workspaces for model independent limits

Workspaces can be set up using the combine tool text2workspace, but this is now implemented in combineTool.py for easy conversion of multiple datacards:

combineTool.py -M T2W -o "htt_ggPhi_mssm.root" -P CombineHarvester.CombinePdfs.ModelIndependent:floatingMSSMXSHiggs --PO 'modes=ggH' --PO 'ggHRange=0:20' -i output/mssm_250116_svfit_modelindep/*
combineTool.py -M T2W -o "htt_bbPhi_mssm.root" -P CombineHarvester.CombinePdfs.ModelIndependent:floatingMSSMXSHiggs --PO 'modes=bbH' --PO 'bbHRange=0:20' -i output/mssm_250116_svfit_modelindep/*

This will apply the text2workspace step recursively, setting up for every subdirectory and hence every channel-cat scenario. Workspaces are created for each of the two signal cases; in each case the other signal is profiled. There are many different options to combineTool.py, but this particular set will create a "combined card" named combined.txt.cmb in each subdirectory, and a workspace named as required, either htt_ggPhi_mssm.root or htt_bbPhi_mssm.root.


2b) Setting up workspaces for model dependent limits

combineTool.py -M T2W -o "htt_mhmodp_mssm.root" -P CombineHarvester.CombinePdfs.MSSM:MSSM --PO filePrefix=$CMSSW_BASE/src/auxiliaries/models/ --PO modelFiles=13TeV,mhmodp_mu200_13TeV.root,1 -i output/mssm_250116_svfit_mhmodp/*

This will perform the same recursive application of text2workspace, this time applying the mhmodp physics model.

3a) Running model independent limits

This can be done directly with combine, but again combineTool makes life a lot easier for us by allowing successive calls (with different choices of job submission, and parallelisation of calculations):

combineTool.py -m "90,100,110,120,130,140,160,180,200,250,300,350,400,450,500,600,700,800,900,1000" -M Asymptotic --boundlist ../CombinePdfs/scripts/mssm_ggh_boundaries.json --freezeNuisances MH --setPhysicsModelParameters r_ggH=0,r_bbH=0 -t -1 -d output/mssm_250116_svfit_modelindep/*/htt_ggH_mssm.root --there -n "ggH"

combineTool.py -m "90,100,110,120,130,140,160,180,200,250,300,350,400,450,500,600,700,800,900,1000" -M Asymptotic --boundlist ../CombinePdfs/scripts/mssm_bbh_boundaries.json --freezeNuisances MH --setPhysicsModelParameters r_ggH=0,r_bbH=0 -t -1 -d output/mssm_250116_svfit_modelindep/*/htt_bbH_mssm.root --there -n "bbH"

This goes to each subdirectory of output/mssm_250116_svfit_modelindep/ (--there) and performs the combine calculation for the listed masses on the workspace (either the ggH or bbH workspace). The combine output files are stored in the directories alongside the datacards. Note that the option --parallel=X allows you to run the calculations interactively, X at a time in parallel, and e.g. --job-mode 'lxbatch' --task-name 'mssm_ggH' --sub-opts '-q 1nh' --merge=4 could be used to instead run on lxbatch, merging 4 mass points into 1 job and submitting to the 1 hour queue.

Once all calculations are complete, the results are collected into json files using:

combineTool.py -M CollectLimits output/mssm_250116_svfit_modelindep/*/higgsCombine.ggH*.root --use-dirs -o "mssm_250116_svfit_ggH.json"

combineTool.py -M CollectLimits output/mssm_250116_svfit_modelindep/*/higgsCombine.bbH*.root --use-dirs -o "mssm_250116_svfit_bbH.json"

This will place json files in the current directory, appending the subdirectory name to the string "mssm_250116_svfit_ggH" (e.g. mssm_250116_svfit_ggH_mt.json). One json is produced for every subdirectory, so every channel-cat and the combination will then be available for plotting.
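For downstream use, the collected json can then be read back. A minimal sketch follows, assuming (as the plotting scripts do) that the json maps mass points to quantile-keyed limit values, with "exp0" holding the median expected limit:

```python
import json

# Minimal sketch of reading a collected limits json. The schema is
# assumed to map mass -> {quantile: limit}, with "exp0" holding the
# median expected limit.
def expected_limits(path):
    with open(path) as f:
        data = json.load(f)
    # return sorted (mass, expected limit) pairs, ready for plotting
    return sorted((float(m), vals["exp0"])
                  for m, vals in data.items() if "exp0" in vals)
```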

3b) Running model dependent limits

combineTool.py -M AsymptoticGrid ../CombinePdfs/scripts/mssm_asymptotic_grid.json -d output/mssm_250116_svfit_mhmodp/mt/htt_mhmodp_mssm.root --job-mode 'interactive' --task-name 'mssm_mhmodp'

The asymptotic grid mode reads an input json defining the set of mA-tanb points to scan and performs the limit calculation for each. This time the calculation is run once per workspace: the script has a nice feature whereby, if called multiple times with the same workspace and asymptotic grid, it checks which points have already completed successfully and only runs those remaining. This makes it easy to top up the grid for a finer scan, for example. Once all points are complete, the final call will create an asymptotic_grid.root file containing the results.
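The top-up behaviour amounts to simple bookkeeping; a hypothetical sketch of the idea (not the actual combineTool.py code):

```python
# Hypothetical sketch of the "top up" bookkeeping: given the requested
# grid of (mA, tanb) points and those already completed, only the
# remaining points are run on the next invocation.
def points_to_run(requested, completed):
    done = set(completed)
    return [p for p in requested if p not in done]

grid = [(mA, tanb) for mA in (200, 400, 600) for tanb in (10, 30)]
remaining = points_to_run(grid, completed=[(200, 10), (400, 30)])
```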

4a) Plotting model independent limits

The usual Brazil band plots can be made using this script, for e.g. the mutau channel:

python ../CombineTools/scripts/plotBSMxsBRLimit.py --file=mssm_250116_svfit_ggH_mt.json --log=1 --process="gg#phi" --custom_y_range=True --y_axis_max=1000 --y_axis_min=0.001 --expected_only

Or comparison plots can be made using the following script:

python scripts/MSSMlimitCompare.py --file=mssm_250116_svfit_ggH_mt.json,mssm_250116_svfit_ggH_et.json --labels="mutau,etau" --expected_only --outname="mssm_250116_mt_vs_et_ggH" --process="gg#phi"

The options --absolute and --relative can be used to make ratio plots as well.

4b) Plotting model dependent limits

python ../CombineTools/scripts/plotLimitGrid.py asymptotic_grid.root --scenario-label="m_{h}^{mod+} scenario" --output="mssm_250116_mhmodp_mt" --title-right="2.2 fb^{-1} (13 TeV)" --expected_only

The plotting takes the asymptotic_grid file as input, and performs interpolation to produce smooth contours of exclusion for a 2D mA-tanb plot. Note that this script contains many different optional settings for this interpolation.
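One ingredient of that interpolation can be sketched as follows. This is a hypothetical illustration, not the plotLimitGrid.py code: along a single mA column, the CLs values are interpolated in tanb to find the 95% CL crossing, giving one point on the exclusion contour.

```python
# Hypothetical illustration of one interpolation step used when drawing
# exclusion contours: linearly interpolate CLs values along tanb to find
# where CLs crosses 0.05 (the 95% CL exclusion boundary).
def cls_crossing(tanb_vals, cls_vals, target=0.05):
    for (t0, c0), (t1, c1) in zip(zip(tanb_vals, cls_vals),
                                  zip(tanb_vals[1:], cls_vals[1:])):
        if (c0 - target) * (c1 - target) <= 0 and c0 != c1:
            return t0 + (target - c0) * (t1 - t0) / (c1 - c0)
    return None  # no crossing in this column

# CLs falls from 0.20 to 0.02 between tanb 5 and 15; crossing at 12.5
tanb_excl = cls_crossing([5, 10, 15], [0.20, 0.08, 0.02])
```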

5) Pre or post fit plots

Prefit plots can be made, adding either the model dependent (3 Higgs) signal or a single resonance signal for a benchmark point. The usage:

python scripts/postFitPlot.py --postfitshapes --dir=output/mssm_250116_svfit_modelindep/htt_mt_8_13TeV/ --workspace=htt_ggPhi_mssm --ratio --extra_pad=0.3 --custom_x_range --x_axis_max=999.9 --x_axis_min=0.0 --r_ggH=0.1 --r_bbH=0.1 --mPhi=700

This will internally call PostFitShapesFromWorkspace, which will generate the signal and background templates for the benchmark point of mPhi = 700 GeV, with cross-section values of 0.1 pb for the 2 signal processes. The script will then make a stacked plot. The option --model_dep can be used with choices for --mA and --tanb to instead plot the 3 Higgs signal from a model dependent workspace.

By default (essentially hardcoded until we pass preapproval) the data will be blinded above 200 GeV. Automated blinding, based on s/sqrt(b) greater than 0.5 in a given bin, is also implemented and can be enabled. The option --auto_blind_check_only will simply report which bins the calculation would choose to blind, based on internally running PostFitShapesFromWorkspace for a few benchmark points, whilst keeping the manual blinding at 200 GeV and above. The option --auto_blind will then actually apply the blinding based on the s/sqrt(b) calculation. To be used with caution - please run the check_only mode first to check that it looks sensible! To simply make an s/sqrt(b) plot for a given benchmark signal, the option --soverb_plot can be used, which replaces the ratio with the s/sqrt(b) plot for the signal and does not plot any data.
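The automated blinding criterion described above can be sketched as follows (hypothetical standalone code, not the postFitPlot.py implementation):

```python
import math

# Hypothetical sketch of the auto-blinding rule: a bin is blinded if
# s/sqrt(b) exceeds 0.5 for any of the benchmark signal points checked.
def bins_to_blind(signal_sets, background, threshold=0.5):
    """signal_sets: one per-bin signal yield list per benchmark point;
    background: per-bin background yields. Returns bin indices to blind."""
    blind = set()
    for signal in signal_sets:
        for i, (s, b) in enumerate(zip(signal, background)):
            if b > 0 and s / math.sqrt(b) > threshold:
                blind.add(i)
    return sorted(blind)

# two benchmark signal points, three bins
blinded = bins_to_blind([[1.0, 2.0, 0.2], [6.0, 0.0, 0.6]],
                        [100.0, 4.0, 1.0])
```

A bin is kept blind if any benchmark point exceeds the threshold, matching the idea of checking a few benchmark points before unblinding.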
@@ -65,9 +65,9 @@
     data = {}
     with open(files[i]) as jsonfile:
         data = json.load(jsonfile)
-    exp_graph_list[i] = plot.LimitTGraphFromJSON(data, 'expected')
+    exp_graph_list[i] = plot.LimitTGraphFromJSON(data, 'exp0')
     if args.relative or args.absolute:
-        obs_graph_list[i] = plot.LimitTGraphFromJSON(data, 'observed')
+        obs_graph_list[i] = plot.LimitTGraphFromJSON(data, 'obs')
 
 max_vals = []
 for i in range(len(exp_graph_list)):
