-
Notifications
You must be signed in to change notification settings - Fork 3
/
trec2008Blog.R
136 lines (127 loc) · 5.24 KB
/
trec2008Blog.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# trec2008Blog.R
# an R script defining a few functions for processing the TREC blog track
# results files and generating some pretty pictures.
#
# Author: Jonathan Elsas ([email protected])
#
# USAGE:
# this script should be source'd from within an R session, then the
# functions can be called.
#
# EXAMPLE:
# to generate a grid of "scaled system performance" plots:
# > source("/path/to/trec2008Blog.R")
# > plotAll("/path/to/data/", plotFn=plotOurPerf.scale, xaxt="n")
# produces an plot shown at the bottom of the following post:
# http://windowoffice.tumblr.com/post/53677875/visualizing-retrieval-performance
#
# NOTE:
# All this code assumes the queries in the results files are exactly the
# same as the queries in the summary file. There is not checking to ensure
# this is actually the case. You have been warned.
# reads in a trec results file, returning a reasonably formatted
# data frame
readTrecResults <- function(filename,
measures=c("map", "P10", "R-prec"),
remove.all=TRUE) {
# read in the (key, qid, val) triples
raw.data = read.table(filename, sep="", header=FALSE,
col.names=c("measure", "qid", "value"), as.is=TRUE)
results = data.frame(qid=sort(unique(raw.data$qid)))
for( m in measures) {
this.measure = raw.data[which(raw.data$measure == m),]
# ensure we're sorting by qid
this.measure = this.measure[order(this.measure$qid),]
results = cbind(results, this.measure$value)
}
names(results) = c("qid", make.names(measures))
if(remove.all) {
results = results[which(results$qid != "all"),] }
return(results)
}
# reads in the summary results file, provided with the blog track results
readTrecSummary <- function(filename) {
raw.data = read.table(filename, sep="", header=FALSE,
col.names=c("qid", "nrel", "map.best", "map.median",
"map.worst", "rprec.best", "rprec.median",
"rprec.worst", "p10.best", "p10.median", "p10.worst"),
skip=3)
# convert p10 values to floats
raw.data$p10.best = raw.data$p10.best / 10
raw.data$p10.median = raw.data$p10.median / 10
raw.data$p10.worst = raw.data$p10.worst / 10
# again, ensure sorting by qid
raw.data = raw.data[order(raw.data$qid),]
return(raw.data)
}
# plots the absolute performance, with max, median and min performance
plotOurPerf.abs <- function(ourPerf, bestPerf, medianPerf,
worstPerf, ylab=deparse(substitute(ourPerf)),
xlab="", ...){
# assume everything's sorted by the same key... not so smart,
# but it works
our.order = order(ourPerf, decreasing=TRUE)
ylim=c(min(worstPerf), max(bestPerf))
plot( worstPerf[our.order], ylim=ylim, type="l", col="red",
xlab=xlab, ylab=ylab, frame.plot=FALSE,
...)
lines( medianPerf[our.order], type="l", col="blue")
lines( bestPerf[our.order], type="l", col="green")
lines( ourPerf[our.order], type="l", col="purple", lwd=2)
}
# plots the relative performance, with max, median performance
plotOurPerf.rel <- function(ourPerf, bestPerf, medianPerf,
worstPerf, ylab=paste("relative", deparse(substitute(ourPerf))),
xlab="", ...){
# skip the ones with a median perf of zero
nonzero = which(medianPerf > 0)
ourPerf = ourPerf[nonzero]
bestPerf = bestPerf[nonzero]
medianPerf = medianPerf[nonzero]
our.pct.over.median = 100* (ourPerf - medianPerf) / medianPerf
best.pct.over.median = 100* (bestPerf - medianPerf) / medianPerf
our.order = order(our.pct.over.median, decreasing=TRUE)
ylim=c(min(our.pct.over.median, na.rm=TRUE),
max(best.pct.over.median, na.rm=TRUE))
plot( our.pct.over.median[our.order], ylim=ylim, type="l", col="purple",
xlab=xlab, ylab=ylab, frame.plot=FALSE, lwd=2, ...)
lines( best.pct.over.median[our.order], type="l", col="green")
abline(h=0, col="gray")
}
# plots the scaled performance
plotOurPerf.scale <- function(ourPerf, bestPerf, medianPerf,
worstPerf, ylab=paste("scale", deparse(substitute(ourPerf))),
xlab="", ...){
ours.scaled = (ourPerf - medianPerf) / (bestPerf -medianPerf)
our.order = order(ours.scaled, decreasing=TRUE)
plot( ours.scaled[our.order], type="l", col="purple",
yaxt="n", xlab=xlab, ylab=ylab, frame.plot=FALSE, lwd=2, ...)
axis(side=2, at=c(0,1), labels=c("median", "best"))
abline(h=0, col="gray")
}
# make some pretty charts, in a grid
# dataDir should point to a directory containing your .blogdist
# results files (output from trec_eval -q) and the
# _trec_trec17_tables_blog.feed file
#
# this probably won't work on non-linux/mac systems. sorry!
#
plotAll <- function(dataDir, plotFn=plotOurPerf.abs, ...) {
# find the .blogdist files in dataDir
blogdist.files = list.files(dataDir, pattern=glob2rx("*.blogdist"))
all.summ = readTrecSummary(paste(dataDir, "_trec_trec17_tables_blog.feed", sep="/"))
op = par(no.readonly = TRUE)
par(mfrow=c(length(blogdist.files),3), mar=c(2, 4, 2, 0) + 0.1)
for(i in 1:length(blogdist.files)) {
f = blogdist.files[i]
name = unlist(strsplit(f, "\\."))[1]
results = readTrecResults(paste(dataDir, f, sep="/"))
plotFn(results$map, all.summ$map.best, all.summ$map.median, all.summ$map.worst,
ylab=name, main=if(i==1){"AP"}else{""}, ...)
plotFn(results$R.prec, all.summ$rprec.best, all.summ$rprec.median,
all.summ$rprec.worst, ylab="", main=if(i==1){"R-Prec"}else{""}, ...)
plotFn(results$P10, all.summ$p10.best, all.summ$p10.median, all.summ$p10.worst,
ylab="", main=if(i==1){"P10"}else{""}, ...)
}
par(op)
}