IR4QA-splitqrels脚本
#!/bin/sh
# IR4QA-splitqrels: split an IR4QA qrels file into per-topic files.
#
# Usage: IR4QA-splitqrels <IR4QAqrels file>
#
# Each qrels line is expected to start with a topic ID followed by a single
# space; everything after that space is copied to the per-topic file.
#
# Outputs (topic directories are created under the current directory):
#   <qrels>.tid       the topic IDs found in the qrels file, one per line,
#                     in order of first appearance
#   <TID>/<TID>.rel   the qrels lines of topic <TID> with the leading
#                     "<TID> " removed, sorted and de-duplicated
# Progress messages are written to standard error.

if [ $# -ne 1 ]; then
  echo "usage: $(basename "$0") <IR4QAqrels file>" >&2
  echo " e.g.: $(basename "$0") ACLIA1-JA.qrels" >&2
  exit 1
fi

QRELS=$1
shift

TIDSUF="tid"
#RELSUF="prel" # pseudorel
RELSUF="rel"

if [ ! -r "$QRELS" ]; then
  echo "$(basename "$0"): cannot read qrels file: $QRELS" >&2
  exit 1
fi

# Create the topic ID file from the qrels file.
# The topic ID is whatever precedes the first space on a line.  Duplicates
# are dropped with a "seen" table rather than uniq, so the list is correct
# even when the lines of one topic are not contiguous.  Lines that yield an
# empty topic ID (blank lines, lines starting with a space) are ignored.
TIDFILE=$QRELS.$TIDSUF
awk '{
  tid = $0
  sub(/ .*$/, "", tid)
  if (tid != "" && !(tid in seen)) {
    seen[tid] = 1
    print tid
  }
}' "$QRELS" > "$TIDFILE"
echo "created $TIDFILE" >&2

# Create a relevance assessment file in each topic directory.
while read -r TID; do
  mkdir -p -- "$TID"
  OUTF=$TID/$TID.$RELSUF
  # Compare the topic ID as a literal prefix (not as a regular expression)
  # and strip "<TID> " from the front of every matching line.
  awk -v tid="$TID" '
    index($0, tid " ") == 1 { print substr($0, length(tid) + 2) }
  ' "$QRELS" | sort -u > "$OUTF"
  echo "created $OUTF" >&2
done < "$TIDFILE"
IR4QA-splitruns脚本
#!/bin/sh
# IR4QA-splitruns: break a single XML run file in the ACLIA1 IR4QA
# submission format into per-topic ranked list files (*.res).
#
# Usage: IR4QA-splitruns <topicIDlist> [runpath ...]
#
#   <topicIDlist>  file with one topic ID per line
#   runpath        run file(s) to split; when no runpath is given on the
#                  command line, run paths are read from standard input,
#                  one per line
#
# For every run and every topic ID the file <TID>/<TID>.<run>.res is
# written, where <run> is the run path with any leading directories
# removed.  It holds the DOCID attribute value of each line between
# <TOPIC ID="<TID>"> and </TOPIC>, with the first "_CMN_" of each ID removed.
#   - the list is truncated after DOCLIMIT documents
#   - an empty file is still created when the run has nothing for a topic
# Progress messages are written to standard error.

if [ $# -lt 1 ]; then
  echo "usage: $(basename "$0") <topicIDlist> [runpath ...]" >&2
  echo " e.g.: $(basename "$0") ACLIA1-JA.qrels.tid ./Runs/CMUJAV-EN-JA-01-T" >&2
  echo "   or: echo ./Runs/CMUJAV-EN-JA-01-T | $(basename "$0") ACLIA1-JA.qrels.tid" >&2
  exit 1
fi

#AWK=/usr/bin/gawk
AWK=awk
SUF="res"
# truncate if the run is too long
DOCLIMIT=1000

TIDLIST=$1
shift

if [ ! -r "$TIDLIST" ]; then
  echo "$(basename "$0"): cannot read topic ID list: $TIDLIST" >&2
  exit 1
fi

# split_run RUNPATH
# Write one <TID>/<TID>.<run>.res file per topic ID for the given run file.
# Returns non-zero (and writes nothing) when the run file is unreadable.
split_run() {
  RUNPATH=$1
  # Run name = path without its directory part.
  RUN=${RUNPATH##*/}

  if [ ! -r "$RUNPATH" ]; then
    echo "$(basename "$0"): cannot read run file: $RUNPATH" >&2
    return 1
  fi

  while read -r TID; do
    # Normally created by IR4QA-splitqrels; make sure it exists anyway.
    mkdir -p -- "$TID"
    OUTF=$TID/$TID.$RUN.$SUF
    # The topic ID and the limit are passed with -v instead of being
    # spliced into the program text, and the opening tag is matched as a
    # literal string.  The "_CMN_" removal is done here as well, which
    # avoids the non-portable in-place option of sed.
    "$AWK" -v tid="$TID" -v limit="$DOCLIMIT" '
      BEGIN { open_tag = "<TOPIC ID=\"" tid "\">"; count = 0 }
      index($0, open_tag) { sw = 1; next }
      sw == 1 && /<\/TOPIC>/ { exit }
      sw == 1 && / DOCID=/ {
        sub(/^.* DOCID="/, "")   # drop everything up to the opening quote
        sub(/".*$/, "")          # drop the closing quote and the rest
        sub(/_CMN_/, "")
        print
        if (++count >= limit) exit
      }
    ' "$RUNPATH" > "$OUTF"
    echo "created $OUTF" >&2
  done < "$TIDLIST"
}

STATUS=0
if [ $# -gt 0 ]; then
  for RUNARG in "$@"; do
    split_run "$RUNARG" || STATUS=1
  done
else
  # No run on the command line: take run paths from standard input.
  while read -r RUNARG; do
    [ -n "$RUNARG" ] || continue
    split_run "$RUNARG" || STATUS=1
  done
fi
exit "$STATUS"
IR4QA-qeval脚本
#!/bin/sh
# IR4QA-qeval: evaluate run(s) topic by topic with the q_eval program.
#
# Usage: IR4QA-qeval <topicIDfile> <evaluationname> [run ...]
#
#   <topicIDfile>     file with one topic ID per line
#   <evaluationname>  label used in the names of the output files
#   run               run name(s); when none is given on the command line,
#                     run names are read from standard input, one per line
#
# For each run and each topic ID <TID>:
#   reads   <TID>/<TID>.rel and <TID>/<TID>.<run>.res (ranked list)
#   writes  <TID>/<TID>.<run>.<evaluationname>.lab   ("q_eval label" output)
# and collects the "q_eval compute" output of all topics in
#   <run>.<evaluationname>.qev                       (per-topic values)
# The values of the metrics listed in METRICS, averaged over the number of
# lines in <topicIDfile>, are printed on standard output.

if [ $# -lt 2 ]; then
  echo "usage: $(basename "$0") <topicIDfile> <evaluationname> [run ...]" >&2
  echo " e.g.: $(basename "$0") ACLIA1-JA.qrels.tid default TEST-EN-JA-01-T" >&2
  echo "   or: echo TEST-EN-JA-01-T | $(basename "$0") ACLIA1-JA.qrels.tid default" >&2
  exit 1
fi

# write your q_eval label/comp options here
LABELOPT=""
COMPOPT="-cutoffs 10,1000 -g 1:2"
# The above is for the ACLIA1 IR4QA collection which only has
# relevant and partially relevant docs.
#COMPOPT="-cutoffs 10,1000 -g 1:2:3"
# The above is for NTCIR collections with highly relevant / relevant
# / partially relevant docs.

# select the metrics you want here
#METRICS="RR O-measure P-measure P-plus AP Q-measure RBP MSnDCG@0010 MSnDCG@1000 P@0010 Hit@0010"
METRICS="AP Q-measure MSnDCG@1000"

OSUF="qev"
RESSUF="res"
RELSUF="rel"
LABSUF="lab"

TIDFILE=$1
OUTSTR=$2
shift 2

#AWK=/usr/bin/gawk
AWK=awk

# set your own q_eval path here
QEVPATH=./q_eval

if [ ! -r "$TIDFILE" ]; then
  echo "$(basename "$0"): cannot read topic ID file: $TIDFILE" >&2
  exit 1
fi
if ! command -v "$QEVPATH" >/dev/null 2>&1; then
  echo "$(basename "$0"): q_eval not found or not executable: $QEVPATH" >&2
  exit 1
fi

# number of topics (= number of lines in the topic ID file)
NTOPICS=$(wc -l < "$TIDFILE" | "$AWK" '{ print $1 }')
if [ "${NTOPICS:-0}" -le 0 ]; then
  # Averaging would otherwise divide by zero.
  echo "$(basename "$0"): no topics in $TIDFILE" >&2
  exit 1
fi

# eval_run RUN
# Label and score every topic of one run, then print the averaged metrics.
eval_run() {
  RUN=$1
  OUTF=$RUN.$OUTSTR.$OSUF

  while read -r TID; do
    LABFILE=$TID/$TID.$RUN.$OUTSTR.$LABSUF
    # cat (rather than a redirection) keeps q_eval running on empty input
    # when a result file is missing, so the topic still gets an entry.
    # LABELOPT and COMPOPT are left unquoted on purpose: they hold
    # whitespace-separated option lists.
    # shellcheck disable=SC2086
    cat "$TID/$TID.$RUN.$RESSUF" |
      "$QEVPATH" label -r "$TID/$TID.$RELSUF" $LABELOPT > "$LABFILE"
    echo "created $LABFILE" >&2
    # shellcheck disable=SC2086
    cat "$LABFILE" |
      "$QEVPATH" compute -r "$TID/$TID.$RELSUF" $COMPOPT -out "$TID"
  done < "$TIDFILE" > "$OUTF"
  echo "created $OUTF" >&2

  echo "averaging over $NTOPICS topics:"
  for MET in $METRICS; do
    # Lines of interest contain " <metric>="; the value is the last field.
    # The sum is divided by the number of topics, not by the number of
    # matching lines (dividing by NR would be incorrect).
    "$AWK" -v run="$RUN" -v met="$MET" -v ntopics="$NTOPICS" '
      index($0, " " met "=") { sum += $NF }
      END { printf("%s %s %.4f\n", run, met, sum / ntopics) }
    ' "$OUTF"
  done
}

if [ $# -gt 0 ]; then
  for RUNARG in "$@"; do
    eval_run "$RUNARG"
  done
else
  # No run on the command line: take run names from standard input.
  while read -r RUNARG; do
    [ -n "$RUNARG" ] || continue
    eval_run "$RUNARG"
  done
fi
run脚本（调用前三个脚本）
#!/bin/sh
# run: evaluate one run file by calling the three IR4QA helper scripts
# located in the current directory.
#
# Usage: ./run <file.xml>
#
# The qrels file defaults to ACLIA1-CS.qrels in the current directory and
# can be overridden with the IR4QA_QRELS environment variable.
# The script stops at the first step that fails.

if [ $# -ne 1 ]; then
  echo "usage: $0 <file.xml>" >&2
  echo " e.g.: $0 OT-CS-CS-02-T.xml" >&2
  exit 1
fi

MYFILE=$1
QRELS=${IR4QA_QRELS:-ACLIA1-CS.qrels}
TIDFILE=$QRELS.tid
# IR4QA-splitruns names its *.res files after the run path with the
# directory part removed, so the evaluator must be given that same name
# rather than the full path.
RUN=${MYFILE##*/}

# Remove the result directory left over from a previous invocation.
if [ -d result ]; then
  rm -r result
fi

# Create topic directories under the current directory, a qrels file
# (*.rel) for each topic, and the list of topic IDs (*.tid) that are
# included in the original qrels.
./IR4QA-splitqrels "$QRELS" || exit 1

# Break the XML file in the ACLIA1 IR4QA submission format into per-topic
# ranked list files (*.res).
./IR4QA-splitruns "$TIDFILE" "$MYFILE" || exit 1

# Build q_eval.  A failed build is fatal only when no executable is left
# over from an earlier build.
if ! make; then
  if [ -x ./q_eval ]; then
    echo "$0: make failed; using the existing ./q_eval" >&2
  else
    echo "$0: make failed and ./q_eval is not available" >&2
    exit 1
  fi
fi

# Read the *.rel file and the *.res (ranked list) file of each topic and
# compute the evaluation metrics (*.lab) with q_eval.  The *.qev file
# contains per-topic performance values; the values averaged across topics
# go to standard output.
./IR4QA-qeval "$TIDFILE" default "$RUN"

# Collecting the outputs in result/ is currently disabled:
# mkdir -p result
# mv ACLIA1-CS-T* result/
# mv ACLIA1-CS.qrels.tid result/
# mv OT-CS-CS-05-T.xml.default.qev result/
(studyarea) |