shell脚本（sed及awk的应用）

TAG:

IR4QA-splitqrels脚本

#!/bin/sh

# IR4QA-splitqrels: split a qrels file into per-topic relevance files.
#
# Usage: IR4QA-splitqrels <IR4QAqrels file>
#
# The topic ID is everything before the first space on each qrels line.
# For every distinct topic ID this script
#   - creates a directory named after the topic ID under the current directory,
#   - writes <topicID>/<topicID>.rel containing the rest of each line of that
#     topic (sorted, duplicates removed).
# It also writes <qrels file>.tid, the list of topic IDs that occur in the
# original qrels, one per line, in order of first appearance.
#
# Progress messages go to standard error. Exits 1 on bad usage, an unreadable
# qrels file, or a failed write/mkdir.

if [ $# -ne 1 ]; then
    echo "usage: $(basename "$0") <IR4QAqrels file>" >&2
    echo " e.g.: $(basename "$0") ACLIA1-JA.qrels" >&2
    exit 1
fi

QRELS=$1
shift

if [ ! -r "$QRELS" ]; then
    echo "$(basename "$0"): cannot read $QRELS" >&2
    exit 1
fi

TIDSUF="tid"
#RELSUF="prel" # pseudorel
RELSUF="rel"

# create a topicid file from the qrel file

TIDFILE=$QRELS.$TIDSUF

# Strip everything from the first space onwards, then keep only the first
# occurrence of each ID. Unlike uniq, the seen[] filter also removes
# duplicates that are not adjacent, so an unsorted qrels file still yields
# one line per topic. Blank IDs (empty lines in the qrels) are dropped.
awk '{ sub(/ .*$/, "") } $0 != "" && !seen[$0]++' "$QRELS" > "$TIDFILE" || exit 1
echo "created $TIDFILE" >&2

# create a rel assessment file in each topic directory

while read -r TID; do

    mkdir -p -- "$TID" || exit 1
    OUTF=$TID/$TID.$RELSUF

    # Select lines that literally start with "<TID> " (index() does a plain
    # string comparison, so characters such as "." in an ID are not treated
    # as regex operators) and print what follows the ID and its space.
    awk -v tid="$TID" '
        index($0, tid " ") == 1 { print substr($0, length(tid) + 2) }
    ' "$QRELS" | sort -u > "$OUTF"
    echo "created $OUTF" >&2

done < "$TIDFILE"

IR4QA-splitruns脚本

#!/bin/sh

# IR4QA-splitruns: break a single XML run file in the ACLIA1 IR4QA submission
# format into per-topic ranked list files (*.res).
#
# Usage: IR4QA-splitruns <topicIDlist> <runpath>
#
#   <topicIDlist>  file with one topic ID per line
#   <runpath>      path of the run file to split
#
# For every topic ID the document IDs found between the line containing
# <TOPIC ID="<topicID>"> and the next line containing </TOPIC> are written,
# one per line, to <topicID>/<topicID>.<run name>.res, where <run name> is
# <runpath> without its directory part. The first "_CMN_" on each output line
# is removed. At most DOCLIMIT document IDs are kept per topic.
#
# An (empty) output file is created even if the run has nothing for a topic.
# The topic directories must already exist.

# Both arguments are required: the run path is read from the second argument,
# and with an empty run path the splitter would have no input file to read.
if [ $# -lt 2 ]; then
    echo "usage: $(basename "$0") <topicIDlist> <runpath>" >&2
    echo " e.g.: $(basename "$0") ACLIA1-JA.qrels.tid ./Runs/CMUJAV-EN-JA-01-T" >&2
    exit 1
fi

#AWK=/usr/bin/gawk
AWK=awk

SUF="res"
# truncate if the run is too long
DOCLIMIT=1000

TIDLIST=$1
RUNPATH=$2

if [ ! -r "$TIDLIST" ] || [ ! -r "$RUNPATH" ]; then
    echo "$(basename "$0"): cannot read $TIDLIST or $RUNPATH" >&2
    exit 1
fi

# run name = run path with everything up to the last "/" removed
RUN=${RUNPATH##*/}

while read -r TID; do

    # skip blank lines in the topic list
    [ -n "$TID" ] || continue

    OUTF=$TID/$TID.$RUN.$SUF

    # The topic ID and the limit are passed with -v instead of being spliced
    # into the program text, so their content cannot alter the awk code.
    "$AWK" -v tid="$TID" -v limit="$DOCLIMIT" '
    BEGIN { count = 0; open_tag = "<TOPIC ID=\"" tid "\">" }

    # start of the wanted topic
    index($0, open_tag) { inside = 1; next }

    inside {
        # end of the wanted topic: nothing more to extract
        if ($0 ~ /<\/TOPIC>/) exit

        if ($0 ~ / DOCID=/) {
            sub(/^.* DOCID="/, "")   # drop everything up to the opening quote
            sub(/" .*$/, "")         # drop the closing quote and what follows
            sub(/_CMN_/, "")         # done here rather than with sed -i,
                                     # whose in-place syntax differs GNU vs BSD
            print
            if (++count >= limit) exit
        }
    }
    ' "$RUNPATH" > "$OUTF"

    echo "created $OUTF" >&2

done < "$TIDLIST"

IR4QA-qeval脚本

#!/bin/sh

# IR4QA-qeval: evaluate one run with the q_eval program.
#
# Usage: IR4QA-qeval <topicIDfile> <evaluationname> <run>
#
#   <topicIDfile>     file with one topic ID per line
#   <evaluationname>  label used in the names of the output files
#   <run>             run name, as used in <topicID>/<topicID>.<run>.res
#
# For each topic this reads <topicID>/<topicID>.rel and the ranked list
# <topicID>/<topicID>.<run>.res, writes the labelled list to
# <topicID>/<topicID>.<run>.<evaluationname>.lab (q_eval label) and appends
# the output of q_eval compute to <run>.<evaluationname>.qev.
# Finally, for each metric in METRICS, the values found in the .qev file are
# summed and divided by the number of topics; the result is printed to
# standard output.

# All three arguments are required: the run name is read from the third one.
if [ $# -lt 3 ]; then
    echo "usage: $(basename "$0") <topicIDfile> <evaluationname> <run>" >&2
    echo " e.g.: $(basename "$0") ACLIA1-JA.qrels.tid default TEST-EN-JA-01-T" >&2
    exit 1
fi

# write your q_eval label/comp options here

LABELOPT=""

COMPOPT="-cutoffs 10,1000 -g 1:2"
# The above is for the ACLIA1 IR4QA collection which only has
# relevant and partially relevant docs.

#COMPOPT="-cutoffs 10,1000 -g 1:2:3"
# The above is for NTCIR collections with highly relevant / relevant
# / partially relevant docs.

# select the metrics you want here

#METRICS="RR O-measure P-measure P-plus AP Q-measure RBP MSnDCG@0010 MSnDCG@1000 P@0010 Hit@0010"

METRICS="AP Q-measure MSnDCG@1000"

OSUF="qev"

TIDFILE=$1
OUTSTR=$2
RUN=$3

RESSUF="res"
RELSUF="rel"
LABSUF="lab"

#AWK=/usr/bin/gawk
AWK=awk

# set your own q_eval path here
QEVPATH=./q_eval

if [ ! -r "$TIDFILE" ]; then
    echo "$(basename "$0"): cannot read $TIDFILE" >&2
    exit 1
fi

# number of topics (= number of lines in the topic ID file)
NTOPICS=$(wc -l < "$TIDFILE" | "$AWK" '{ print $1 }')

# The averages below divide by NTOPICS, so an empty topic list cannot be
# evaluated.
if [ "${NTOPICS:-0}" -le 0 ]; then
    echo "$(basename "$0"): no topics in $TIDFILE" >&2
    exit 1
fi

OUTF=$RUN.$OUTSTR.$OSUF

while read -r TID; do

    LABFILE=$TID/$TID.$RUN.$OUTSTR.$LABSUF

    # cat is kept on purpose: if the input file is missing, q_eval still runs
    # on empty input instead of the whole command being skipped.
    # LABELOPT / COMPOPT are left unquoted so they split into separate options.
    # shellcheck disable=SC2086,SC2002
    cat "$TID/$TID.$RUN.$RESSUF" |
        "$QEVPATH" label -r "$TID/$TID.$RELSUF" $LABELOPT > "$LABFILE"
    echo "created $LABFILE" >&2

    # shellcheck disable=SC2086,SC2002
    cat "$LABFILE" |
        "$QEVPATH" compute -r "$TID/$TID.$RELSUF" $COMPOPT -out "$TID"

done < "$TIDFILE" > "$OUTF"

echo "created $OUTF" >&2

echo "averaging over $NTOPICS topics:"
for MET in $METRICS; do

    # -F: the metric name is matched literally, not as a regex.
    # run/met/n are passed with -v so they are printed as data and never
    # become part of the printf format string.
    grep -F -- " ${MET}=" "$OUTF" |
        "$AWK" -v run="$RUN" -v met="$MET" -v n="$NTOPICS" '
        BEGIN { sum = 0 }
        { sum += $NF }
        # note that dividing by NR is incorrect.
        END { printf("%s %s %.4f\n", run, met, sum / n) }
        '
done

run脚本（依次调用前三个脚本）

#!/bin/sh

# run: driver that evaluates one IR4QA run file against ACLIA1-CS.qrels.
#
# Usage: run <file.xml>
#
# Steps (the script stops at the first step that fails):
#   1. ./IR4QA-splitqrels  - create the topic directories, the per-topic
#                            *.rel files and the topic ID list (*.tid)
#   2. ./IR4QA-splitruns   - break <file.xml> into per-topic *.res files
#   3. make                - run the default make target in this directory
#   4. ./IR4QA-qeval       - compute the evaluation metrics

if [ $# -ne 1 ]; then
    echo "usage: $0 <file.xml>" >&2
    echo " e.g.: $0 OT-CS-CS-02-T.xml" >&2
    exit 1
fi

MYFILE=$1
QRELS=ACLIA1-CS.qrels

if [ ! -f "$MYFILE" ]; then
    echo "$0: no such file: $MYFILE" >&2
    exit 1
fi

# IR4QA-splitruns names its output after the run file without its directory
# part, so the same stripped name is handed to IR4QA-qeval; otherwise the
# *.res files are not found when <file.xml> is given with a path.
RUN=${MYFILE##*/}

# remove the result directory of a previous invocation, if any
if [ -d result ]; then
    rm -r result
fi

# create topic directories under "current directory"
# and creates a qrels file(*.rel) for each topic.
# also create a list of topicIDs(*.tid) that are included in the original qrels

./IR4QA-splitqrels "$QRELS" || exit 1

# break a single xml file in the ACLIA1 IR4QA submission format
# into per-topic ranked list files(*.res).

./IR4QA-splitruns "$QRELS.tid" "$MYFILE" || exit 1

make || exit 1

./IR4QA-qeval "$QRELS.tid" default "$RUN"

# mkdir -p result
# mv ACLIA1-CS-T* result/
# mv ACLIA1-CS.qrels.tid result/
# mv OT-CS-CS-05-T.xml.default.qev result/

(studyarea)

shell--传入参数的处理	shell命令查看某文件夹下的文件个数
bash代码注入的安全漏洞	应该知道的Linux技巧
在Makefile中使用$$	16个桌面Linux用户必须要知道的Shell命令
监视模块启动的SHELL	用脚本实时显示Linux网络流量
shell中字符到ascii码或数字的转换	shell简单应用（分割文件内容）

搜索

热门标签:

shell脚本（sed及awk的应用）