/corp/anotace
na grimmovi.make-whole-corp.sh
až k vert-rules-frazrl
.vert-rules-frazrl-rules
:$ make-corp.sh -s vert-rules-frazrl -t vert-rules-frazrl-rules -i wide_vertical -o wide_vertical -n -v -p8
annot-prep-vert.sh < /corp/syn2010/vert-rules-frazrl-rules/<file> \
  > .../SCI/vert/<file>
csts
:$ cd .../SCI
$ make-corp.sh -s vert -t csts -g -v -p8
x-rate-vert.sh
:$ x-rate-vert.sh | sort -n -k2 | cut -f1 -d":" | head -28 | xargs cat | grep -c '.'
utf-8
:$ for ff in $(x-rate-vert.sh | sort -n -k2 | cut -f1 -d":" | head -28); do echo $ff; \
    cstocs il2 utf8 < ../csts/$ff > ~/PROJEKTY/GACR1012/data/SCI-import/$ff; done
long-name.sh
:$ cd .../SCI-import
$ long-name.sh
distribute.sh
(nejdřív upravit!). count-multi-tags.pl
:$ for ff in VH MK ZN AB JK JN MB PK; do echo -n "$ff "; cat ????????-$ff | count-multi-tags.pl; done
cd /usr/local/corp/annotate
bin/csts-import-utkl.pl --force ~/PROJEKTY/GACR1012/data/PUB-import/<file>
users
.$ cd ~/PROJEKTY/GACR1012/data/PUB-import
$ for ff in *-AP; do echo $ff; /usr/local/corp/annotate/bin/csts-export.pl --verbose $ff \
    > ../PUB-export/$ff; done
$ for ff in *-AP; do echo -n $ff; (/usr/local/corp/annotate/bin/csts-export.pl \
    --verbose $ff > /dev/null) |& grep -c 'Warning' | xargs -0 printf "%7s"; done
$ cd ~/PROJEKTY/GACR1012/data/PUB-export
$ cd ~/PROJEKTY/GACR1012/data/korpus-cmp
$ paste <file1> <file2> | perl -pe 's/"utkl-.."/"utkl"/g' | merge-csts | remove-MM.pl \
    > ../merge-import/<file3>
anebo ještě lépe použít skript:
#!/bin/bash
#
# Merge two annotations of the same text and prepare the result for the
# final annotation pass.
#
# Input files in from_dir are named <base>-XX, where XX is a two-character
# suffix (presumably the annotator's initials -- confirm against the
# distribution step). Whenever two files that are adjacent in glob order
# share the same <base>, they are pasted side by side, the "utkl-.."
# attribute values are normalised to "utkl", and the result is piped through
# merge-csts and remove-MM.pl into to_dir/<base>-XX-YY.
#
# A pair is skipped when its output already exists in to_dir, in the "done"
# directory or in the "zero" directory, so the script can be re-run safely.
#
# Requires on PATH: paste, perl, merge-csts, remove-MM.pl.

# Print usage information and terminate with status 1 (also for -h, which
# is the historical behaviour of this script).
print_help() {
  echo "Merge two annotations and prepare data for final annotation"
  echo "Usage: $(basename "$0") [ -f from_dir ]" # [ -t to_dir ]
  echo "       $(basename "$0") -h"
  echo "Defaults: from_dir - current dir (csts-export)"
  echo "          to_dir   - ../merge-import"
  echo "from_dir files: *-??"
  echo "to_dir files:   *-??-??"
  exit 1
}

# Parse options, then merge every pair of same-base files found in from_dir.
# Arguments: the script's command-line arguments.
main() {
  local fdir="."
  # Output locations are resolved relative to from_dir (after the cd below).
  local tdir="../merge-import"
  local ddir="../merge-import-done"
  local zdir="../merge-zero"
  local flag

  # Leading ':' selects silent error reporting so that the offending option
  # is available in OPTARG and we can print our own message.
  while getopts ':f:h' flag; do
    case "$flag" in
      f) fdir=$OPTARG ;;
      h) print_help ;;
      # t) tdir=$OPTARG; ddir=$tdir-done; zdir=${tdir%-import}-zero ;;
      :)
        echo "Option \"-$OPTARG\" requires an argument" >&2
        print_help
        ;;
      *)
        echo "Unknown option \"-$OPTARG\"" >&2
        print_help
        ;;
    esac
  done

  # Change directory exactly once: repeating the cd would apply a relative
  # from_dir twice.
  cd -- "$fdir" || {
    echo "Cannot change to directory \"$fdir\"" >&2
    exit 1
  }
  mkdir -p -- "$tdir" "$ddir" "$zdir" || exit 1

  local prev_file='' prev_base=''
  local cur_file cur_base merged
  for cur_file in *-??; do
    # Skips the literal pattern when nothing matches, and non-regular files.
    [[ -f "$cur_file" ]] || continue

    cur_base=${cur_file%-??}
    if [[ -n "$prev_file" && "$cur_base" == "$prev_base" ]]; then
      # <base>-XX followed by <base>-YY yields <base>-XX-YY.
      merged="${prev_file}${cur_file: -3}"
      if [[ ! -f "$tdir/$merged" && ! -f "$ddir/$merged" \
        && ! -f "$zdir/$merged" ]]; then
        echo "$merged"
        if ! paste -- "$prev_file" "$cur_file" \
          | perl -pe 's/"utkl-.."/"utkl"/g' \
          | merge-csts \
          | remove-MM.pl > "$tdir/$merged"; then
          # The output file is kept for inspection; it is not removed
          # because the tools' exit-status conventions are not known here.
          echo "Warning: merge pipeline failed for \"$merged\";" \
            "check \"$tdir/$merged\"" >&2
        fi
      fi
    fi
    prev_base=$cur_base
    prev_file=$cur_file
  done
}

# Make a failure in any stage of the merge pipeline visible to the caller.
set -o pipefail

main "$@"
merge-export
…/compare/merge
:$ cd ../merge-export
$ prep-for-tag-comparison.sh < <file>-XX-YY | cat -s > ../../compare/merge/<file>
nebo ještě lépe
$ for ff in *; \
  do if [[ ( ! -f ../../compare/merge/${ff%-??-??}.fordiff ) && \
           ( ! -f ../../compare/merge/archiv/${ff%-??-??}.fordiff ) ]]; \
    then echo $ff; \
      prep-for-tag-comparison.sh < $ff | cat -s > ../../compare/merge/${ff%-??-??}; \
    fi; done
a
$ for ff in *; \
  do if [[ ( ! -f ../../compare/merge/${ff%-??-??}.fordiff ) && \
           ( ! -f ../../compare/merge/archiv/${ff%-??-??}.fordiff ) ]]; \
    then echo $ff; \
      prep-for-tag-comparison.sh < $ff | cat -s > ../../compare/merge/${ff%-??-??}.fordiff; \
    fi; done
.fordiff
a opravit lemmata FREKVENCE/negr_kolokace_rl_vert
check-vert.pl
Dg-------1A----
M
a příznak 7
AA.* NN.*
a NN.* AG.*
:$ cd /corp/vert-done
$ for ff in *; do echo $ff; check-adj-noun.pl < $ff | grep --color=always "[^\ ]*/[^\ ]*"; done | l -R
$ for ff in *; do echo $ff; check-noun-adg.pl < $ff | grep --color=always "[^\ ]*/[^\ ]*"; done | l -R
/data/PDT-data/variables_nodisamb
annot-prep-vert.sh
) a rozdělí se to na dvě sady: pro dva anotátory a pro jednoho anotátora (provede Tomáš): /data/PDT-data/variables_nodisamb/prac/vert-double-anot/
a /data/PDT-data/variables_nodisamb/prac/vert-single-anot/
csts
~/skoumal/PROJEKTY/GACR1012/data/PDT_nod_double-import
a ~/skoumal/PROJEKTY/GACR1012/data/PDT_nod_single-import
long-name.sh
(předem upravit) distribute.sh
(předem upravit) PDT_nod_double
tagují Marie Kelblerová (MK) a Pavel Kopřiva (PK) PDT_nod_single
tagují Marie Kelblerová (MK) a Pavel Kopřiva (PK) lnd92254_009 lnd92255_056 lnd92258_053 lnd92258_076 lnd92258_084 lnd92259_056