$ cd /corp/etalon
$ ldir
drwxr-xr-x 2 skoumal staff 4096 2013-05-13 19:20 compare/
drwxr-xr-x 2 skoumal staff 4096 2013-05-14 09:40 vert-done/
drwxrwxr-x 2 skoumal staff 4096 2013-05-13 15:30 vert-morf/
drwxr-xr-x 2 skoumal staff 4096 2013-05-13 18:22 vert-morf-cmp/
drwxrwxr-x 2 skoumal staff 4096 2013-05-13 15:31 vert-rules/
drwxr-xr-x 2 skoumal staff 4096 2013-05-13 19:11 vert-rules-cmp/
drwxrwxr-x 2 skoumal staff 4096 2013-05-13 12:47 vert-rules-frazrl/
drwxr-xr-x 2 skoumal staff 4096 2013-05-13 15:07 vert-rules-frazrl-cmp/
drwxrwxr-x 2 skoumal staff 4096 2013-05-13 12:47 vert-rules-frazrl-rules/
drwxr-xr-x 2 skoumal staff 4096 2013-05-13 15:07 vert-rules-frazrl-rules-cmp/
$ cd .../vert-morf
$ for ff in *; do echo $ff; normalize-anot.pl < $ff | post-disamb.pl > ../vert-morf-cmp/$ff; done
a totéž pro vert-rules, vert-rules-frazrl a vert-rules-frazrl-rules.
compare-Mvert:
$ cd ../vert-done
$ for ff in *; do echo $ff; paste $ff ../vert-morf-cmp/$ff | perl -pe 's/^[\t]+$//' > ../compare/morf-$ff; done
a opět opakovat pro vert-rules, vert-rules-frazrl a vert-rules-frazrl-rules. Předpony jsou morf-, rules-, frazrl- a rfr-.
compare-Mvert:
$ cd .../compare
$ cat morf-???????? | compare-Mvert tag-table.txt > /dev/null
$ cat morf-?????????? | compare-Mvert tag-table.txt > /dev/null
$ cat morf-????????? | compare-Mvert tag-table.txt > /dev/null
Z údajů Lines a Diff se spočítá procento recallu.
count-spaces:
$ cd /corp/etalon/vert-morf
$ cat ???????? | count-spaces
$ cat ?????????? | count-spaces
$ cat ????????? | count-spaces
Zajímavé jsou pro nás údaje Forms, Tags a Uniq. Stejný výpočet provedeme i pro vert-rules, vert-rules-frazrl a vert-rules-frazrl-rules.
$ mkdir .../compare/anotatori/PUB
$ mkdir .../compare/anotatori/NOV
$ mkdir .../compare/anotatori/SCI
$ cd .../data/PUB-export
$ for ff in *; do echo $ff; prep-for-tag-comparison.sh < $ff |\
    perl -pe 's/<.*//' | cat -s > ../../compare/anotatori/PUB/$ff; done
$ cd ../NOV-export
$ for ff in *; do echo $ff; prep-for-tag-comparison.sh < $ff |\
    perl -pe 's/<.*//' | cat -s > ../../compare/anotatori/NOV/$ff; done
$ cd ../SCI-export
$ for ff in *; do echo $ff; prep-for-tag-comparison.sh < $ff |\
    perl -pe 's/<.*//' | cat -s > ../../compare/anotatori/SCI/$ff; done
$ cd .../compare/merge/done
$ for ff in ????????; do (cd ../../anotatori/PUB/; ls $ff*); done
$ cat > ../../PUB-done.txt
$ for ff in ??????????; do (cd ../../anotatori/NOV/; ls $ff*); done
$ cat > ../../NOV-done.txt
$ for ff in ?????????; do (cd ../../anotatori/SCI/; ls $ff*); done
$ cat > ../../SCI-done.txt
$ cd .../compare
$ vert-vert-vert.sh merge/archiv/<file>.fordiff anotatori/PUB/<file>-<A1> anotatori/PUB/<file>-<A2> \
    > cmp-3input/<file>-<A1>-<A2>
anebo použít skript make-3input.pl
$ make-3input.pl PUB < PUB-done.txt
#!/usr/bin/perl
# make-3input.pl - build the three-way comparison inputs for one genre.
#
# Usage: make-3input.pl genre_abbrev < list-of-pairs
#
# Every STDIN line names two annotator files, "<file>-<A1> <file>-<A2>",
# separated by whitespace.  For each pair the script runs
#     vert-vert-vert.sh merge/archiv/<file>.fordiff \
#         anotatori/<genre>/<file>-<A1> anotatori/<genre>/<file>-<A2>
# and stores the helper's output in cmp-3input/<file>-<A1>-<A2>.
# A progress line "<input line> > <output file>" is printed to STDOUT.
#
# Exit status: 2 on wrong usage; autodie aborts the script when chdir,
# opening an output file, or starting the helper fails.
use strict;
use warnings;
use autodie;

# All relative paths below are resolved against this directory.
my $COMPARE_DIR = "/home/skoumal/PROJEKTY/GACR1012/compare";

if (@ARGV != 1) {
    # Usage errors belong on STDERR and must not look like success.
    print STDERR "\nUsage: make-3input.pl genre_abbrev\n";
    exit 2;
}
my $genre = $ARGV[0];

chdir $COMPARE_DIR;

while (my $line = <STDIN>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines

    # split ' ' skips leading whitespace and accepts any whitespace run,
    # so padded or tab-separated lists parse the same as "a b".
    # Only the first two names on a line are used.
    my ($first, $second) = split ' ', $line;
    unless (defined $second) {
        warn "make-3input.pl: skipping line $.: expected two file names: $line\n";
        next;
    }

    # Names have the form <file>-<annotator>; only the first two
    # dash-separated parts are used.
    my ($stem,  $annot1) = split /-/, $first;
    my (undef,  $annot2) = split /-/, $second;
    unless (defined $stem && length $stem
            && defined $annot1 && defined $annot2) {
        warn "make-3input.pl: skipping line $.: names must look like <file>-<annotator>: $line\n";
        next;
    }

    my @inp = (
        "merge/archiv/$stem.fordiff",
        "anotatori/$genre/$first",
        "anotatori/$genre/$second",
    );
    my $outf = "cmp-3input/$stem-$annot1-$annot2";
    print "$line > $outf\n";

    open(my $out, '>', $outf);

    # List-form pipe open: arguments reach the helper verbatim (no shell
    # word splitting or globbing), and autodie reports a helper that
    # cannot be started instead of silently leaving an empty output file.
    open(my $cmd, '-|', 'vert-vert-vert.sh', @inp);
    while (my $chunk = <$cmd>) {
        print {$out} $chunk;
    }
    {
        # NOTE(review): the helper's exit-status convention is not visible
        # here, so a non-zero status is reported but not treated as fatal.
        no autodie qw(close);
        close($cmd)
            or warn "make-3input.pl: vert-vert-vert.sh exited with status "
                  . ($? >> 8) . " for $outf\n";
    }

    close($out);
}
$ cd .../compare/cmp-3input
$ for ff in ????????-??-??; do echo $ff; compare-3vert ../tag-table.txt -F < $ff \
    > ../cmp-3result/$ff; done
$ cat > ../3vert-PUB-results.txt
$ for ff in ?????????-??-??; do echo $ff; compare-3vert ../tag-table.txt -F < $ff \
    > ../cmp-3result/$ff; done
$ cat > ../3vert-SCI-results.txt
$ for ff in ??????????-??-??; do echo $ff; compare-3vert ../tag-table.txt -F < $ff \
    > ../cmp-3result/$ff; done
$ cat > ../3vert-NOV-results.txt