# Working notes for project GACR2729: this part processes the Czech data.
#
# Path string exactly as recorded in the original notes:
#   /home/skoumal/PROJEKTY/GACR2729/home/skoumal/cnk-work/GACR2729
# NOTE(review): this looks like two paths run together
# (/home/skoumal/PROJEKTY/GACR2729 and /home/skoumal/cnk-work/GACR2729, the
# latter being the one where Czech is processed) -- confirm.

# --- vert-1-6 through vert-A ---
# Create one directory per column group.
# NOTE(review): ".../cestina" is copied verbatim from the notes; the "..." is
# presumably a placeholder for the real parent path -- fill in before running.
cd .../cestina &&
  mkdir -p vert-1-6 vert-7 vert-8 vert-9 vert-A

# Distribute the columns into those directories.
# From vert-rules0-frazrl-rules-mdita-kolok-sublm-agr:
#   fields 1-6 -> vert-1-6, field 7 -> vert-7, field 8 -> vert-8.
# The loop only runs if the cd succeeded, so a wrong working directory cannot
# be sliced by accident.
cd vert-rules0-frazrl-rules-mdita-kolok-sublm-agr &&
  for ff in *; do
    [ -f "$ff" ] || continue # skip non-files and the literal "*" of an empty dir
    cut -f1-6 -- "$ff" > "../vert-1-6/$ff"
    cut -f7 -- "$ff" > "../vert-7/$ff"
    cut -f8 -- "$ff" > "../vert-8/$ff"
  done

# From mwe_out: field 7 -> vert-9, field 8 -> vert-A.
cd ../mwe_out/ &&
  for ff in *; do
    [ -f "$ff" ] || continue
    cut -f7 -- "$ff" > "../vert-9/$ff"
    cut -f8 -- "$ff" > "../vert-A/$ff"
  done
# --- vert-kolok ---
# Paste the per-column files back together side by side (vert-1-6, vert-7,
# vert-8, vert-9, vert-A), strip trailing spaces/tabs from every line, and
# write the result as <name>.vrt (the .txt suffix, if any, is dropped).
# The output directory is created first because the redirect below fails when
# it does not exist.
cd ../vert-1-6/ &&
  mkdir -p ../vert-kolok &&
  for ff in *; do
    [ -f "$ff" ] || continue # skip non-files and the literal "*" of an empty dir
    paste -- "$ff" "../vert-7/$ff" "../vert-8/$ff" "../vert-9/$ff" "../vert-A/$ff" \
      | perl -pe 's/[\ \t]+$//' \
      > "../vert-kolok/${ff%.txt}.vrt"
  done

# Symlink the original .txt, .json and .xml files from ../orig into vert-kolok.
# Each link gets the basename of its target (single-argument form of ln -s).
cd ../vert-kolok/ &&
  for ff in ../orig/*.txt ../orig/*.json ../orig/*.xml; do
    [ -e "$ff" ] || continue # unmatched glob: do not create a dangling "*.ext" link
    ln -s -- "$ff"
  done
# Next: produce the XML and the verticals.
#
# Both steps run in the current directory through parallel-mask.sh, once with
# ann2standoff for names matching .*\.vrt and once with standoff2xml for names
# matching .*\.txt$.
# NOTE(review): parallel-mask.sh is not shown here; presumably -C is the
# command to run per file, -m a filename regex, -p the number of parallel jobs
# and -d the directory to scan -- confirm against the script itself.
ann2standoff_ini=/cnk/work/skoumal/LEMUR/ann2standoff.ini

parallel-mask.sh -C "ann2standoff -c ${ann2standoff_ini} -p frantalemur -P none" -m '.*\.vrt' -v -p45 -d.
parallel-mask.sh -C "standoff2xml -c ${ann2standoff_ini} -p frantalemur -me s" -m '.*\.txt$' -v -p100 -d.
# Convert every *.ann.xml in the current directory with xml2vrt and store the
# result as ../vertikaly/<name>.vert.xml, printing each input name as progress.
#
# The perl filter collapses a "</s>" immediately followed by another "</s>" on
# the next line into a single "</s>". It uses -0777 (slurp the whole input)
# because the pattern spans a newline; the earlier "undef $/" inside the -p
# loop only took effect after the first line had already been read on its own,
# so a pair spanning lines 1-2 was missed.
mkdir -p ../vertikaly &&
  for ff in *.ann.xml; do
    [ -e "$ff" ] || continue # unmatched glob stays literal; nothing to convert
    printf '%s\n' "$ff"
    xml2vrt -c /cnk/work/skoumal/LEMUR/ann2standoff.ini -p frantalemur -g g "$ff" \
      | perl -0777 -pe 's:</s>\n</s>:</s>:g' \
      > "../vertikaly/${ff%.ann.xml}.vert.xml"
  done