csts – nástroje máme odladěné.
vert a z něj csts.
premorfo (mezery v poznámkách) a pouze s povinnými skripty z postmorfo a na závěr rozgenerujeme tagy na naše:
make-corp.sh -A1 -B0 -Eucs2 -p45 -s csts -t csts-morf -w -v
postdisamb:
mkdir csts-morf-vid
cd /usr/local/corp/aspect
./aspect-vert.pl -csts -utf8 /store/corp/Ortofon/ortofon-hybrid/csts-morf \
/store/corp/Ortofon/ortofon-hybrid/csts-morf-vid
cd -
corr-asp.sh -C1 -Eutf -fcsts -p45 -s csts-morf-vid -t csts-morf-vid-corr -v
remove-dupl-csts.pl:
parallel-filter.sh -C remove-dupl-csts.pl -p45 -s csts-morf-vid-corr -t csts-morf-vid-corr-single -v
vert:
make-corp.sh -s csts-morf-vid-corr-single -t vert-morf-vid-corr-single -p45 -v
premorfo a pouze s povinnými skripty z postmorfo:
make-corp.sh -A1 -B0 -Eucs2 -p45 -s csts -t csts-morf -v
anebo si vezmeme data trénování Morphodity:
cd csts-morf
for ff in *; do echo $ff; perl -i -pe 's/(<MMt>.{15})./$1/g' $ff; done
make-whole-corp-csts.sh -E ucs2 -trules -Trules -p45 -v
(for ff in csts-rules/*; do echo $ff; check-csts.pl < $ff > /dev/null; done) 2>&1 |\
grep -B1 Incorrect | grep csts-rules > seznam-vadnych.txt
a v seznamu zaměníme rules za morf.
for ff in $(cat seznam-vadnych.txt); do (< $ff /usr/local/corp/bin/gc_ucs2_release.x \
--lex=/usr/local/corp/LEX_ucs2 --action=single --input=jh_csts --output=wide_csts \
--collocations=modified_subst --empty_positions=modified --trace_deletions=no \
--group=root0 >/dev/null) |& grep Tag; done | cut -f2 -d' ' | sort -u > add-JH.txt
add-JH-1.txt a add-JH-2.txt, upravíme, slepíme k sobě a přidáme do CZ181001ag.txt:
cp add-JH.txt add-JH-1.txt
cp add-JH.txt add-JH-2.txt
perl -i -pe 's/-([1-9])$/=$1/' add-JH-1.txt
perl -i -pe 's/-//g' add-JH-1.txt
perl -i -pe 's/=/-/' add-JH-1.txt
paste -d' ' add-JH-1.txt add-JH-2.txt > add-JH.txt
a zkompilujeme LEXy
frazrl:
make-whole-corp-csts.sh -E ucs2 -trules -Tfrazrl -p45 -v
parallel-filter.sh -C "remove1314-utf8.pl | negr_kolokace_csts_ucs2.pl" -p45 \
-s csts-rules-frazrl -t csts-rules-frazrl-negr -v
rm -rf csts-rules-frazrl
mv csts-rules-frazrl-negr csts-rules-frazrl
postdisamb:
mkdir csts-rules-frazrl-vid
cd /usr/local/corp/aspect
./aspect-vert.pl -csts -utf8 ~/PROJEKTY/INFRASTRUKTURA/Etalon/MorphoDiTa/csts-rules-frazrl \
~/PROJEKTY/INFRASTRUKTURA/Etalon/MorphoDiTa/csts-rules-frazrl-vid
cd -
corr-asp.sh -C1 -Eutf -fcsts -p45 -s csts-rules-frazrl-vid -t csts-rules-frazrl-vid-corr -v
cd csts-rules-frazrl-vid-corr
for ff in *; do remove-dupl-csts.pl < $ff > $ff.new; mv $ff.new $ff; done
cd -
csts a řádky:
parallel-filter.sh -C "check-csts.pl 16" -n -p45 -s csts-rules-frazrl-vid-corr
cmp-lines-csts.sh csts-morf csts-rules-frazrl-vid-corr
vert:
mkdir vert-test
cd vert-rules-frazrl-vid-corr
for ff in *; do sed '1{/^$/d}' $ff | cut -f2- > ../vert-test/$ff.2; done
cd ../../../Etalon/Verze/0.9/4/
for ff in *; do echo $ff; paste <(perl -pe 's/(\t.*)\t/$1 /' $ff) ../../../MorphoDiTa/vert-test/$ff.2 \
> ../../../MorphoDiTa/vert-test/$ff; done
grep -h '&' * | sort -u
for ff in *; do perl -i -pe 's/&percnt;/%/g' $ff; done
for ff in *; do perl -i -pe 's/&amp;/&/g' $ff; done
for ff in *; do perl -i -pe 's/&ast;/*/g' $ff; done
for ff in *; do perl -i -pe 's/&lsqb;/[/g' $ff; done
for ff in *; do perl -i -pe 's/&rsqb;/]/g' $ff; done
cd ../../../MorphoDiTa/vert-test/
for ff in *.2; do tail -n3 ${ff%.2}; done | l