Both sides previous revision · Previous revision | |
wiki:user:skoumal:infra:process_text_run [2025/10/07 11:34] – [Výroba vlastních korpusů] skoumal | wiki:user:skoumal:infra:process_text_run [2025/10/07 15:03] (current) – [Výroba vlastních korpusů] skoumal |
---|
* Příprava souborů pro vlastní korpus:<code>cd .../vert-corr | * Příprava souborů pro vlastní korpus:<code>cd .../vert-corr |
parallel-mask.sh -C "standoff2xml -c ../../../LEMUR/ann2standoff.ini -p frantalemur" -m ".*\.txt$" -v -p45 -d.</code>s FRANTOU:<code> | parallel-mask.sh -C "standoff2xml -c ../../../LEMUR/ann2standoff.ini -p frantalemur" -m ".*\.txt$" -v -p45 -d.</code>s FRANTOU:<code> |
for ff in *.ann.xml; do gg=${ff%.ann.xml}.txt; xml2vrt -c ../../../Frazeologie/ann2standoff.ini -p cnk2020lemur -P none $ff | perl -pe "s/^(<doc) .*>$/\1 id=\"$gg\">/"; done > /cnk/common/korpus/vertikaly/NEWTON2023/vertikala | for ff in *.ann.xml; do gg=${ff%.ann.xml}.txt; xml2vrt -c ../../../LEMUR/ann2standoff.ini -p cnk2020lemur -P none $ff | perl -pe "s/^(<doc) .*>$/\1 id=\"$gg\">/"; done > /cnk/common/korpus/vertikaly/<korpus>/vertikala |
</code>s FRANTOU a Lemurem:<code>for ff in *.ann.xml; do gg=${ff%.ann.xml}.txt; xml2vrt -c ../../../Frazeologie/ann2standoff.ini -p frantalemur $ff | perl -pe "s/^(<doc) .*>$/\1 id=\"$gg\">/"; done > /cnk/common/korpus/vertikaly/NEWTON2023/vertikala | </code>s FRANTOU a Lemurem:<code>for ff in *.ann.xml; do gg=${ff%.ann.xml}.txt; xml2vrt -c ../../../LEMUR/ann2standoff.ini -p frantalemur $ff | perl -pe "s/^(<doc) .*>$/\1 id=\"$gg\">/"; done > /cnk/common/korpus/vertikaly/<korpus>/vertikala |
</code> | </code> |
| |
| |