xquery version "3.1";
(:~
Saxon makes the input document the context object.
Call with `java -cp path/to/saxon-he-12.6.jar net.sf.saxon.Query -s:input.xml -q:transformation.xquery`
:)
declare namespace tgmd = "http://textgrid.info/namespaces/metadata/core/2010";
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace saxon="http://saxon.sf.net/";
declare option saxon:output "method=text";
declare variable $metadataUrl external;
(:
  Query body.

  Reads the TextGrid metadata document at $metadataUrl and, for every
  tei:surface (with an @n attribute) of the context document, emits two
  JSON lines:
    1. an object with "index" (the project id) and "_id"
       (the part of the textgridUri after the first ":" || "_" || @n),
    2. an object with "text" (the whitespace-normalized surface text).

  Surfaces whose normalized text is empty or consists only of a one- or
  two-digit number are skipped.

  NOTE(review): the two-line layout resembles a bulk-indexing payload;
  whether the consumer expects exactly this header shape cannot be told
  from this file, so the JSON structure is kept unchanged.
:)
let $metadata := doc($metadataUrl)
let $generated := $metadata//tgmd:generic/tgmd:generated
let $project_id := string($generated/tgmd:project/@id)
let $textgrid_uri := substring-after($generated/tgmd:textgridUri, ":")
(: Loop-invariant helpers: serialization parameters and the line break. :)
let $json_params := map{"method": "json", "indent": false()}
let $newline := codepoints-to-string(10)
return
  (: Join everything into ONE string. Returning the pieces as separate
     items would make the serializer insert a single space between
     adjacent atomic values, leaving stray spaces at line starts/ends. :)
  string-join(
    for $surface in ./tei:TEI/tei:sourceDoc/tei:surface[@n]
    let $text := normalize-space(string($surface))
    where $text ne "" and not(matches($text, '^\d{1,2}$'))
    return (
      serialize(
        map{
          "index": $project_id,
          "_id" : $textgrid_uri || "_" || string($surface/@n)
        },
        $json_params
      ),
      $newline,
      serialize(map{"text": $text}, $json_params),
      $newline
    ),
    ""
  )
(:
 - Holder of rights
 - SUB Göttingen
:)