xquery version "3.1";

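(:~
    Saxon makes the input document (-s) the context item of the query.
    The external variable $metadataUrl must be supplied as a query parameter.
    Call with `java -cp path/to/saxon-he-12.6.jar net.sf.saxon.Query -s:input.xml -q:transformation.xquery metadataUrl=path/to/metadata.xml`
:)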

declare namespace tgmd = "http://textgrid.info/namespaces/metadata/core/2010";
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace saxon="http://saxon.sf.net/";
declare option saxon:output "method=text";

declare variable $metadataUrl external;

(:
    Main query body: turns the TEI context document into line-delimited JSON.

    Reads the metadata document at $metadataUrl to obtain
      - the @id of tgmd:project (used as the "index" value), and
      - the part of tgmd:textgridUri after the first ":" (used as "_id" prefix).

    For every tei:surface with an @n attribute, two lines are produced:
      1. {"index": <project id>, "_id": <uri suffix>_<@n>}
      2. {"text": <whitespace-normalized string value of the surface>}
    Surfaces whose normalized text is empty, or consists only of one or two
    digits (presumably bare page numbers; confirm), are skipped.

    The pieces are concatenated with string-join(..., ""). With the text
    output method and no item-separator, the serializer would otherwise put a
    single space between adjacent strings, leaving stray blanks at the end and
    beginning of each line.

    NOTE(review): the first record is a flat map. If the consumer is the
    Elasticsearch bulk API, the expected shape would be
    {"index": {"_index": ..., "_id": ...}}; confirm with the consumer before
    changing it.
:)
let $metadata := doc($metadataUrl)
let $project_id := string($metadata//tgmd:generic/tgmd:generated/tgmd:project/@id)
let $textgrid_uri := $metadata//tgmd:generic/tgmd:generated/tgmd:textgridUri => substring-after(":")
(: serialization parameters shared by both JSON records :)
let $json_params := map{"method": "json", "indent": false()}
let $newline := codepoints-to-string(10)
return string-join(
    for $surface in ./tei:TEI/tei:sourceDoc/tei:surface[@n]
    let $text := string($surface) => normalize-space()
    where $text ne ""
    where not(matches($text, '^\d{1,2}$'))
    return (
        serialize(
            map{
                "index": $project_id,
                "_id" : $textgrid_uri || "_" || string( $surface/@n )
            },
            $json_params
        ),
        $newline,
        serialize(map{"text": $text}, $json_params),
        $newline
    ),
    ""
)

      

(:
    Holder of rights
    SUB Göttingen

    Citation Suggestion for this Object
    TextGrid Repository (2025). Die Kollektion. transformation.xquery. search-index-test. SUB Göttingen. https://hdl.handle.net/21.T11991/48q9z.0
:)