<?xml version="1.0" encoding="UTF-8" ?>
<?xml-stylesheet type="text/xsl" href="https://webservices2.cls.ru.nl/asrservice/static/interface.xsl"?>
<clam xmlns:xlink="http://www.w3.org/1999/xlink" version="3.2.14" id="asrservice" name="Automatic Speech Recognition Service" user="anonymous" baseurl="https://webservices2.cls.ru.nl/asrservice" interfaceoptions="disableliveinput" authentication="oauth,basic">
    <description>An Automatic Speech Recognition Service for a variety of languages, powered by WhisperX</description>
    <version>0.3.1</version>
    <author>Maarten van Gompel</author>
    <email>proycon@anaproy.nl</email>
    <customhtml>
    <![CDATA[
    <p class="alert alert-info"><strong>Data processing notice:</strong> All data you upload to this service and data obtained using this service will remain yours and is accessible only by you and our technical staff. Your data will not be shared with third parties and will not be used for any purpose other than this service's operation. You can remove your projects at any time and are encouraged to do so, which will remove your data from our servers permanently. We cannot guarantee any long-term storage of your data, so you are recommended to download the results and store them yourself immediately; projects on the server will be automatically deleted after 30 days. Despite our security precautions, we do discourage use of this service for highly confidential material as there is no encryption on the storage. Lastly, we also collect some statistics on the frequency of use of this service; when shared, these will always be anonymised.</p>
    ]]>
    </customhtml>
    <formats>
        <!-- File formats referenced by the format="..." attribute of the
             InputTemplate/OutputTemplate elements in the profiles section. -->

        <!-- Text and subtitle formats (used by the output templates). -->
        <format id="PlainTextFormat" name="Plain Text Format" mimetype="text/plain" />
        <format id="JSONFormat" name="JSON Format (generic, not further specified)" mimetype="application/json" />
        <format id="CTMFormat" name="Conversation Time Marked File" mimetype="text/plain" />
        <format id="SubRipTextFormat" name="SubRip Text" mimetype="application/x-subrip" />
        <format id="WebVTTFormat" name="WebVTT" mimetype="text/vtt" />
        <format id="TSVFormat" name="Tab Separated Values" mimetype="text/tab-separated-values" />

        <!-- Audio formats (used by the input templates). -->
        <!-- NOTE(review): MP4AudioFormat declares the same media type as
             MP3AudioFormat (audio/mpeg), and OggAudioFormat declares audio/vorbis.
             The commonly registered types are audio/mp4 and audio/ogg; confirm
             whether the current values are intentional. If they are changed, the
             mimetype attributes on the matching InputTemplate elements must be
             changed in the same edit. -->
        <format id="WaveAudioFormat" name="Wave Audio File" mimetype="audio/vnd.wave" />
        <format id="MP3AudioFormat" name="MP3 Audio File" mimetype="audio/mpeg" />
        <format id="MP4AudioFormat" name="MP4 Audio File" mimetype="audio/mpeg" />
        <format id="OggAudioFormat" name="Ogg Vorbis Audio File" mimetype="audio/vorbis" />
    </formats>
    <profiles>
            
        <profile>
            <!-- WAV profile: one input template (extension wav, optional="no")
                 and seven output templates. -->
            <input>
                <InputTemplate id="InputWavFile" format="WaveAudioFormat" label="Wav audio file" mimetype="audio/vnd.wave" extension="wav" optional="no" unique="no" acceptarchive="no" />
            </input>
            <output>
                <!-- Outputs that name InputWavFile as their parent: txt, json, ctm, srt, vtt, tsv. -->
                <OutputTemplate id="Transcription" format="PlainTextFormat" label="Plain text transcriptions without time stamps and speaker attribution" mimetype="text/plain" extension="txt" parent="InputWavFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="JSON" format="JSONFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="application/json" extension="json" parent="InputWavFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <!-- NOTE(review): this CTM label omits "and speaker attribution",
                     unlike the CTM label in the MP3, MP4 and Ogg profiles; confirm
                     which wording is intended and align all four. -->
                <OutputTemplate id="CTM" format="CTMFormat" label="Transcription with full word segmentation/alignment" mimetype="text/plain" extension="ctm" parent="InputWavFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="SRT" format="SubRipTextFormat" label="Timed transcriptions with speaker attribution (srt)" mimetype="application/x-subrip" extension="srt" parent="InputWavFile" unique="no" />
                <OutputTemplate id="WebVTT" format="WebVTTFormat" label="Timed transcriptions with speaker attribution (WebVTT)" mimetype="text/vtt" extension="vtt" parent="InputWavFile" unique="no" />
                <OutputTemplate id="TSV" format="TSVFormat" label="Timed transcriptions with speaker attribution (TSV)" mimetype="text/tab-separated-values" extension="tsv" parent="InputWavFile" unique="no" />
                <!-- Single log output with the fixed filename error.log (unique="yes", no parent). -->
                <OutputTemplate id="errorlog" format="PlainTextFormat" label="Log file with (standard) error output" mimetype="text/plain" filename="error.log" unique="yes">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
            </output>
        </profile>

            
        <profile>
            <!-- MP3 profile: one input template (extension mp3, optional="no")
                 and seven output templates. -->
            <input>
                <InputTemplate id="InputMP3File" format="MP3AudioFormat" label="MP3 audio file" mimetype="audio/mpeg" extension="mp3" optional="no" unique="no" acceptarchive="no" />
            </input>
            <output>
                <!-- Outputs that name InputMP3File as their parent: txt, json, ctm, srt, vtt, tsv. -->
                <OutputTemplate id="Transcription" format="PlainTextFormat" label="Plain text transcriptions without time stamps and speaker attribution" mimetype="text/plain" extension="txt" parent="InputMP3File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="JSON" format="JSONFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="application/json" extension="json" parent="InputMP3File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="CTM" format="CTMFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="text/plain" extension="ctm" parent="InputMP3File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="SRT" format="SubRipTextFormat" label="Timed transcriptions with speaker attribution (srt)" mimetype="application/x-subrip" extension="srt" parent="InputMP3File" unique="no" />
                <OutputTemplate id="WebVTT" format="WebVTTFormat" label="Timed transcriptions with speaker attribution (WebVTT)" mimetype="text/vtt" extension="vtt" parent="InputMP3File" unique="no" />
                <OutputTemplate id="TSV" format="TSVFormat" label="Timed transcriptions with speaker attribution (TSV)" mimetype="text/tab-separated-values" extension="tsv" parent="InputMP3File" unique="no" />
                <!-- Single log output with the fixed filename error.log (unique="yes", no parent). -->
                <OutputTemplate id="errorlog" format="PlainTextFormat" label="Log file with (standard) error output" mimetype="text/plain" filename="error.log" unique="yes">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
            </output>
        </profile>

            
        <profile>
            <!-- MP4 profile: one input template (extension mp4, optional="no")
                 and seven output templates. -->
            <input>
                <InputTemplate id="InputMP4File" format="MP4AudioFormat" label="MP4 audio file" mimetype="audio/mpeg" extension="mp4" optional="no" unique="no" acceptarchive="no" />
            </input>
            <output>
                <!-- Outputs that name InputMP4File as their parent: txt, json, ctm, srt, vtt, tsv. -->
                <OutputTemplate id="Transcription" format="PlainTextFormat" label="Plain text transcriptions without time stamps and speaker attribution" mimetype="text/plain" extension="txt" parent="InputMP4File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="JSON" format="JSONFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="application/json" extension="json" parent="InputMP4File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="CTM" format="CTMFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="text/plain" extension="ctm" parent="InputMP4File" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="SRT" format="SubRipTextFormat" label="Timed transcriptions with speaker attribution (srt)" mimetype="application/x-subrip" extension="srt" parent="InputMP4File" unique="no" />
                <OutputTemplate id="WebVTT" format="WebVTTFormat" label="Timed transcriptions with speaker attribution (WebVTT)" mimetype="text/vtt" extension="vtt" parent="InputMP4File" unique="no" />
                <OutputTemplate id="TSV" format="TSVFormat" label="Timed transcriptions with speaker attribution (TSV)" mimetype="text/tab-separated-values" extension="tsv" parent="InputMP4File" unique="no" />
                <!-- Single log output with the fixed filename error.log (unique="yes", no parent). -->
                <OutputTemplate id="errorlog" format="PlainTextFormat" label="Log file with (standard) error output" mimetype="text/plain" filename="error.log" unique="yes">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
            </output>
        </profile>

            
        <profile>
            <!-- Ogg profile: one input template (extension ogg, optional="no")
                 and seven output templates. -->
            <input>
                <InputTemplate id="InputOggFile" format="OggAudioFormat" label="Ogg audio file" mimetype="audio/vorbis" extension="ogg" optional="no" unique="no" acceptarchive="no" />
            </input>
            <output>
                <!-- Outputs that name InputOggFile as their parent: txt, json, ctm, srt, vtt, tsv. -->
                <OutputTemplate id="Transcription" format="PlainTextFormat" label="Plain text transcriptions without time stamps and speaker attribution" mimetype="text/plain" extension="txt" parent="InputOggFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="JSON" format="JSONFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="application/json" extension="json" parent="InputOggFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="CTM" format="CTMFormat" label="Transcription with full word segmentation/alignment and speaker attribution" mimetype="text/plain" extension="ctm" parent="InputOggFile" unique="no">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
                <OutputTemplate id="SRT" format="SubRipTextFormat" label="Timed transcriptions with speaker attribution (srt)" mimetype="application/x-subrip" extension="srt" parent="InputOggFile" unique="no" />
                <OutputTemplate id="WebVTT" format="WebVTTFormat" label="Timed transcriptions with speaker attribution (WebVTT)" mimetype="text/vtt" extension="vtt" parent="InputOggFile" unique="no" />
                <OutputTemplate id="TSV" format="TSVFormat" label="Timed transcriptions with speaker attribution (TSV)" mimetype="text/tab-separated-values" extension="tsv" parent="InputOggFile" unique="no" />
                <!-- Single log output with the fixed filename error.log (unique="yes", no parent). -->
                <OutputTemplate id="errorlog" format="PlainTextFormat" label="Log file with (standard) error output" mimetype="text/plain" filename="error.log" unique="yes">
                    <meta id="encoding">utf-8</meta>
                </OutputTemplate>
            </output>
        </profile>

    </profiles>
    <parameters>
        <parametergroup name="Global">
            <!-- Recognition language (flag -l); "nl" is the preselected choice. -->
            <ChoiceParameter id="language" name="Language" description="The language to recognize" flag="-l">
                <choice id="nl" selected="1">Dutch  / Nederlands</choice>
                <choice id="en">English</choice>
                <choice id="de">German / Deutsch</choice>
                <choice id="fr">French / Français</choice>
                <choice id="it">Italian / Italiano</choice>
                <choice id="ja">Japanese / 日本語</choice>
                <choice id="zh">Mandarin Chinese / 普通话</choice>
                <choice id="es">Spanish / Español</choice>
                <choice id="pt">Portuguese / Português</choice>
                <choice id="uk">Ukrainian / Українська</choice>
            </ChoiceParameter>
            <!-- ASR model (flag -m); "large-v2" is the preselected choice. -->
            <ChoiceParameter id="model" name="Model" description="The ASR model to use" flag="-m">
                <choice id="tiny">tiny</choice>
                <choice id="small">small</choice>
                <choice id="medium">medium</choice>
                <choice id="large">large</choice>
                <choice id="large-v2" selected="1">large-v2</choice>
                <choice id="large-v3">large-v3</choice>
            </ChoiceParameter>
            <!-- GPU switch (flag -g); declared with default="1" and value="True". -->
            <BooleanParameter id="gpu" name="GPU" description="Use GPU (improves performance but may not always be available)" flag="-g" default="1" value="True" />
        </parametergroup>
        <parametergroup name="Diarization">
            <!-- Speaker diarization settings: an on/off switch (flag -d) and
                 lower/upper bounds on the number of speakers (flags -s and -S). -->
            <BooleanParameter id="diarization" name="Diarization" description="Enable speaker diarization?" flag="-d" />
            <IntegerParameter id="minspeakers" name="Minimum speakers" description="Minimum number of speakers (this helps diarization)" flag="-s" />
            <!-- Description corrected: it previously read "Minimum number of speakers",
                 copied from the minspeakers parameter above. -->
            <IntegerParameter id="maxspeakers" name="Maximum speakers" description="Maximum number of speakers (this helps diarization)" flag="-S" />
        </parametergroup>
    </parameters>
    <inputsources>
    </inputsources>
</clam>