# Source code for mmlspark.cognitive.SpeechToText

# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.


import sys
# Python 3 has no built-in ``basestring``; alias it to ``str`` so code written
# against the Python 2 name keeps working.  The check uses the numeric
# ``sys.version_info`` tuple rather than comparing the ``sys.version`` string,
# because string comparison is lexicographic (e.g. "10.0" sorts before "3").
if sys.version_info[0] >= 3:
    basestring = str

from pyspark.ml.param.shared import *
from pyspark import keyword_only
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
from mmlspark.core.serialize.java_params_patch import *
from pyspark.ml.wrapper import JavaTransformer, JavaEstimator, JavaModel
from pyspark.ml.common import inherit_doc
from mmlspark.core.schema.Utils import *
from mmlspark.core.schema.TypeConversionUtils import generateTypeConverter, complexTypeConverter

@inherit_doc
class SpeechToText(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaTransformer):
    """
    Python wrapper around the JVM stage ``com.microsoft.ml.spark.cognitive.SpeechToText``.

    Two kinds of setters exist on this class:

    * ``setConcurrency``, ``setConcurrentTimeout``, ``setErrorCol``,
      ``setHandler``, ``setOutputCol``, ``setTimeout`` and ``setUrl`` store the
      value on the Python side through ``_set``.
    * ``setAudioData``, ``setFormat``, ``setLanguage``, ``setProfanity``,
      ``setSubscriptionKey`` (plus their ``...Col`` variants) and
      ``setLocation`` call the method of the same name on the wrapped Java
      object and keep the object it returns.

    Args:

        audioData (object): The data sent to the service must be a .wav file
        concurrency (int): max number of concurrent calls (default: 1)
        concurrentTimeout (double): max number seconds to wait on futures if concurrency >= 1 (default: 100.0)
        errorCol (str): column to hold http errors (default: [self.uid]_error)
        format (object): Specifies the result format. Accepted values are simple and detailed. Default is simple.
        handler (object): Which strategy to use when handling requests (default: UserDefinedFunction(<function2>,StringType,None))
        language (object): Identifies the spoken language that is being recognized.
        outputCol (str): The name of the output column (default: [self.uid]_output)
        profanity (object): Specifies how to handle profanity in recognition results. Accepted values are masked, which replaces profanity with asterisks, removed, which remove all profanity from the result, or raw, which includes the profanity in the result. The default setting is masked.
        subscriptionKey (object): the API key to use
        timeout (double): number of seconds to wait before closing the connection (default: 60.0)
        url (str): Url of the service
    """

    @keyword_only
    def __init__(self, audioData=None, concurrency=1, concurrentTimeout=100.0, errorCol=None, format=None, handler=None, language=None, outputCol=None, profanity=None, subscriptionKey=None, timeout=60.0, url=None):
        """
        Create the backing Java object, declare every Param with its default,
        then apply the keyword arguments captured by ``keyword_only`` through
        ``setParams``.  See the class docstring for the meaning of each
        argument.
        """
        super(SpeechToText, self).__init__()
        self._java_obj = self._new_java_obj("com.microsoft.ml.spark.cognitive.SpeechToText")
        # Shared with the "handler" type converter below and read back by the
        # cache-based getters.
        self._cache = {}
        self.audioData = Param(self, "audioData", "audioData:  The data sent to the service must be a .wav files")
        self.concurrency = Param(self, "concurrency", "concurrency: max number of concurrent calls (default: 1)")
        self._setDefault(concurrency=1)
        self.concurrentTimeout = Param(self, "concurrentTimeout", "concurrentTimeout: max number seconds to wait on futures if concurrency >= 1 (default: 100.0)")
        self._setDefault(concurrentTimeout=100.0)
        self.errorCol = Param(self, "errorCol", "errorCol: column to hold http errors (default: [self.uid]_error)")
        self._setDefault(errorCol=self.uid + "_error")
        self.format = Param(self, "format", "format:  Specifies the result format. Accepted values are simple and detailed. Default is simple.")
        self.handler = Param(self, "handler", "handler: Which strategy to use when handling requests (default: UserDefinedFunction(<function2>,StringType,None))", generateTypeConverter("handler", self._cache, complexTypeConverter))
        self.language = Param(self, "language", "language:  Identifies the spoken language that is being recognized.")
        self.outputCol = Param(self, "outputCol", "outputCol: The name of the output column (default: [self.uid]_output)")
        self._setDefault(outputCol=self.uid + "_output")
        self.profanity = Param(self, "profanity", "profanity:  Specifies how to handle profanity in recognition results. Accepted values are masked, which replaces profanity with asterisks, removed, which remove all profanity from the result, or raw, which includes the profanity in the result. The default setting is masked.")
        self.subscriptionKey = Param(self, "subscriptionKey", "subscriptionKey: the API key to use")
        self.timeout = Param(self, "timeout", "timeout: number of seconds to wait before closing the connection (default: 60.0)")
        self._setDefault(timeout=60.0)
        self.url = Param(self, "url", "url: Url of the service")
        # keyword_only exposes the explicitly passed kwargs either on the
        # instance or, as a fallback, on the decorated function.
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, audioData=None, concurrency=1, concurrentTimeout=100.0, errorCol=None, format=None, handler=None, language=None, outputCol=None, profanity=None, subscriptionKey=None, timeout=60.0, url=None):
        """
        Set the (keyword only) parameters

        Args:

            audioData (object): The data sent to the service must be a .wav file
            concurrency (int): max number of concurrent calls (default: 1)
            concurrentTimeout (double): max number seconds to wait on futures if concurrency >= 1 (default: 100.0)
            errorCol (str): column to hold http errors (default: [self.uid]_error)
            format (object): Specifies the result format. Accepted values are simple and detailed. Default is simple.
            handler (object): Which strategy to use when handling requests (default: UserDefinedFunction(<function2>,StringType,None))
            language (object): Identifies the spoken language that is being recognized.
            outputCol (str): The name of the output column (default: [self.uid]_output)
            profanity (object): Specifies how to handle profanity in recognition results. Accepted values are masked, which replaces profanity with asterisks, removed, which remove all profanity from the result, or raw, which includes the profanity in the result. The default setting is masked.
            subscriptionKey (object): the API key to use
            timeout (double): number of seconds to wait before closing the connection (default: 60.0)
            url (str): Url of the service

        Returns:

            The result of ``self._set(**kwargs)`` for the captured kwargs.
        """
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            # When keyword_only attaches the captured kwargs to the wrapped
            # function, they belong to *this* method; reading them from
            # __init__ would re-apply the constructor's arguments instead.
            kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def setAudioData(self, value):
        """
        Pass ``value`` to ``setAudioData`` on the wrapped Java object.

        Args:

            value (object): The data sent to the service must be a .wav file

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setAudioData(value)
        return self

    def setAudioDataCol(self, value):
        """
        Pass ``value`` to ``setAudioDataCol`` on the wrapped Java object.

        Args:

            value (object): presumably the name of a column holding the
                audioData (.wav) for each row -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setAudioDataCol(value)
        return self

    def getAudioData(self):
        """
        Look up ``"audioData"`` in this wrapper's ``_cache``.

        Returns:

            object: The data sent to the service must be a .wav file, or
            ``None`` when the cache has no such entry
        """
        # NOTE(review): no setter visible in this class writes "audioData" to
        # self._cache, so this likely always returns None -- confirm.
        return self._cache.get("audioData", None)

    def setConcurrency(self, value):
        """
        Store the ``concurrency`` param.

        Args:

            value (int): max number of concurrent calls (default: 1)

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(concurrency=value)
        return self

    def getConcurrency(self):
        """

        Returns:

            int: max number of concurrent calls (default: 1)
        """
        return self.getOrDefault(self.concurrency)

    def setConcurrentTimeout(self, value):
        """
        Store the ``concurrentTimeout`` param.

        Args:

            value (double): max number seconds to wait on futures if concurrency >= 1 (default: 100.0)

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(concurrentTimeout=value)
        return self

    def getConcurrentTimeout(self):
        """

        Returns:

            double: max number seconds to wait on futures if concurrency >= 1 (default: 100.0)
        """
        return self.getOrDefault(self.concurrentTimeout)

    def setErrorCol(self, value):
        """
        Store the ``errorCol`` param.

        Args:

            value (str): column to hold http errors (default: [self.uid]_error)

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(errorCol=value)
        return self

    def getErrorCol(self):
        """

        Returns:

            str: column to hold http errors (default: [self.uid]_error)
        """
        return self.getOrDefault(self.errorCol)

    def setFormat(self, value):
        """
        Pass ``value`` to ``setFormat`` on the wrapped Java object.

        Args:

            value (object): Specifies the result format. Accepted values are simple and detailed. Default is simple.

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setFormat(value)
        return self

    def setFormatCol(self, value):
        """
        Pass ``value`` to ``setFormatCol`` on the wrapped Java object.

        Args:

            value (object): presumably the name of a column holding the
                result format for each row -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setFormatCol(value)
        return self

    def getFormat(self):
        """
        Look up ``"format"`` in this wrapper's ``_cache``.

        Returns:

            object: Specifies the result format. Accepted values are simple
            and detailed. Default is simple. ``None`` when the cache has no
            such entry
        """
        # NOTE(review): no setter visible in this class writes "format" to
        # self._cache, so this likely always returns None -- confirm.
        return self._cache.get("format", None)

    def setHandler(self, value):
        """
        Store the ``handler`` param.

        Args:

            value (object): Which strategy to use when handling requests (default: UserDefinedFunction(<function2>,StringType,None))

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(handler=value)
        return self

    def getHandler(self):
        """
        Look up ``"handler"`` in this wrapper's ``_cache``.

        Returns:

            object: Which strategy to use when handling requests (default:
            UserDefinedFunction(<function2>,StringType,None)), or ``None``
            when the cache has no such entry
        """
        # The same cache dict is handed to the "handler" type converter in
        # __init__; presumably that converter fills this key -- confirm.
        return self._cache.get("handler", None)

    def setLanguage(self, value):
        """
        Pass ``value`` to ``setLanguage`` on the wrapped Java object.

        Args:

            value (object): Identifies the spoken language that is being recognized.

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setLanguage(value)
        return self

    def setLanguageCol(self, value):
        """
        Pass ``value`` to ``setLanguageCol`` on the wrapped Java object.

        Args:

            value (object): presumably the name of a column holding the
                spoken language for each row -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setLanguageCol(value)
        return self

    def getLanguage(self):
        """
        Look up ``"language"`` in this wrapper's ``_cache``.

        Returns:

            object: Identifies the spoken language that is being recognized,
            or ``None`` when the cache has no such entry
        """
        # NOTE(review): no setter visible in this class writes "language" to
        # self._cache, so this likely always returns None -- confirm.
        return self._cache.get("language", None)

    def setOutputCol(self, value):
        """
        Store the ``outputCol`` param.

        Args:

            value (str): The name of the output column (default: [self.uid]_output)

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(outputCol=value)
        return self

    def getOutputCol(self):
        """

        Returns:

            str: The name of the output column (default: [self.uid]_output)
        """
        return self.getOrDefault(self.outputCol)

    def setProfanity(self, value):
        """
        Pass ``value`` to ``setProfanity`` on the wrapped Java object.

        Args:

            value (object): Specifies how to handle profanity in recognition results. Accepted values are masked, which replaces profanity with asterisks, removed, which remove all profanity from the result, or raw, which includes the profanity in the result. The default setting is masked.

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setProfanity(value)
        return self

    def setProfanityCol(self, value):
        """
        Pass ``value`` to ``setProfanityCol`` on the wrapped Java object.

        Args:

            value (object): presumably the name of a column holding the
                profanity setting for each row -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setProfanityCol(value)
        return self

    def getProfanity(self):
        """
        Look up ``"profanity"`` in this wrapper's ``_cache``.

        Returns:

            object: Specifies how to handle profanity in recognition results
            (masked, removed or raw; the default setting is masked), or
            ``None`` when the cache has no such entry
        """
        # NOTE(review): no setter visible in this class writes "profanity" to
        # self._cache, so this likely always returns None -- confirm.
        return self._cache.get("profanity", None)

    def setSubscriptionKey(self, value):
        """
        Pass ``value`` to ``setSubscriptionKey`` on the wrapped Java object.

        Args:

            value (object): the API key to use

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setSubscriptionKey(value)
        return self

    def setSubscriptionKeyCol(self, value):
        """
        Pass ``value`` to ``setSubscriptionKeyCol`` on the wrapped Java object.

        Args:

            value (object): presumably the name of a column holding the API
                key for each row -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setSubscriptionKeyCol(value)
        return self

    def getSubscriptionKey(self):
        """
        Look up ``"subscriptionKey"`` in this wrapper's ``_cache``.

        Returns:

            object: the API key to use, or ``None`` when the cache has no
            such entry
        """
        # NOTE(review): no setter visible in this class writes
        # "subscriptionKey" to self._cache, so this likely always returns
        # None -- confirm.
        return self._cache.get("subscriptionKey", None)

    def setTimeout(self, value):
        """
        Store the ``timeout`` param.

        Args:

            value (double): number of seconds to wait before closing the connection (default: 60.0)

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(timeout=value)
        return self

    def getTimeout(self):
        """

        Returns:

            double: number of seconds to wait before closing the connection (default: 60.0)
        """
        return self.getOrDefault(self.timeout)

    def setUrl(self, value):
        """
        Store the ``url`` param.

        Args:

            value (str): Url of the service

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._set(url=value)
        return self

    def getUrl(self):
        """

        Returns:

            str: Url of the service
        """
        return self.getOrDefault(self.url)

    def setLocation(self, value):
        """
        Pass ``value`` to ``setLocation`` on the wrapped Java object.

        Args:

            value (object): forwarded unchanged; presumably the service
                region used to build the url -- confirm against the JVM API

        Returns:

            SpeechToText: this instance, for call chaining
        """
        self._java_obj = self._java_obj.setLocation(value)
        return self

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        return JavaMMLReader(cls)

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        return "com.microsoft.ml.spark.cognitive.SpeechToText"

    @staticmethod
    def _from_java(java_stage):
        """
        Build the Python wrapper for ``java_stage`` via ``from_java``.

        The target module name is this class's ``__module__`` with its last
        dotted component replaced by ``SpeechToText``.
        """
        parent_package = SpeechToText.__module__.rsplit(".", 1)[0]
        module_name = parent_package + ".SpeechToText"
        return from_java(java_stage, module_name)