Source code for synapse.ml.cognitive.vision.ReadImage

# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.


import sys
# Python 3 has no built-in ``basestring``; alias it to ``str`` so code written
# against the Python 2 name keeps working.  The check uses the structured
# ``sys.version_info`` tuple instead of comparing the free-form ``sys.version``
# string lexicographically, which is not a reliable version comparison.
if sys.version_info[0] >= 3:
    basestring = str

from pyspark import SparkContext, SQLContext
from pyspark.sql import DataFrame
from pyspark.ml.param.shared import *
from pyspark import keyword_only
from pyspark.ml.util import JavaMLReadable, JavaMLWritable
from synapse.ml.core.platform import running_on_synapse_internal
from synapse.ml.core.serialize.java_params_patch import *
from pyspark.ml.wrapper import JavaTransformer, JavaEstimator, JavaModel
from pyspark.ml.evaluation import JavaEvaluator
from pyspark.ml.common import inherit_doc
from synapse.ml.core.schema.Utils import *
from pyspark.ml.param import TypeConverters
from synapse.ml.core.schema.TypeConversionUtils import generateTypeConverter, complexTypeConverter


@inherit_doc
class ReadImage(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaTransformer):
    """
    Python wrapper around the JVM transformer
    ``com.microsoft.azure.synapse.ml.cognitive.vision.ReadImage``.

    Plain parameters are stored on the Python side through ``_set``; the
    "ServiceParam" parameters (``AADToken``, ``CustomAuthHeader``,
    ``imageBytes``, ``imageUrl``, ``language``, ``subscriptionKey``) and their
    ``*Col`` variants are forwarded directly to the wrapped Java object.

    Args:
        AADToken (object): AAD Token used for authentication
        CustomAuthHeader (object): A Custom Value for Authorization Header
        backoffs (list): array of backoffs to use in the handler
        concurrency (int): max number of concurrent calls
        concurrentTimeout (float): max number seconds to wait on futures if concurrency >= 1
        errorCol (str): column to hold http errors
        imageBytes (object): bytestream of the image to use
        imageUrl (object): the url of the image to use
        initialPollingDelay (int): number of milliseconds to wait before first poll for result
        language (object): The BCP-47 language code of the text in the document. Currently, only English (en), Dutch (nl), French (fr), German (de), Italian (it), Portuguese (pt), and Spanish (es) are supported. Read supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the document to be processed as that specific language.
        maxPollingRetries (int): number of times to poll
        outputCol (str): The name of the output column
        pollingDelay (int): number of milliseconds to wait between polling
        subscriptionKey (object): the API key to use
        suppressMaxRetriesException (bool): set true to suppress the maximum retries exception and report in the error column
        timeout (float): number of seconds to wait before closing the connection
        url (str): Url of the service
    """

    # NOTE: the Param description strings below are runtime values and are kept
    # verbatim (including their original spelling).

    AADToken = Param(Params._dummy(), "AADToken", "ServiceParam: AAD Token used for authentication")

    CustomAuthHeader = Param(Params._dummy(), "CustomAuthHeader", "ServiceParam: A Custom Value for Authorization Header")

    backoffs = Param(Params._dummy(), "backoffs", "array of backoffs to use in the handler", typeConverter=TypeConverters.toListInt)

    concurrency = Param(Params._dummy(), "concurrency", "max number of concurrent calls", typeConverter=TypeConverters.toInt)

    concurrentTimeout = Param(Params._dummy(), "concurrentTimeout", "max number seconds to wait on futures if concurrency >= 1", typeConverter=TypeConverters.toFloat)

    errorCol = Param(Params._dummy(), "errorCol", "column to hold http errors", typeConverter=TypeConverters.toString)

    imageBytes = Param(Params._dummy(), "imageBytes", "ServiceParam: bytestream of the image to use")

    imageUrl = Param(Params._dummy(), "imageUrl", "ServiceParam: the url of the image to use")

    initialPollingDelay = Param(Params._dummy(), "initialPollingDelay", "number of milliseconds to wait before first poll for result", typeConverter=TypeConverters.toInt)

    language = Param(Params._dummy(), "language", "ServiceParam: IThe BCP-47 language code of the text in the document. Currently, only English (en), Dutch (nl), French (fr), German (de), Italian (it), Portuguese (pt), and Spanish (es) are supported. Read supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the documented to be processed as that specific language.")

    maxPollingRetries = Param(Params._dummy(), "maxPollingRetries", "number of times to poll", typeConverter=TypeConverters.toInt)

    outputCol = Param(Params._dummy(), "outputCol", "The name of the output column", typeConverter=TypeConverters.toString)

    pollingDelay = Param(Params._dummy(), "pollingDelay", "number of milliseconds to wait between polling", typeConverter=TypeConverters.toInt)

    subscriptionKey = Param(Params._dummy(), "subscriptionKey", "ServiceParam: the API key to use")

    suppressMaxRetriesException = Param(Params._dummy(), "suppressMaxRetriesException", "set true to suppress the maxumimum retries exception and report in the error column", typeConverter=TypeConverters.toBoolean)

    timeout = Param(Params._dummy(), "timeout", "number of seconds to wait before closing the connection", typeConverter=TypeConverters.toFloat)

    url = Param(Params._dummy(), "url", "Url of the service", typeConverter=TypeConverters.toString)

    @keyword_only
    def __init__(
        self,
        java_obj=None,
        AADToken=None,
        AADTokenCol=None,
        CustomAuthHeader=None,
        CustomAuthHeaderCol=None,
        backoffs=[100,500,1000],
        concurrency=1,
        concurrentTimeout=None,
        errorCol="ReadImage_8f94f9e5ef2a_error",
        imageBytes=None,
        imageBytesCol=None,
        imageUrl=None,
        imageUrlCol=None,
        initialPollingDelay=300,
        language=None,
        languageCol=None,
        maxPollingRetries=1000,
        outputCol="ReadImage_8f94f9e5ef2a_output",
        pollingDelay=300,
        subscriptionKey=None,
        subscriptionKeyCol=None,
        suppressMaxRetriesException=False,
        timeout=60.0,
        url=None
        ):
        """
        Create the transformer, optionally wrapping an existing Java object.

        Args:
            java_obj: an already-constructed Java ``ReadImage`` to wrap. When
                given, the remaining keyword arguments are not applied.
            **others: see the class docstring. The defaults shown in the
                signature are not read by this body; only the keyword
                arguments captured in ``_input_kwargs`` are applied, and the
                actual defaults are registered through ``_setDefault``.
        """
        super().__init__()
        if java_obj is None:
            self._java_obj = self._new_java_obj("com.microsoft.azure.synapse.ml.cognitive.vision.ReadImage", self.uid)
        else:
            self._java_obj = java_obj

        self._setDefault(backoffs=[100,500,1000])
        self._setDefault(concurrency=1)
        self._setDefault(errorCol="ReadImage_8f94f9e5ef2a_error")
        self._setDefault(initialPollingDelay=300)
        self._setDefault(maxPollingRetries=1000)
        self._setDefault(outputCol="ReadImage_8f94f9e5ef2a_output")
        self._setDefault(pollingDelay=300)
        self._setDefault(suppressMaxRetriesException=False)
        self._setDefault(timeout=60.0)

        # The captured kwargs live either on the instance or on the decorated
        # function, depending on where ``keyword_only`` stored them.
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            kwargs = self.__init__._input_kwargs

        # A supplied Java object is taken as-is; only a freshly created one is
        # configured from the keyword arguments.
        if java_obj is None:
            self._call_setters(kwargs)

    def _call_setters(self, kwargs):
        """Invoke ``set<Name>(value)`` for every non-``None`` entry of *kwargs*."""
        for name, value in kwargs.items():
            if value is not None:
                getattr(self, "set" + name[0].upper() + name[1:])(value)

    @staticmethod
    def _to_service_value(value):
        """Convert a Python ``list`` through the JVM ``ServiceParam.toSeq``; return anything else unchanged."""
        if isinstance(value, list):
            return SparkContext._active_spark_context._jvm.com.microsoft.azure.synapse.ml.param.ServiceParam.toSeq(value)
        return value

    @keyword_only
    def setParams(
        self,
        AADToken=None,
        AADTokenCol=None,
        CustomAuthHeader=None,
        CustomAuthHeaderCol=None,
        backoffs=[100,500,1000],
        concurrency=1,
        concurrentTimeout=None,
        errorCol="ReadImage_8f94f9e5ef2a_error",
        imageBytes=None,
        imageBytesCol=None,
        imageUrl=None,
        imageUrlCol=None,
        initialPollingDelay=300,
        language=None,
        languageCol=None,
        maxPollingRetries=1000,
        outputCol="ReadImage_8f94f9e5ef2a_output",
        pollingDelay=300,
        subscriptionKey=None,
        subscriptionKeyCol=None,
        suppressMaxRetriesException=False,
        timeout=60.0,
        url=None
        ):
        """
        Set the (keyword only) parameters

        Each supplied, non-``None`` keyword argument is applied through its
        ``set<Name>`` method, the same way ``__init__`` applies them, so that
        the ServiceParam and ``*Col`` arguments reach the Java object instead
        of being handed to ``_set`` (which only knows the Python-side Params).

        Returns:
            self
        """
        if hasattr(self, "_input_kwargs"):
            kwargs = self._input_kwargs
        else:
            # Fall back to the kwargs captured on *this* method's wrapper, not
            # on ``__init__``'s.
            kwargs = self.setParams._input_kwargs
        self._call_setters(kwargs)
        return self

    @classmethod
    def read(cls):
        """ Returns an MLReader instance for this class. """
        return JavaMMLReader(cls)

    @staticmethod
    def getJavaPackage():
        """ Returns package name String. """
        return "com.microsoft.azure.synapse.ml.cognitive.vision.ReadImage"

    @staticmethod
    def _from_java(java_stage):
        """Build the Python wrapper for *java_stage* via ``from_java``, using this module's package plus ``.ReadImage`` as the target name."""
        module_name=ReadImage.__module__
        module_name=module_name.rsplit(".", 1)[0] + ".ReadImage"
        return from_java(java_stage, module_name)

    def setAADToken(self, value):
        """
        Args:
            value: AADToken - AAD Token used for authentication
        """
        self._java_obj = self._java_obj.setAADToken(self._to_service_value(value))
        return self

    def setAADTokenCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setAADTokenCol`` (AADToken: AAD Token used for authentication)
        """
        self._java_obj = self._java_obj.setAADTokenCol(value)
        return self

    def setCustomAuthHeader(self, value):
        """
        Args:
            value: CustomAuthHeader - A Custom Value for Authorization Header
        """
        self._java_obj = self._java_obj.setCustomAuthHeader(self._to_service_value(value))
        return self

    def setCustomAuthHeaderCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setCustomAuthHeaderCol`` (CustomAuthHeader: A Custom Value for Authorization Header)
        """
        self._java_obj = self._java_obj.setCustomAuthHeaderCol(value)
        return self

    def setBackoffs(self, value):
        """
        Args:
            value: backoffs - array of backoffs to use in the handler
        """
        self._set(backoffs=value)
        return self

    def setConcurrency(self, value):
        """
        Args:
            value: concurrency - max number of concurrent calls
        """
        self._set(concurrency=value)
        return self

    def setConcurrentTimeout(self, value):
        """
        Args:
            value: concurrentTimeout - max number seconds to wait on futures if concurrency >= 1
        """
        self._set(concurrentTimeout=value)
        return self

    def setErrorCol(self, value):
        """
        Args:
            value: errorCol - column to hold http errors
        """
        self._set(errorCol=value)
        return self

    def setImageBytes(self, value):
        """
        Args:
            value: imageBytes - bytestream of the image to use
        """
        self._java_obj = self._java_obj.setImageBytes(self._to_service_value(value))
        return self

    def setImageBytesCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setImageBytesCol`` (imageBytes: bytestream of the image to use)
        """
        self._java_obj = self._java_obj.setImageBytesCol(value)
        return self

    def setImageUrl(self, value):
        """
        Args:
            value: imageUrl - the url of the image to use
        """
        self._java_obj = self._java_obj.setImageUrl(self._to_service_value(value))
        return self

    def setImageUrlCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setImageUrlCol`` (imageUrl: the url of the image to use)
        """
        self._java_obj = self._java_obj.setImageUrlCol(value)
        return self

    def setInitialPollingDelay(self, value):
        """
        Args:
            value: initialPollingDelay - number of milliseconds to wait before first poll for result
        """
        self._set(initialPollingDelay=value)
        return self

    def setLanguage(self, value):
        """
        Args:
            value: language - The BCP-47 language code of the text in the document. Currently, only English (en), Dutch (nl), French (fr), German (de), Italian (it), Portuguese (pt), and Spanish (es) are supported. Read supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the document to be processed as that specific language.
        """
        self._java_obj = self._java_obj.setLanguage(self._to_service_value(value))
        return self

    def setLanguageCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setLanguageCol`` (language: the BCP-47 language code of the text in the document; see ``setLanguage``)
        """
        self._java_obj = self._java_obj.setLanguageCol(value)
        return self

    def setMaxPollingRetries(self, value):
        """
        Args:
            value: maxPollingRetries - number of times to poll
        """
        self._set(maxPollingRetries=value)
        return self

    def setOutputCol(self, value):
        """
        Args:
            value: outputCol - The name of the output column
        """
        self._set(outputCol=value)
        return self

    def setPollingDelay(self, value):
        """
        Args:
            value: pollingDelay - number of milliseconds to wait between polling
        """
        self._set(pollingDelay=value)
        return self

    def setSubscriptionKey(self, value):
        """
        Args:
            value: subscriptionKey - the API key to use
        """
        self._java_obj = self._java_obj.setSubscriptionKey(self._to_service_value(value))
        return self

    def setSubscriptionKeyCol(self, value):
        """
        Args:
            value: passed to the Java object's ``setSubscriptionKeyCol`` (subscriptionKey: the API key to use)
        """
        self._java_obj = self._java_obj.setSubscriptionKeyCol(value)
        return self

    def setSuppressMaxRetriesException(self, value):
        """
        Args:
            value: suppressMaxRetriesException - set true to suppress the maximum retries exception and report in the error column
        """
        self._set(suppressMaxRetriesException=value)
        return self

    def setTimeout(self, value):
        """
        Args:
            value: timeout - number of seconds to wait before closing the connection
        """
        self._set(timeout=value)
        return self

    def setUrl(self, value):
        """
        Args:
            value: url - Url of the service
        """
        self._set(url=value)
        return self

    def getAADToken(self):
        """
        Returns:
            AADToken: AAD Token used for authentication
        """
        return self._java_obj.getAADToken()

    def getCustomAuthHeader(self):
        """
        Returns:
            CustomAuthHeader: A Custom Value for Authorization Header
        """
        return self._java_obj.getCustomAuthHeader()

    def getBackoffs(self):
        """
        Returns:
            backoffs: array of backoffs to use in the handler
        """
        return self.getOrDefault(self.backoffs)

    def getConcurrency(self):
        """
        Returns:
            concurrency: max number of concurrent calls
        """
        return self.getOrDefault(self.concurrency)

    def getConcurrentTimeout(self):
        """
        Returns:
            concurrentTimeout: max number seconds to wait on futures if concurrency >= 1
        """
        return self.getOrDefault(self.concurrentTimeout)

    def getErrorCol(self):
        """
        Returns:
            errorCol: column to hold http errors
        """
        return self.getOrDefault(self.errorCol)

    def getImageBytes(self):
        """
        Returns:
            imageBytes: bytestream of the image to use
        """
        return self._java_obj.getImageBytes()

    def getImageUrl(self):
        """
        Returns:
            imageUrl: the url of the image to use
        """
        return self._java_obj.getImageUrl()

    def getInitialPollingDelay(self):
        """
        Returns:
            initialPollingDelay: number of milliseconds to wait before first poll for result
        """
        return self.getOrDefault(self.initialPollingDelay)

    def getLanguage(self):
        """
        Returns:
            language: The BCP-47 language code of the text in the document. Currently, only English (en), Dutch (nl), French (fr), German (de), Italian (it), Portuguese (pt), and Spanish (es) are supported. Read supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the document to be processed as that specific language.
        """
        return self._java_obj.getLanguage()

    def getMaxPollingRetries(self):
        """
        Returns:
            maxPollingRetries: number of times to poll
        """
        return self.getOrDefault(self.maxPollingRetries)

    def getOutputCol(self):
        """
        Returns:
            outputCol: The name of the output column
        """
        return self.getOrDefault(self.outputCol)

    def getPollingDelay(self):
        """
        Returns:
            pollingDelay: number of milliseconds to wait between polling
        """
        return self.getOrDefault(self.pollingDelay)

    def getSubscriptionKey(self):
        """
        Returns:
            subscriptionKey: the API key to use
        """
        return self._java_obj.getSubscriptionKey()

    def getSuppressMaxRetriesException(self):
        """
        Returns:
            suppressMaxRetriesException: set true to suppress the maximum retries exception and report in the error column
        """
        return self.getOrDefault(self.suppressMaxRetriesException)

    def getTimeout(self):
        """
        Returns:
            timeout: number of seconds to wait before closing the connection
        """
        return self.getOrDefault(self.timeout)

    def getUrl(self):
        """
        Returns:
            url: Url of the service
        """
        return self.getOrDefault(self.url)

    def setCustomServiceName(self, value):
        """Forward *value* to the Java object's ``setCustomServiceName`` and return ``self``."""
        self._java_obj = self._java_obj.setCustomServiceName(value)
        return self

    def setEndpoint(self, value):
        """Forward *value* to the Java object's ``setEndpoint`` and return ``self``."""
        self._java_obj = self._java_obj.setEndpoint(value)
        return self

    def setDefaultInternalEndpoint(self, value):
        """Forward *value* to the Java object's ``setDefaultInternalEndpoint`` and return ``self``."""
        self._java_obj = self._java_obj.setDefaultInternalEndpoint(value)
        return self

    def _transform(self, dataset: DataFrame) -> DataFrame:
        """
        Transform *dataset* with the wrapped Java transformer.

        When ``running_on_synapse_internal()`` is true, default credentials and
        the default internal endpoint are first filled in from the
        ``synapse.ml.fabric`` helpers: a default custom auth header when the
        Java object reports the internal service type ``"openai"``, a default
        AAD token otherwise.
        """
        if running_on_synapse_internal():
            try:
                from synapse.ml.fabric.token_utils import TokenUtils
                from synapse.ml.fabric.service_discovery import get_fabric_env_config
                fabric_env_config = get_fabric_env_config().fabric_env_config
                if self._java_obj.getInternalServiceType() != "openai":
                    self._java_obj.setDefaultAADToken(TokenUtils().get_aad_token())
                else:
                    self._java_obj.setDefaultCustomAuthHeader(TokenUtils().get_openai_auth_header())
                self.setDefaultInternalEndpoint(fabric_env_config.get_mlflow_workload_endpoint())
            except ModuleNotFoundError:
                # Best effort: if the fabric helper modules are not
                # installed, proceed with whatever was configured explicitly.
                pass
        return super()._transform(dataset)

    def setLocation(self, value):
        """Forward *value* to the Java object's ``setLocation`` and return ``self``."""
        self._java_obj = self._java_obj.setLocation(value)
        return self

    def setLinkedService(self, value):
        """Forward *value* to the Java object's ``setLinkedService`` and return ``self``."""
        self._java_obj = self._java_obj.setLinkedService(value)
        return self