Source code for mmlspark.downloader.ModelDownloader

# Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information.

import sys

if sys.version >= '3':
    basestring = str

from pyspark.ml.param.shared import *
from mmlspark.core.schema.Utils import *

DEFAULT_URL = "https://mmlspark.azureedge.net/datasets/CNTKModels/"


class ModelSchema:
    """
    An object that represents a model.

    Args:
        name (str): Name of the model
        dataset (DataFrame): Dataset it was trained on
        modelType (str): Domain that the model operates on
        uri (str): The location of the model's bytes
        hash (str): The sha256 hash of the model's bytes
        size (int): The size of the model in bytes
        inputNode (int): The node which represents the input
        numLayers (int): The number of layers of the model
        layerNames (array): The names of nodes that represent layers in the network
    """

    def __init__(self, name, dataset, modelType, uri, hash, size,
                 inputNode, numLayers, layerNames):
        self.name = name
        self.dataset = dataset
        self.modelType = modelType
        self.uri = uri
        self.hash = hash
        self.size = size
        self.inputNode = inputNode
        self.numLayers = numLayers
        self.layerNames = layerNames

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return "ModelSchema<name: {}, dataset: {}, loc: {}>".format(
            self.name, self.dataset, self.uri)

    def toJava(self, sparkSession):
        ctx = sparkSession.sparkContext
        uri = ctx._jvm.java.net.URI(self.uri)
        return ctx._jvm.com.microsoft.ml.spark.downloader.ModelSchema(
            self.name, self.dataset, self.modelType, uri, self.hash, self.size,
            self.inputNode, self.numLayers, self.layerNames)

    @staticmethod
    def fromJava(jobj):
        return ModelSchema(jobj.name(), jobj.dataset(), jobj.modelType(),
                           jobj.uri().toString(), jobj.hash(), jobj.size(),
                           jobj.inputNode(), jobj.numLayers(), list(jobj.layerNames()))
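
# Example (a minimal sketch, not part of the original module): a ModelSchema is normally
# obtained from a ModelDownloader rather than built by hand, but its fields can be read
# directly once you have one. All values below are illustrative placeholders only.
#
#     schema = ModelSchema(name="SomeModel", dataset="SomeDataset", modelType="image",
#                          uri=DEFAULT_URL + "SomeModel.model",
#                          hash="<sha256>", size=123456789,
#                          inputNode=0, numLayers=5,
#                          layerNames=["conv1", "conv2", "fc1"])
#     print(schema)             # ModelSchema<name: SomeModel, dataset: SomeDataset, loc: ...>
#     print(schema.layerNames)  # useful when choosing a layer for featurization
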
class ModelDownloader:
    """
    A class for downloading CNTK pretrained models in Python. To download all models,
    use the downloadModels function. To browse the models available on the Microsoft
    server, use remoteModels.

    Args:
        sparkSession (SparkSession): A spark session for interfacing between python and java
        localPath (str): The folder to save models to
        serverURL (str): The location of the model server; beware, this default can change!
    """

    def __init__(self, sparkSession, localPath, serverURL=DEFAULT_URL):
        self.localPath = localPath
        self.serverURL = serverURL
        self._sparkSession = sparkSession
        self._ctx = sparkSession.sparkContext
        self._model_downloader = self._ctx._jvm.com.microsoft.ml.spark.downloader.ModelDownloader(
            sparkSession._jsparkSession, localPath, serverURL)

    def _wrap(self, iter):
        return (ModelSchema.fromJava(s) for s in iter)

    def localModels(self):
        """
        Lists the models already stored locally on the filesystem.

        Returns:
            generator: ModelSchema objects for the locally available models
        """
        return self._wrap(self._model_downloader.localModels())

    def remoteModels(self):
        """
        Lists the models available on the remote server.

        Returns:
            generator: ModelSchema objects for the models hosted on the server
        """
        return self._wrap(self._model_downloader.remoteModels())

    def downloadModel(self, model):
        """
        Downloads a model.

        Args:
            model (ModelSchema): The model to be downloaded

        Returns:
            ModelSchema: The schema of the downloaded model
        """
        model = model.toJava(self._sparkSession)
        return ModelSchema.fromJava(self._model_downloader.downloadModel(model))

    def downloadByName(self, name):
        """
        Downloads a named model.

        Args:
            name (str): The name of the model

        Returns:
            ModelSchema: The schema of the downloaded model
        """
        return ModelSchema.fromJava(self._model_downloader.downloadByName(name))

    def downloadModels(self, models=None):
        """
        Downloads models.

        Args:
            models: The models to be downloaded; defaults to all remote models

        Returns:
            list: The ModelSchema objects of the downloaded models
        """
        if models is None:
            models = self.remoteModels()
        models = (m.toJava(self._sparkSession) for m in models)
        return list(self._wrap(self._model_downloader.downloadModels(models)))
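
# Example usage (a minimal sketch, not part of the original module; assumes an active
# SparkSession named `spark` and a writable local directory -- adjust the path for your
# environment, and note that "SomeModelName" is a placeholder, not a guaranteed entry
# on the server):
#
#     downloader = ModelDownloader(spark, "file:///tmp/models/")
#     for schema in downloader.remoteModels():            # browse what the server offers
#         print(schema)
#     model = downloader.downloadByName("SomeModelName")  # fetch a single model by name
#     models = downloader.downloadModels()                # or fetch every remote model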