I am trying to build a pipeline in Microsoft Azure (for now) that takes a simple Python script as its input. The problem is that I cannot find my output. In my Notebooks section I have written the following two pieces of code:
1) The script, called "test.ipynb":
# azureml-core of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset, Datastore
import pandas as pd
import numpy as np
import datetime
import math
# Load the registered datasets
subscription_id = 'myid'
resource_group = 'myrg'
workspace_name = 'mywn'
workspace = Workspace(subscription_id, resource_group, workspace_name)
dataset_zre = Dataset.get_by_name(workspace, name='file1')
dataset_SLA = Dataset.get_by_name(workspace, name='file2')
df_zre = dataset_zre.to_pandas_dataframe()
df_SLA = dataset_SLA.to_pandas_dataframe()
result = pd.concat([df_SLA,df_zre], sort=True)
result.to_csv(path_or_buf="/mnt/azmnt/code/Users/aniello.spiezia/outputs/output.csv",index=False)
def_data_store = workspace.get_default_datastore()
def_data_store.upload(src_dir = '/mnt/azmnt/code/Users/aniello.spiezia/outputs', target_path = '/mnt/azmnt/code/Users/aniello.spiezia/outputs', overwrite = True)
print("\nFinished!")
#End of the file
2) The pipeline code, called "pipeline.ipynb":
import os
import pandas as pd
import json
import azureml.core
from azureml.core import Workspace, Run, Experiment, Datastore
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import CondaDependencies, RunConfiguration
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.telemetry import set_diagnostics_collection
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData, StepSequence
print("SDK Version:", azureml.core.VERSION)
###############################
ws = Workspace.from_config()
print('Workspace name: ' + ws.name,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep = '\n')
experiment_name = 'aml-pipeline-cicd' # choose a name for experiment
project_folder = '.' # project folder
experiment = Experiment(ws, experiment_name)
print("Location:", ws.location)
set_diagnostics_collection(send_diagnostics=True)
###############################
cd = CondaDependencies.create(pip_packages=["azureml-sdk==1.0.17", "azureml-train-automl==1.0.17", "pyculiarity", "pytictoc", "cryptography==2.5", "pandas"])
amlcompute_run_config = RunConfiguration(framework = "python", conda_dependencies = cd)
amlcompute_run_config.environment.docker.enabled = False
amlcompute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
amlcompute_run_config.environment.spark.precache_packages = False
###############################
aml_compute_target = "aml-compute"
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except:
    print("creating new compute target")
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                idle_seconds_before_scaledown = 1800,
                                                                min_nodes = 0,
                                                                max_nodes = 4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
print("Azure Machine Learning Compute attached")
###############################
def_data_store = ws.get_default_datastore()
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))
# Naming the intermediate data as anomaly data and assigning it to a variable
output_data = PipelineData("output_data", datastore = def_blob_store)
print("output_data object created")
step = PythonScriptStep(name = "test",
                        script_name = "test.ipynb",
                        compute_target = aml_compute,
                        source_directory = project_folder,
                        allow_reuse = True,
                        runconfig = amlcompute_run_config)
print("Step created.")
###############################
steps = [step]
print("Step lists created")
pipeline = Pipeline(workspace = ws, steps = steps)
print ("Pipeline is built")
pipeline.validate()
print("Pipeline validation complete")
pipeline_run = experiment.submit(pipeline)
print("Pipeline is submitted for execution")
pipeline_run.wait_for_completion(show_output = False)
print("Pipeline run completed")
###############################
def_data_store.download(target_path = '.',
                        prefix = 'outputs',
                        show_progress = True,
                        overwrite = True)
model_fname = 'output.csv'
model_path = os.path.join("outputs", model_fname)
pipeline_run.upload_file(name = model_path, path_or_stream = model_path)
print('Uploaded the model {} to experiment {}'.format(model_fname, pipeline_run.experiment.name))
This gives me the following error:
Pipeline run completed
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-22-a8a523969bb3> in <module>
111
112 # Upload the model file explicitly into artifacts (for CI/CD)
--> 113 pipeline_run.upload_file(name = model_path, path_or_stream = model_path)
114 print('Uploaded the model {} to experiment {}'.format(model_fname, pipeline_run.experiment.name))
115
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/run.py in wrapped(self, *args, **kwargs)
47 "therefore, the {} cannot upload files, or log file backed metrics.".format(
48 self, self.__class__.__name__))
---> 49 return func(self, *args, **kwargs)
50 return wrapped
51
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/run.py in upload_file(self, name, path_or_stream)
1749 :rtype: azure.storage.blob.models.ResourceProperties
1750 """
-> 1751 return self._client.artifacts.upload_artifact(path_or_stream, RUN_ORIGIN, self._container, name)
1752
1753 @_check_for_data_container_id
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/artifacts_client.py in upload_artifact(self, artifact, *args, **kwargs)
108 if isinstance(artifact, str):
109 self._logger.debug("Uploading path artifact")
--> 110 return self.upload_artifact_from_path(artifact, *args, **kwargs)
111 elif isinstance(artifact, IOBase):
112 self._logger.debug("Uploading io artifact")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/artifacts_client.py in upload_artifact_from_path(self, path, *args, **kwargs)
100 path = os.path.normpath(path)
101 path = os.path.abspath(path)
--> 102 with open(path, "rb") as stream:
103 return self.upload_artifact_from_stream(stream, *args, **kwargs)
104
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/azmnt/code/Users/aniello.spiezia/outputs/output.csv'
Do you have any idea what the problem could be? In particular, I would like to save the output file called "output.csv" somewhere.
The best way to do this depends somewhat on what you want to do with the output.csv file once the run completes. In general, though, you can just write the csv to the ./outputs folder:
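For example, a minimal sketch of what the end of test.ipynb could look like under that approach (it reuses the df_SLA and df_zre dataframes from the question; everything written to the relative ./outputs folder of a run is uploaded to the run's artifacts automatically, so the explicit datastore upload is no longer needed):

import os
import pandas as pd

# `result` stands in for the concatenated dataframe built in the question
result = pd.concat([df_SLA, df_zre], sort=True)

# Write to the relative ./outputs folder of the step's working directory.
# Azure ML uploads everything in ./outputs to the run's artifacts when
# the step finishes, so no explicit datastore upload is required.
os.makedirs('outputs', exist_ok=True)
result.to_csv('outputs/output.csv', index=False)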
In Daniel's example above, you then need to download the output from the run, not from the datastore, in your pipeline.ipynb code: instead of calling def_data_store.download(), call pipeline_run.download_file('outputs/output.csv', '.').
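A hedged sketch of how that could look at the end of pipeline.ipynb (the step name "test" comes from the PythonScriptStep definition in the question; the artifact path outputs/output.csv assumes the step wrote its csv to ./outputs as sketched above):

# The csv is an artifact of the step run rather than of the parent
# pipeline run, so locate the step run by the name it was given
step_run = pipeline_run.find_step_run("test")[0]

# Inspect what the step actually uploaded, then fetch the csv locally
print(step_run.get_file_names())
step_run.download_file(name='outputs/output.csv', output_file_path='.')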