Python 脚本可以从命令行运行,但在 IIS 中从 C# 调用时失败

问题描述 投票:0回答:1

我有一个用 C# 编写并托管在 IIS 上的 MVC 应用程序。在 C# 代码中,我尝试调用一个 Python 脚本来访问 Scopus 网站并检索用户信息。从命令行运行时,该 Python 脚本运行良好;但从 C# 代码调用它时,它会抛出下面的错误。此前我还遇到过一些权限问题,所以手动创建了一些文件夹(pip、python 等)并为 IIS 用户授予了权限。

Error in python script: Traceback (most recent call last):
  File "C:\inetpub\site\scopus.py", line 15, in <module>
    verify_success(sb)
  File "C:\inetpub\site\scopus.py", line 9, in verify_success
    sb.assert_element('//span[contains(text(), "Author Search")]', timeout=30)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 9428, in assert_element
    self.wait_for_element_visible(selector, by=by, timeout=timeout)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 8853, in wait_for_element_visible
    return page_actions.wait_for_element_visible(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 496, in wait_for_element_visible
    timeout_exception(NoSuchElementException, message)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 254, in timeout_exception
    raise exc(msg)
seleniumbase.common.exceptions.NoSuchElementException: Message: 
 Element {//span[contains(text(), "Author Search")]} was not present after 30 seconds!


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\inetpub\site\scopus.py", line 24, in <module>
    verify_success(sb)
  File "C:\inetpub\site\scopus.py", line 9, in verify_success
    sb.assert_element('//span[contains(text(), "Author Search")]', timeout=30)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 9428, in assert_element
    self.wait_for_element_visible(selector, by=by, timeout=timeout)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 8853, in wait_for_element_visible
    return page_actions.wait_for_element_visible(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 496, in wait_for_element_visible
    timeout_exception(NoSuchElementException, message)
  File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 254, in timeout_exception
    raise exc(msg)
seleniumbase.common.exceptions.NoSuchElementException: Message: 
 Element {//span[contains(text(), "Author Search")]} was not present after 30 seconds!


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\inetpub\site\scopus.py", line 26, in <module>
    raise Exception("Detected!")
Exception: Detected!

Python代码

from seleniumbase import SB
from bs4 import BeautifulSoup
import sys
import json

def verify_success(sb):
    """Assert that the Scopus page loaded, then pause briefly.

    Looks for a <span> whose text contains "Author Search" (waiting up to
    10 seconds) and, once it is found, sleeps 3 seconds. Whatever exception
    ``sb.assert_element`` raises on failure propagates to the caller.
    """
    author_search_span = '//span[contains(text(), "Author Search")]'
    sb.assert_element(author_search_span, timeout=10)
    # Extra settle time after the marker element has appeared.
    sb.sleep(3)

def _parse_count(node):
    """Return the integer shown in *node*, or 0 when the node is missing.

    Counts may be rendered with thousands separators ("1,234"), which
    int() rejects, so the commas are stripped before conversion.
    """
    if node is None:
        return 0
    return int(node.text.replace(",", ""))


# The Scopus author id is the only command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: scopus.py <authorId>")
userId = sys.argv[1]

with SB(uc=True) as sb:
    sb.uc_open_with_reconnect(f"https://www.scopus.com/authid/detail.uri?authorId={userId}", 3)
    try:
        verify_success(sb)
    except Exception:
        # The page did not load normally: try to get past the verification
        # step once, then check again.
        if sb.is_element_visible('input[value*="Verify"]'):
            sb.uc_click('input[value*="Verify"]')
        else:
            # NOTE(review): uc_gui_click_captcha presumably drives the real
            # mouse through PyAutoGUI and so needs an interactive desktop
            # session; a process started by an IIS worker may not have one.
            # Confirm against the SeleniumBase UC-mode documentation.
            sb.uc_gui_click_captcha()
        try:
            verify_success(sb)
        except Exception:
            raise Exception("Detected!")
    finally:
        # Runs on the failure path as well: the JSON below is printed (with
        # zero/empty defaults for anything not found) before "Detected!"
        # propagates and the script exits with an error.
        page_source = sb.get_page_source()
        document = BeautifulSoup(page_source, 'html.parser')
        citations_node = document.select_one("div[data-testid='metrics-section-citations-count'] span[data-testid='unclickable-count']")
        documents_node = document.select_one("div[data-testid='metrics-section-document-count'] span[data-testid='unclickable-count']")
        hindex_node = document.select_one("div[data-testid='metrics-section-h-index'] span[data-testid='unclickable-count']")
        name_node = document.select_one("h1[data-testid='author-profile-name'] strong")
        institute_node = document.select_one("span[data-testid='authorInstitute']")
        scopus_information = {
            "CitationsNumber": _parse_count(citations_node),
            "Documents": _parse_count(documents_node),
            "HIndex": _parse_count(hindex_node),
            "Name": name_node.text.strip() if name_node else "",
            "Institute": institute_node.text.strip() if institute_node else ""
        }
        print(json.dumps(scopus_information, indent=4))

c#代码:


/// <summary>
/// Author metrics deserialized from the JSON object that scopus.py prints to
/// standard output. Property names match the keys of that JSON object.
/// </summary>
public class ScopusInformation
{
    /// <summary>Author name ("Name" key in the script output).</summary>
    public string? Name { get; set; }
    /// <summary>Citation count ("CitationsNumber" key in the script output).</summary>
    public int? CitationsNumber { get; set; }
    /// <summary>Document count ("Documents" key in the script output).</summary>
    public int? Documents { get; set; }
    /// <summary>h-index ("HIndex" key in the script output).</summary>
    public int? HIndex { get; set; }
    /// <summary>Author's institute ("Institute" key in the script output).</summary>
    public string? Institute { get; set; }
    /// <summary>
    /// Not among the keys printed by the scopus.py shown here, so it is left
    /// null by deserialization unless a caller assigns it.
    /// </summary>
    public string? ScopusId { get; set; }
}
/// <summary>Looks up Scopus information for an author.</summary>
public interface IScopusService
{
    /// <summary>
    /// Retrieves the information for the given author id. Despite the method
    /// name, the return type is a single <see cref="ScopusInformation"/>
    /// summary rather than a list of articles; null means no result.
    /// </summary>
    /// <param name="userId">Author id to look up.</param>
    Task<ScopusInformation?> GetArticlesForUser(string userId);
}
/// <summary>
/// <see cref="IScopusService"/> implementation that runs the Python scraper
/// (scopus.py) as a child process and deserializes the JSON it prints to
/// standard output.
/// </summary>
public class ScopusServiceUsingPython(ILogger<ScopusService> logger) : IScopusService
{
    // pip distribution name paired with the module name used by "import".
    // They differ for beautifulsoup4, which is imported as "bs4"; checking
    // "import beautifulsoup4" always fails and would re-run pip on every call.
    private static readonly (string Package, string ImportName)[] RequiredModules =
    {
        ("seleniumbase", "seleniumbase"),
        ("beautifulsoup4", "bs4"),
        ("pyautogui", "pyautogui"),
    };

    // The original 30 second limit was never enforced (both pipes were read to
    // end-of-stream before the wait). Now that the limit is real it has to
    // leave room for a browser-automation run, which can exceed 30 seconds.
    private static readonly TimeSpan ScriptTimeout = TimeSpan.FromMinutes(2);
    private static readonly TimeSpan ModuleCheckTimeout = TimeSpan.FromSeconds(10);
    private static readonly TimeSpan ModuleInstallTimeout = TimeSpan.FromMinutes(5);

    /// <summary>
    /// Runs scopus.py for <paramref name="userId"/> and returns the parsed result.
    /// </summary>
    /// <param name="userId">Scopus author id, passed to the script as its single argument.</param>
    /// <returns>
    /// The deserialized script output, or null when the script produced no
    /// output or output that is not valid JSON.
    /// </returns>
    public async Task<ScopusInformation?> GetArticlesForUser(string userId)
    {
        string cmd = "py";
        var result2 = await DoCmdAsync(cmd, "scopus.py", userId);

        // DoCmdAsync returns "" on failure; Deserialize would throw on that.
        if (string.IsNullOrWhiteSpace(result2))
        {
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<ScopusInformation>(result2);
        }
        catch (JsonException ex)
        {
            logger.LogWarning("Python script output is not valid JSON: {ex}", ex.ToString());
            return null;
        }
    }

    /// <summary>
    /// Ensures the required Python modules are present, then runs
    /// <paramref name="cmd"/> with <paramref name="args"/> and returns its
    /// standard output. Best effort: any failure is logged and "" is returned.
    /// </summary>
    private async Task<string> DoCmdAsync(string cmd, params string[] args)
    {
        logger.LogWarning("DoCmd in python script {cmd} {args}", cmd, string.Join(' ', args));
        try
        {
            // First, ensure required modules are installed
            await EnsurePythonModulesAsync(cmd);

            var run = await RunProcessAsync(cmd, args, ScriptTimeout, "Python script execution");

            if (!string.IsNullOrEmpty(run.Error))
            {
                logger.LogWarning("Error in python script: {error}", run.Error);
            }

            if (run.ExitCode != 0)
            {
                logger.LogWarning("Python script exited with code {exitCode}", run.ExitCode);
            }

            logger.LogWarning("Result in python script: {result}", run.Output);
            return run.Output;
        }
        catch (Exception ex)
        {
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return "";
        }
    }

    /// <summary>
    /// Checks that every entry of <see cref="RequiredModules"/> can be imported
    /// by the given interpreter and pip-installs those that cannot.
    /// </summary>
    /// <exception cref="TimeoutException">A check or an installation exceeded its time limit.</exception>
    /// <exception cref="Exception">pip exited with a non-zero code.</exception>
    private async Task EnsurePythonModulesAsync(string pythonPath)
    {
        foreach (var (package, importName) in RequiredModules)
        {
            logger.LogWarning("Checking Python module: {module}", package);
            var check = await RunProcessAsync(
                pythonPath,
                new[] { "-c", $"import {importName}" },
                ModuleCheckTimeout,
                $"Checking for Python module {package}");

            if (check.ExitCode == 0)
            {
                logger.LogWarning("Python module {module} is already installed.", package);
                continue;
            }

            logger.LogWarning("Installing missing Python module: {module}", package);
            var install = await RunProcessAsync(
                pythonPath,
                new[] { "-m", "pip", "install", package },
                ModuleInstallTimeout,
                $"Installation of Python module {package}");

            if (install.ExitCode != 0)
            {
                throw new Exception($"Failed to install Python module {package}: {install.Error}");
            }

            logger.LogWarning("Successfully installed Python module: {module}", package);
            logger.LogWarning("Installation output: {output}", install.Output);
        }
    }

    /// <summary>
    /// Starts a process without a shell, captures stdout and stderr, and waits
    /// for it to finish within <paramref name="timeout"/>.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments, each passed as one argv entry.</param>
    /// <param name="timeout">Upper bound for exit plus draining both pipes.</param>
    /// <param name="description">Text used at the start of the timeout message.</param>
    /// <exception cref="TimeoutException">The process did not finish in time; it is killed first.</exception>
    private async Task<(int ExitCode, string Output, string Error)> RunProcessAsync(
        string fileName, string[] arguments, TimeSpan timeout, string description)
    {
        var start = new ProcessStartInfo
        {
            FileName = fileName,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true
        };

        // ArgumentList escapes each entry, so a value containing spaces or
        // quotes cannot turn into additional arguments.
        foreach (var argument in arguments)
        {
            start.ArgumentList.Add(argument);
        }

        using var process = new Process { StartInfo = start };
        process.Start();

        // Drain both pipes while the process runs; a redirected pipe that is
        // never read can block the child once its buffer fills.
        var outputTask = process.StandardOutput.ReadToEndAsync();
        var errorTask = process.StandardError.ReadToEndAsync();

        try
        {
            await Task.WhenAll(process.WaitForExitAsync(), outputTask, errorTask).WaitAsync(timeout);
        }
        catch (TimeoutException)
        {
            try
            {
                process.Kill(entireProcessTree: true);
            }
            catch (Exception killError)
            {
                // The timeout below is the error worth reporting.
                logger.LogWarning("Could not kill timed-out process: {ex}", killError.ToString());
            }

            throw new TimeoutException($"{description} timed out after {timeout.TotalSeconds:0} seconds.");
        }

        return (process.ExitCode, await outputTask, await errorTask);
    }
}
python c# python-3.x asp.net-mvc iis
1个回答
0
投票

我在一个 C# 控制台应用程序中重现了这个错误:它有时会报错,有时返回全为 0 的数据,有时又能正常工作。

我只是调整了你的 Python 脚本中的几个数值(等待时间),以确保页面有足够的时间加载,之后它每次都能正常工作。

scopus.py

from seleniumbase import SB
from bs4 import BeautifulSoup
import sys
import json

def verify_success(sb):
    """Wait for the page to load, then assert that it did.

    Sleeps 15 seconds first, then looks for a <span> whose text contains
    "Author Search" (waiting up to 10 more seconds). Whatever exception
    ``sb.assert_element`` raises on failure propagates to the caller.
    """
    # Give the page some time to load its data before checking.
    sb.sleep(15)
    author_search_span = '//span[contains(text(), "Author Search")]'
    sb.assert_element(author_search_span, timeout=10)

def _parse_count(node):
    """Return the integer shown in *node*, or 0 when the node is missing.

    Scopus formats numbers with thousands separators ("1,234"), which
    int() rejects, so the commas are stripped before conversion.
    """
    if node is None:
        return 0
    return int(node.text.replace(",", ""))


# The Scopus author id is the only command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: scopus.py <authorId>")
userId = sys.argv[1]

with SB(uc=True) as sb:
    # Second argument raised from 3 to 10.
    # NOTE(review): per the SeleniumBase UC-mode docs this is the reconnect
    # time in seconds, not a number of reconnect attempts -- confirm.
    sb.uc_open_with_reconnect(f"https://www.scopus.com/authid/detail.uri?authorId={userId}", 10)
    try:
        verify_success(sb)
    except Exception:
        # The page did not load normally: try to get past the verification
        # step once, then check again.
        if sb.is_element_visible('input[value*="Verify"]'):
            sb.uc_click('input[value*="Verify"]')
        else:
            sb.uc_gui_click_captcha()
        try:
            verify_success(sb)
        except Exception:
            raise Exception("Detected!")
    finally:
        # Runs on the failure path as well: the JSON below is printed (with
        # zero/empty defaults for anything not found) before "Detected!"
        # propagates and the script exits with an error.
        page_source = sb.get_page_source()
        document = BeautifulSoup(page_source, 'html.parser')
        citations_node = document.select_one("div[data-testid='metrics-section-citations-count'] span[data-testid='unclickable-count']")
        documents_node = document.select_one("div[data-testid='metrics-section-document-count'] span[data-testid='unclickable-count']")
        hindex_node = document.select_one("div[data-testid='metrics-section-h-index'] span[data-testid='unclickable-count']")
        name_node = document.select_one("h1[data-testid='author-profile-name'] strong")
        institute_node = document.select_one("span[data-testid='authorInstitute']")
        scopus_information = {
            "CitationsNumber": _parse_count(citations_node),
            "Documents": _parse_count(documents_node),
            "HIndex": _parse_count(hindex_node),
            "Name": name_node.text.strip() if name_node else "",
            "Institute": institute_node.text.strip() if institute_node else ""
        }
        print(json.dumps(scopus_information, indent=4))

Program.cs

using Microsoft.Extensions.Logging;
using System.Diagnostics;
using System.Text.Json;

// Console harness: runs the service once for a fixed author id, prints the
// result as JSON together with the elapsed time, then waits for a key press.
var loggerFactory = LoggerFactory.Create(logging => logging.AddConsole());
ILogger<ScopusServiceUsingPython> logger = loggerFactory.CreateLogger<ScopusServiceUsingPython>();
logger.LogInformation("Start logging");

IScopusService service = new ScopusServiceUsingPython(logger);

// Time only the service call itself.
var timer = Stopwatch.StartNew();
ScopusInformation? info = await service.GetArticlesForUser("57221186307");
timer.Stop();

Console.WriteLine("Result on C#");
Console.WriteLine(JsonSerializer.Serialize(info));
Console.WriteLine($"finished in :{timer.Elapsed}");
Console.ReadKey();



/// <summary>
/// Author metrics deserialized from the JSON object that scopus.py prints to
/// standard output. Property names match the keys of that JSON object.
/// </summary>
public class ScopusInformation
{
    /// <summary>Author name ("Name" key in the script output).</summary>
    public string? Name { get; set; }
    /// <summary>Citation count ("CitationsNumber" key in the script output).</summary>
    public int? CitationsNumber { get; set; }
    /// <summary>Document count ("Documents" key in the script output).</summary>
    public int? Documents { get; set; }
    /// <summary>h-index ("HIndex" key in the script output).</summary>
    public int? HIndex { get; set; }
    /// <summary>Author's institute ("Institute" key in the script output).</summary>
    public string? Institute { get; set; }
    /// <summary>
    /// Not among the keys printed by the scopus.py shown here, so it is left
    /// null by deserialization unless a caller assigns it.
    /// </summary>
    public string? ScopusId { get; set; }
}
/// <summary>Looks up Scopus information for an author.</summary>
public interface IScopusService
{
    /// <summary>
    /// Retrieves the information for the given author id. Despite the method
    /// name, the return type is a single <see cref="ScopusInformation"/>
    /// summary rather than a list of articles; null means no result.
    /// </summary>
    /// <param name="userId">Author id to look up.</param>
    Task<ScopusInformation?> GetArticlesForUser(string userId);
}
/// <summary>
/// <see cref="IScopusService"/> implementation that runs the Python scraper
/// (scopus.py) as a child process and deserializes the JSON it prints to
/// standard output.
/// </summary>
public class ScopusServiceUsingPython(ILogger<ScopusServiceUsingPython> logger) : IScopusService
{
    // pip distribution name paired with the module name used by "import".
    // They differ for beautifulsoup4, which is imported as "bs4"; checking
    // "import beautifulsoup4" always fails and would re-run pip on every call.
    private static readonly (string Package, string ImportName)[] RequiredModules =
    {
        ("seleniumbase", "seleniumbase"),
        ("beautifulsoup4", "bs4"),
        ("pyautogui", "pyautogui"),
    };

    // The original 30 second limit was never enforced (both pipes were read to
    // end-of-stream before the wait). Now that the limit is real it has to
    // leave room for a full scraper run, reported at roughly 41 seconds.
    private static readonly TimeSpan ScriptTimeout = TimeSpan.FromMinutes(2);
    private static readonly TimeSpan ModuleCheckTimeout = TimeSpan.FromSeconds(10);
    private static readonly TimeSpan ModuleInstallTimeout = TimeSpan.FromMinutes(5);

    /// <summary>
    /// Runs scopus.py for <paramref name="userId"/> and returns the parsed result.
    /// </summary>
    /// <param name="userId">Scopus author id, passed to the script as its single argument.</param>
    /// <returns>
    /// The deserialized script output, or null when the script produced no
    /// output or output that is not valid JSON.
    /// </returns>
    public async Task<ScopusInformation?> GetArticlesForUser(string userId)
    {
        string cmd = "python";
        var result2 = await DoCmdAsync(cmd, "scopus.py", userId);

        // DoCmdAsync returns "" on failure; Deserialize would throw on that.
        if (string.IsNullOrWhiteSpace(result2))
        {
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<ScopusInformation>(result2);
        }
        catch (JsonException ex)
        {
            logger.LogWarning("Python script output is not valid JSON: {ex}", ex.ToString());
            return null;
        }
    }

    /// <summary>
    /// Ensures the required Python modules are present, then runs
    /// <paramref name="cmd"/> with <paramref name="args"/> and returns its
    /// standard output. Best effort: any failure is logged and "" is returned.
    /// </summary>
    private async Task<string> DoCmdAsync(string cmd, params string[] args)
    {
        logger.LogWarning("DoCmd in python script {cmd} {args}", cmd, string.Join(' ', args));
        try
        {
            // First, ensure required modules are installed
            await EnsurePythonModulesAsync(cmd);

            var run = await RunProcessAsync(cmd, args, ScriptTimeout, "Python script execution");

            if (!string.IsNullOrEmpty(run.Error))
            {
                logger.LogWarning("Error in python script: {error}", run.Error);
            }

            if (run.ExitCode != 0)
            {
                logger.LogWarning("Python script exited with code {exitCode}", run.ExitCode);
            }

            logger.LogWarning("Result in python script: {result}", run.Output);
            return run.Output;
        }
        catch (Exception ex)
        {
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return "";
        }
    }

    /// <summary>
    /// Checks that every entry of <see cref="RequiredModules"/> can be imported
    /// by the given interpreter and pip-installs those that cannot.
    /// </summary>
    /// <exception cref="TimeoutException">A check or an installation exceeded its time limit.</exception>
    /// <exception cref="Exception">pip exited with a non-zero code.</exception>
    private async Task EnsurePythonModulesAsync(string pythonPath)
    {
        foreach (var (package, importName) in RequiredModules)
        {
            logger.LogWarning("Checking Python module: {module}", package);
            var check = await RunProcessAsync(
                pythonPath,
                new[] { "-c", $"import {importName}" },
                ModuleCheckTimeout,
                $"Checking for Python module {package}");

            if (check.ExitCode == 0)
            {
                logger.LogWarning("Python module {module} is already installed.", package);
                continue;
            }

            logger.LogWarning("Installing missing Python module: {module}", package);
            var install = await RunProcessAsync(
                pythonPath,
                new[] { "-m", "pip", "install", package },
                ModuleInstallTimeout,
                $"Installation of Python module {package}");

            if (install.ExitCode != 0)
            {
                throw new Exception($"Failed to install Python module {package}: {install.Error}");
            }

            logger.LogWarning("Successfully installed Python module: {module}", package);
            logger.LogWarning("Installation output: {output}", install.Output);
        }
    }

    /// <summary>
    /// Starts a process without a shell, captures stdout and stderr, and waits
    /// for it to finish within <paramref name="timeout"/>.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments, each passed as one argv entry.</param>
    /// <param name="timeout">Upper bound for exit plus draining both pipes.</param>
    /// <param name="description">Text used at the start of the timeout message.</param>
    /// <exception cref="TimeoutException">The process did not finish in time; it is killed first.</exception>
    private async Task<(int ExitCode, string Output, string Error)> RunProcessAsync(
        string fileName, string[] arguments, TimeSpan timeout, string description)
    {
        var start = new ProcessStartInfo
        {
            FileName = fileName,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true
        };

        // ArgumentList escapes each entry, so a value containing spaces or
        // quotes cannot turn into additional arguments.
        foreach (var argument in arguments)
        {
            start.ArgumentList.Add(argument);
        }

        using var process = new Process { StartInfo = start };
        process.Start();

        // Drain both pipes while the process runs; a redirected pipe that is
        // never read can block the child once its buffer fills.
        var outputTask = process.StandardOutput.ReadToEndAsync();
        var errorTask = process.StandardError.ReadToEndAsync();

        try
        {
            await Task.WhenAll(process.WaitForExitAsync(), outputTask, errorTask).WaitAsync(timeout);
        }
        catch (TimeoutException)
        {
            try
            {
                process.Kill(entireProcessTree: true);
            }
            catch (Exception killError)
            {
                // The timeout below is the error worth reporting.
                logger.LogWarning("Could not kill timed-out process: {ex}", killError.ToString());
            }

            throw new TimeoutException($"{description} timed out after {timeout.TotalSeconds:0} seconds.");
        }

        return (process.ExitCode, await outputTask, await errorTask);
    }
}

这是在我的 Windows 计算机上运行的结果:使用上述数值时,抓取数据大约需要 41 秒。

© www.soinside.com 2019 - 2024. All rights reserved.