我有一个用 C# 编写并托管在 IIS 上的 MVC 应用程序。在 C# 代码中,我尝试调用一个 Python 脚本来访问 Scopus 网站并检索用户信息。该 Python 脚本从命令行运行时一切正常,但从 C# 代码调用时会抛出下面的错误。我之前还遇到过一些权限问题,因此手动创建了一些文件夹(pip、python 等),并为 IIS 用户授予了权限。
Error in python script: Traceback (most recent call last):
File "C:\inetpub\site\scopus.py", line 15, in <module>
verify_success(sb)
File "C:\inetpub\site\scopus.py", line 9, in verify_success
sb.assert_element('//span[contains(text(), "Author Search")]', timeout=30)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 9428, in assert_element
self.wait_for_element_visible(selector, by=by, timeout=timeout)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 8853, in wait_for_element_visible
return page_actions.wait_for_element_visible(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 496, in wait_for_element_visible
timeout_exception(NoSuchElementException, message)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 254, in timeout_exception
raise exc(msg)
seleniumbase.common.exceptions.NoSuchElementException: Message:
Element {//span[contains(text(), "Author Search")]} was not present after 30 seconds!
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\inetpub\site\scopus.py", line 24, in <module>
verify_success(sb)
File "C:\inetpub\site\scopus.py", line 9, in verify_success
sb.assert_element('//span[contains(text(), "Author Search")]', timeout=30)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 9428, in assert_element
self.wait_for_element_visible(selector, by=by, timeout=timeout)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\base_case.py", line 8853, in wait_for_element_visible
return page_actions.wait_for_element_visible(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 496, in wait_for_element_visible
timeout_exception(NoSuchElementException, message)
File "C:\Windows\system32\config\systemprofile\AppData\Roaming\Python\Python312\site-packages\seleniumbase\fixtures\page_actions.py", line 254, in timeout_exception
raise exc(msg)
seleniumbase.common.exceptions.NoSuchElementException: Message:
Element {//span[contains(text(), "Author Search")]} was not present after 30 seconds!
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\inetpub\site\scopus.py", line 26, in <module>
raise Exception("Detected!")
Exception: Detected!
Python 代码:
from seleniumbase import SB
from bs4 import BeautifulSoup
import sys
import json
def verify_success(sb):
    """Check that the Scopus page is showing its "Author Search" label.

    Args:
        sb: The SeleniumBase session object driving the browser.

    Raises:
        Exception: Whatever ``sb.assert_element`` raises when the label does
            not appear within 10 seconds.
    """
    author_search_label = '//span[contains(text(), "Author Search")]'
    sb.assert_element(author_search_label, timeout=10)
    # Short pause after the label is found, before the caller reads the page.
    sb.sleep(3)
def _metric_count(node):
    """Return the integer displayed in a metric node, or 0 when the node is missing.

    Thousands separators are removed first, because ``int("1,234")`` raises
    ValueError.
    """
    if not node:
        return 0
    return int(node.text.replace(",", ""))


# The Scopus author id is the first command-line argument.
userId = sys.argv[1]
with SB(uc=True) as sb:
    sb.uc_open_with_reconnect(f"https://www.scopus.com/authid/detail.uri?authorId={userId}", 3)
    try:
        verify_success(sb)
    except Exception:
        # First check failed: click the "Verify" input if it is visible,
        # otherwise fall back to the GUI captcha click, then check once more.
        if sb.is_element_visible('input[value*="Verify"]'):
            sb.uc_click('input[value*="Verify"]')
        else:
            sb.uc_gui_click_captcha()
        try:
            verify_success(sb)
        except Exception:
            raise Exception("Detected!")
    finally:
        # Runs on success and on failure alike: whatever page is currently
        # loaded is parsed and printed as JSON on stdout. Missing nodes yield
        # 0 / "" placeholders.
        page_source = sb.get_page_source()
        document = BeautifulSoup(page_source, 'html.parser')
        citations_node = document.select_one("div[data-testid='metrics-section-citations-count'] span[data-testid='unclickable-count']")
        documents_node = document.select_one("div[data-testid='metrics-section-document-count'] span[data-testid='unclickable-count']")
        hindex_node = document.select_one("div[data-testid='metrics-section-h-index'] span[data-testid='unclickable-count']")
        name_node = document.select_one("h1[data-testid='author-profile-name'] strong")
        institute_node = document.select_one("span[data-testid='authorInstitute']")
        scopus_information = {
            "CitationsNumber": _metric_count(citations_node),
            # Documents and HIndex previously skipped the comma stripping and
            # crashed on values such as "1,234".
            "Documents": _metric_count(documents_node),
            "HIndex": _metric_count(hindex_node),
            "Name": name_node.text.strip() if name_node else "",
            "Institute": institute_node.text.strip() if institute_node else ""
        }
        print(json.dumps(scopus_information, indent=4))
C# 代码:
/// <summary>
/// Author metrics read from the JSON document printed by the <c>scopus.py</c> script.
/// Every member is nullable, so a key missing from the JSON simply stays <c>null</c>.
/// </summary>
public class ScopusInformation
{
    /// <summary>Author name as shown on the profile page.</summary>
    public string? Name { get; set; }

    /// <summary>Citation count.</summary>
    public int? CitationsNumber { get; set; }

    /// <summary>Document count.</summary>
    public int? Documents { get; set; }

    /// <summary>h-index value.</summary>
    public int? HIndex { get; set; }

    /// <summary>Institute shown on the profile page.</summary>
    public string? Institute { get; set; }

    /// <summary>Scopus author id; the script's JSON does not contain this key.</summary>
    public string? ScopusId { get; set; }
}
/// <summary>
/// Contract for looking up a Scopus author's profile information.
/// </summary>
public interface IScopusService
{
    /// <summary>
    /// Retrieves the profile information for one author.
    /// </summary>
    /// <param name="userId">The Scopus author id.</param>
    /// <returns>The author's information, or <c>null</c> when none is available.</returns>
    Task<ScopusInformation?> GetArticlesForUser(string userId);
}
/// <summary>
/// <see cref="IScopusService"/> implementation that runs the <c>scopus.py</c> script in a child
/// Python process and deserializes the JSON document the script prints to standard output.
/// </summary>
public class ScopusServiceUsingPython(ILogger<ScopusService> logger) : IScopusService
{
    /// <summary>Command used to start Python; resolved through PATH, not a full path.</summary>
    private const string PythonCommand = "py";

    /// <summary>Script to execute; a relative path, so it depends on the working directory.</summary>
    private const string ScriptPath = "scopus.py";

    // The original 30-second limit was never enforced (the stream reads only finished when the
    // process exited). The script's own waits add up to more than 30 seconds on its retry path,
    // so the limit that is now really enforced leaves room for that.
    private static readonly TimeSpan ScriptTimeout = TimeSpan.FromMinutes(2);
    private static readonly TimeSpan ModuleCheckTimeout = TimeSpan.FromSeconds(10);
    private static readonly TimeSpan ModuleInstallTimeout = TimeSpan.FromMinutes(5);

    // pip package name -> name used in "import". They differ for beautifulsoup4, which is
    // imported as bs4; checking "import beautifulsoup4" always failed and re-ran pip every call.
    private static readonly (string Package, string ImportName)[] RequiredModules =
    {
        ("seleniumbase", "seleniumbase"),
        ("beautifulsoup4", "bs4"),
        ("pyautogui", "pyautogui"),
    };

    /// <summary>
    /// Runs the Scopus script for one author and parses its JSON output.
    /// </summary>
    /// <param name="userId">The Scopus author id passed to the script as its only argument.</param>
    /// <returns>
    /// The parsed information, or <c>null</c> when the script failed, timed out, or printed
    /// nothing that could be parsed.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="userId"/> is null or blank.</exception>
    public async Task<ScopusInformation?> GetArticlesForUser(string userId)
    {
        if (string.IsNullOrWhiteSpace(userId))
        {
            throw new ArgumentException("A Scopus author id is required.", nameof(userId));
        }

        string json = await DoCmdAsync(PythonCommand, new[] { ScriptPath, userId });
        if (string.IsNullOrWhiteSpace(json))
        {
            // DoCmdAsync reports every failure as an empty string; Deserialize("") would throw.
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<ScopusInformation>(json);
        }
        catch (JsonException ex)
        {
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return null;
        }
    }

    /// <summary>
    /// Makes sure the required Python modules are present, then runs the command and returns
    /// what it wrote to standard output.
    /// </summary>
    /// <param name="cmd">The Python command to start.</param>
    /// <param name="args">Arguments passed individually, so a value can never split into several.</param>
    /// <returns>The captured standard output, or an empty string on any failure.</returns>
    private async Task<string> DoCmdAsync(string cmd, IReadOnlyList<string> args)
    {
        logger.LogWarning("DoCmd in python script {cmd} {args}", cmd, string.Join(" ", args));
        try
        {
            await EnsurePythonModulesAsync(cmd, RequiredModules);

            var run = await RunProcessAsync(
                cmd,
                args,
                ScriptTimeout,
                $"Python script execution timed out after {ScriptTimeout.TotalSeconds:0} seconds.");

            if (!string.IsNullOrEmpty(run.Error))
            {
                logger.LogWarning("Error in python script: {error}", run.Error);
            }
            logger.LogWarning("Result in python script: {result}", run.Output);

            if (run.ExitCode != 0)
            {
                // The script prints placeholder JSON (zeros / empty strings) from its finally
                // block even when it ends with an exception; do not pass that on as real data.
                logger.LogWarning("Python script exited with code {exitCode}", run.ExitCode);
                return "";
            }

            return run.Output;
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: failures are logged and reported as "no output".
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return "";
        }
    }

    /// <summary>
    /// Checks that every module can be imported and installs the missing ones with pip.
    /// </summary>
    /// <param name="pythonPath">The Python command to start.</param>
    /// <param name="modules">Pairs of pip package name and import name.</param>
    /// <exception cref="TimeoutException">A check or an installation exceeded its time limit.</exception>
    /// <exception cref="InvalidOperationException">pip exited with a non-zero code.</exception>
    private async Task EnsurePythonModulesAsync(
        string pythonPath,
        IEnumerable<(string Package, string ImportName)> modules)
    {
        foreach (var (package, importName) in modules)
        {
            logger.LogWarning("Checking Python module: {module}", package);
            var check = await RunProcessAsync(
                pythonPath,
                new[] { "-c", $"import {importName}" },
                ModuleCheckTimeout,
                $"Checking for Python module {package} timed out.");

            if (check.ExitCode == 0)
            {
                logger.LogWarning("Python module {module} is already installed.", package);
                continue;
            }

            logger.LogWarning("Installing missing Python module: {module}", package);
            var install = await RunProcessAsync(
                pythonPath,
                new[] { "-m", "pip", "install", package },
                ModuleInstallTimeout,
                $"Installation of Python module {package} timed out after 5 minutes.");

            if (install.ExitCode != 0)
            {
                throw new InvalidOperationException(
                    $"Failed to install Python module {package}: {install.Error}");
            }

            logger.LogWarning("Successfully installed Python module: {module}", package);
            logger.LogWarning("Installation output: {output}", install.Output);
        }
    }

    /// <summary>
    /// Starts a process, captures both output streams, and enforces a time limit.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments, passed one by one through <c>ArgumentList</c>.</param>
    /// <param name="timeout">Maximum time the process may run.</param>
    /// <param name="timeoutMessage">Message of the <see cref="TimeoutException"/> thrown on timeout.</param>
    /// <returns>The exit code and everything written to standard output and standard error.</returns>
    /// <exception cref="TimeoutException">The process was still running after <paramref name="timeout"/>.</exception>
    private static async Task<(int ExitCode, string Output, string Error)> RunProcessAsync(
        string fileName,
        IEnumerable<string> arguments,
        TimeSpan timeout,
        string timeoutMessage)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = fileName,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true
        };
        foreach (string argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var process = new Process { StartInfo = startInfo };
        process.Start();

        // Start draining both pipes right away so the child can never block on a full pipe.
        Task<string> outputTask = process.StandardOutput.ReadToEndAsync();
        Task<string> errorTask = process.StandardError.ReadToEndAsync();

        using var timeoutSource = new CancellationTokenSource(timeout);
        try
        {
            // Wait on the process itself: the read tasks only finish once the process has
            // exited, so awaiting them first would make the time limit unreachable.
            await process.WaitForExitAsync(timeoutSource.Token);
        }
        catch (OperationCanceledException)
        {
            // Kill child processes too (the script starts a browser).
            process.Kill(entireProcessTree: true);
            throw new TimeoutException(timeoutMessage);
        }

        string output = await outputTask;
        string error = await errorTask;
        return (process.ExitCode, output, error);
    }
}
我在 C# 控制台应用程序中复现了此错误:它有时会报错,有时返回全为 0 的数据,有时则正常工作。
我只是调整了你的 Python 脚本中的几个数值(等待时间和重连时间),之后它每次都能正常工作。
scopus.py
from seleniumbase import SB
from bs4 import BeautifulSoup
import sys
import json
def verify_success(sb):
    """Wait, then check that the Scopus page is showing its "Author Search" label.

    Args:
        sb: The SeleniumBase session object driving the browser.

    Raises:
        Exception: Whatever ``sb.assert_element`` raises when the label does
            not appear within 10 seconds.
    """
    # Fixed pause first, to give the page time to load its data.
    sb.sleep(15)
    author_search_label = '//span[contains(text(), "Author Search")]'
    sb.assert_element(author_search_label, timeout=10)
def _read_count(soup, css):
    """Return the integer shown by the first node matching ``css``, or 0 if none matches.

    Thousands separators (",") are removed before conversion.
    """
    node = soup.select_one(css)
    if not node:
        return 0
    return int(node.text.replace(",", ""))


def _read_text(soup, css):
    """Return the stripped text of the first node matching ``css``, or "" if none matches."""
    node = soup.select_one(css)
    if not node:
        return ""
    return node.text.strip()


# The Scopus author id is the first command-line argument.
userId = sys.argv[1]
with SB(uc=True) as sb:
    # Reconnect time raised to 10.
    sb.uc_open_with_reconnect(f"https://www.scopus.com/authid/detail.uri?authorId={userId}", 10)
    try:
        verify_success(sb)
    except Exception:
        # First check failed: use the "Verify" input when it is visible,
        # otherwise the GUI captcha click, then check one more time.
        if not sb.is_element_visible('input[value*="Verify"]'):
            sb.uc_gui_click_captcha()
        else:
            sb.uc_click('input[value*="Verify"]')
        try:
            verify_success(sb)
        except Exception:
            raise Exception("Detected!")
    finally:
        # Executed whether or not verification succeeded: the current page is
        # parsed and the result is printed as JSON on stdout.
        soup = BeautifulSoup(sb.get_page_source(), 'html.parser')
        scopus_information = {
            "CitationsNumber": _read_count(soup, "div[data-testid='metrics-section-citations-count'] span[data-testid='unclickable-count']"),
            "Documents": _read_count(soup, "div[data-testid='metrics-section-document-count'] span[data-testid='unclickable-count']"),
            "HIndex": _read_count(soup, "div[data-testid='metrics-section-h-index'] span[data-testid='unclickable-count']"),
            "Name": _read_text(soup, "h1[data-testid='author-profile-name'] strong"),
            "Institute": _read_text(soup, "span[data-testid='authorInstitute']"),
        }
        print(json.dumps(scopus_information, indent=4))
Program.cs
using Microsoft.Extensions.Logging;
using System.Diagnostics;
using System.Text.Json;
// Console harness: runs one Scopus lookup through the Python-backed service and prints the
// result together with the elapsed time.

// The factory is disposable; "using" releases its logging providers when the program ends
// instead of leaving them undisposed.
using var loggerFactory = LoggerFactory.Create(builder =>
{
    builder.AddConsole();
});
var logger = loggerFactory.CreateLogger<ScopusServiceUsingPython>();
logger.LogInformation("Start logging");

IScopusService service = new ScopusServiceUsingPython(logger);

Stopwatch stopwatch = Stopwatch.StartNew();
var info = await service.GetArticlesForUser("57221186307");
stopwatch.Stop();

Console.WriteLine("Result on C#");
Console.WriteLine(JsonSerializer.Serialize(info));
Console.WriteLine($"finished in :{stopwatch.Elapsed}");

// Keep the console window open until a key is pressed.
Console.ReadKey();
/// <summary>
/// Author metrics read from the JSON document printed by the <c>scopus.py</c> script.
/// Every member is nullable, so a key missing from the JSON simply stays <c>null</c>.
/// </summary>
public class ScopusInformation
{
    /// <summary>Author name as shown on the profile page.</summary>
    public string? Name { get; set; }

    /// <summary>Citation count.</summary>
    public int? CitationsNumber { get; set; }

    /// <summary>Document count.</summary>
    public int? Documents { get; set; }

    /// <summary>h-index value.</summary>
    public int? HIndex { get; set; }

    /// <summary>Institute shown on the profile page.</summary>
    public string? Institute { get; set; }

    /// <summary>Scopus author id; the script's JSON does not contain this key.</summary>
    public string? ScopusId { get; set; }
}
/// <summary>
/// Contract for looking up a Scopus author's profile information.
/// </summary>
public interface IScopusService
{
    /// <summary>
    /// Retrieves the profile information for one author.
    /// </summary>
    /// <param name="userId">The Scopus author id.</param>
    /// <returns>The author's information, or <c>null</c> when none is available.</returns>
    Task<ScopusInformation?> GetArticlesForUser(string userId);
}
/// <summary>
/// <see cref="IScopusService"/> implementation that runs the <c>scopus.py</c> script in a child
/// Python process and deserializes the JSON document the script prints to standard output.
/// </summary>
public class ScopusServiceUsingPython(ILogger<ScopusServiceUsingPython> logger) : IScopusService
{
    /// <summary>Command used to start Python; resolved through PATH, not a full path.</summary>
    private const string PythonCommand = "python";

    /// <summary>Script to execute; a relative path, so it depends on the working directory.</summary>
    private const string ScriptPath = "scopus.py";

    // The original 30-second limit was never enforced (the stream reads only finished when the
    // process exited). The script alone sleeps 15 seconds and then waits up to 10 more per
    // verification attempt, so the limit that is now really enforced leaves room for that.
    private static readonly TimeSpan ScriptTimeout = TimeSpan.FromMinutes(2);
    private static readonly TimeSpan ModuleCheckTimeout = TimeSpan.FromSeconds(10);
    private static readonly TimeSpan ModuleInstallTimeout = TimeSpan.FromMinutes(5);

    // pip package name -> name used in "import". They differ for beautifulsoup4, which is
    // imported as bs4; checking "import beautifulsoup4" always failed and re-ran pip every call.
    private static readonly (string Package, string ImportName)[] RequiredModules =
    {
        ("seleniumbase", "seleniumbase"),
        ("beautifulsoup4", "bs4"),
        ("pyautogui", "pyautogui"),
    };

    /// <summary>
    /// Runs the Scopus script for one author and parses its JSON output.
    /// </summary>
    /// <param name="userId">The Scopus author id passed to the script as its only argument.</param>
    /// <returns>
    /// The parsed information, or <c>null</c> when the script failed, timed out, or printed
    /// nothing that could be parsed.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="userId"/> is null or blank.</exception>
    public async Task<ScopusInformation?> GetArticlesForUser(string userId)
    {
        if (string.IsNullOrWhiteSpace(userId))
        {
            throw new ArgumentException("A Scopus author id is required.", nameof(userId));
        }

        string json = await DoCmdAsync(PythonCommand, new[] { ScriptPath, userId });
        if (string.IsNullOrWhiteSpace(json))
        {
            // DoCmdAsync reports every failure as an empty string; Deserialize("") would throw.
            return null;
        }

        try
        {
            return JsonSerializer.Deserialize<ScopusInformation>(json);
        }
        catch (JsonException ex)
        {
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return null;
        }
    }

    /// <summary>
    /// Makes sure the required Python modules are present, then runs the command and returns
    /// what it wrote to standard output.
    /// </summary>
    /// <param name="cmd">The Python command to start.</param>
    /// <param name="args">Arguments passed individually, so a value can never split into several.</param>
    /// <returns>The captured standard output, or an empty string on any failure.</returns>
    private async Task<string> DoCmdAsync(string cmd, IReadOnlyList<string> args)
    {
        logger.LogWarning("DoCmd in python script {cmd} {args}", cmd, string.Join(" ", args));
        try
        {
            await EnsurePythonModulesAsync(cmd, RequiredModules);

            var run = await RunProcessAsync(
                cmd,
                args,
                ScriptTimeout,
                $"Python script execution timed out after {ScriptTimeout.TotalSeconds:0} seconds.");

            if (!string.IsNullOrEmpty(run.Error))
            {
                logger.LogWarning("Error in python script: {error}", run.Error);
            }
            logger.LogWarning("Result in python script: {result}", run.Output);

            if (run.ExitCode != 0)
            {
                // The script prints placeholder JSON (zeros / empty strings) from its finally
                // block even when it ends with an exception; do not pass that on as real data.
                logger.LogWarning("Python script exited with code {exitCode}", run.ExitCode);
                return "";
            }

            return run.Output;
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: failures are logged and reported as "no output".
            logger.LogWarning("Exception in python script: {ex}", ex.ToString());
            return "";
        }
    }

    /// <summary>
    /// Checks that every module can be imported and installs the missing ones with pip.
    /// </summary>
    /// <param name="pythonPath">The Python command to start.</param>
    /// <param name="modules">Pairs of pip package name and import name.</param>
    /// <exception cref="TimeoutException">A check or an installation exceeded its time limit.</exception>
    /// <exception cref="InvalidOperationException">pip exited with a non-zero code.</exception>
    private async Task EnsurePythonModulesAsync(
        string pythonPath,
        IEnumerable<(string Package, string ImportName)> modules)
    {
        foreach (var (package, importName) in modules)
        {
            logger.LogWarning("Checking Python module: {module}", package);
            var check = await RunProcessAsync(
                pythonPath,
                new[] { "-c", $"import {importName}" },
                ModuleCheckTimeout,
                $"Checking for Python module {package} timed out.");

            if (check.ExitCode == 0)
            {
                logger.LogWarning("Python module {module} is already installed.", package);
                continue;
            }

            logger.LogWarning("Installing missing Python module: {module}", package);
            var install = await RunProcessAsync(
                pythonPath,
                new[] { "-m", "pip", "install", package },
                ModuleInstallTimeout,
                $"Installation of Python module {package} timed out after 5 minutes.");

            if (install.ExitCode != 0)
            {
                throw new InvalidOperationException(
                    $"Failed to install Python module {package}: {install.Error}");
            }

            logger.LogWarning("Successfully installed Python module: {module}", package);
            logger.LogWarning("Installation output: {output}", install.Output);
        }
    }

    /// <summary>
    /// Starts a process, captures both output streams, and enforces a time limit.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments, passed one by one through <c>ArgumentList</c>.</param>
    /// <param name="timeout">Maximum time the process may run.</param>
    /// <param name="timeoutMessage">Message of the <see cref="TimeoutException"/> thrown on timeout.</param>
    /// <returns>The exit code and everything written to standard output and standard error.</returns>
    /// <exception cref="TimeoutException">The process was still running after <paramref name="timeout"/>.</exception>
    private static async Task<(int ExitCode, string Output, string Error)> RunProcessAsync(
        string fileName,
        IEnumerable<string> arguments,
        TimeSpan timeout,
        string timeoutMessage)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = fileName,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true
        };
        foreach (string argument in arguments)
        {
            startInfo.ArgumentList.Add(argument);
        }

        using var process = new Process { StartInfo = startInfo };
        process.Start();

        // Start draining both pipes right away so the child can never block on a full pipe.
        Task<string> outputTask = process.StandardOutput.ReadToEndAsync();
        Task<string> errorTask = process.StandardError.ReadToEndAsync();

        using var timeoutSource = new CancellationTokenSource(timeout);
        try
        {
            // Wait on the process itself: the read tasks only finish once the process has
            // exited, so awaiting them first would make the time limit unreachable.
            await process.WaitForExitAsync(timeoutSource.Token);
        }
        catch (OperationCanceledException)
        {
            // Kill child processes too (the script starts a browser).
            process.Kill(entireProcessTree: true);
            throw new TimeoutException(timeoutMessage);
        }

        string output = await outputTask;
        string error = await errorTask;
        return (process.ExitCode, output, error);
    }
}