我正在尝试抓取 https://newtin.co811.org/responsedisplay/?ticket=B420800874 页面上表格中的数据。这个网站针对每张工单(ticket)显示不同的信息。我不知道问题出在网站本身,还是我没有正确使用 BeautifulSoup 和代码。我是编程新手,所以对很多参数的设置都不了解。
这就是我想要做的事情。我会收到一个包含大量工单(ticket)号的 Excel 文件,必须把每一张该死的工单的号码逐个复制并粘贴到该网站上,才能获取工单信息,这要花好几个小时。我正在尝试构建一个应用程序:加载 Excel 电子表格后,它会用工单号打开网站、提取数据并把数据写入电子表格,这样我就可以快速查看所有信息。目前我只能让程序提取到工单和响应那一行,却提取不到下面的数据。
import requests
from bs4 import BeautifulSoup
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
import os
# Define function to extract data from website
def get_ticket_data(ticket_number, timeout=30):
    """Fetch the response-display page for a ticket and scrape two elements.

    The last four characters of ``ticket_number`` are dropped before the
    lookup URL is built.

    Args:
        ticket_number: Ticket identifier as read from the spreadsheet. It is
            passed through ``str()`` first so that a non-string cell value
            does not break the slicing.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``"Div1"`` and ``"Div3"``. Each value is the
        stripped text of the matching element, ``"N/A"`` when the element is
        not present in the page, or ``"Error"`` when the request fails or the
        HTTP status is not 200.
    """
    # Remove the last 4 characters from the ticket number
    ticket_without_last_4 = str(ticket_number)[:-4]
    url = f"https://newtin.co811.org/responsedisplay/?ticket={ticket_without_last_4}"

    try:
        # Without a timeout a single stalled request blocks the whole batch.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as a non-200 status so the
        # caller can keep processing the remaining tickets.
        return {"Div1": "Error", "Div3": "Error"}

    if response.status_code != 200:
        return {"Div1": "Error", "Div3": "Error"}

    soup = BeautifulSoup(response.content, 'html.parser')

    def _text_or_na(selector):
        # Query once and reuse the node instead of selecting it twice.
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else "N/A"

    # NOTE: these are placeholder selectors; update them to match the
    # actual structure of the page.
    return {
        "Div1": _text_or_na("div.some-class-for-div1"),
        "Div3": _text_or_na("div.some-class-for-div3"),
    }
# Function to handle spreadsheet display and writing
def process_excel_file(input_data, output_filename):
    """Scrape every ticket in ``input_data`` and write the merged table to disk.

    Args:
        input_data: DataFrame whose rows are read through the columns
            ``Ticket``, ``Account``, ``Completed``, ``Work Date``, ``Type``,
            ``Category``, ``Priority`` and ``Company Name``.
        output_filename: Destination path handed to ``DataFrame.to_excel``.

    The merged table is printed, its first rows are shown in a message box,
    and a second message box confirms the save.
    """
    # Spreadsheet columns copied through unchanged, in output order.
    passthrough_columns = (
        "Account",
        "Completed",
        "Work Date",
        "Type",
        "Category",
        "Priority",
        "Company Name",
    )

    records = []
    for _, source_row in input_data.iterrows():
        ticket_id = source_row['Ticket']
        # Look the ticket up on the website before reading the other cells.
        scraped = get_ticket_data(ticket_id)

        record = {"Ticket": ticket_id}
        for column in passthrough_columns:
            record[column] = source_row[column]
        record["Web Div1"] = scraped['Div1']
        record["Web Div3"] = scraped['Div3']
        records.append(record)

    result_frame = pd.DataFrame(records)

    # Echo the full table to the console.
    print(result_frame)

    # Preview only the head of the table in the UI.
    preview_text = result_frame.head().to_string(index=False)
    messagebox.showinfo("Data Preview", f"First 5 rows:\n{preview_text}")

    # Persist under the name the user picked, then confirm.
    result_frame.to_excel(output_filename, index=False)
    messagebox.showinfo("Save Complete", f"File saved as {output_filename}")
# Load your existing Excel file using file dialog
def load_file():
    """Ask for an input workbook and an output path, then run the scrape.

    Each failure is reported with its own dialog: no input chosen, input
    unreadable, no output name given, or an error while processing/saving.
    Loading and processing are guarded separately so that a scraping or
    saving failure is not reported as a file-loading failure.
    """
    # Open a file dialog to choose an Excel file
    file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
    if not file_path:
        messagebox.showwarning("No File", "No file selected.")
        return

    # Broad catch is deliberate: this is the GUI boundary, and any read
    # failure should end up in a dialog rather than a console traceback.
    try:
        input_df = pd.read_excel(file_path)
    except Exception as e:
        messagebox.showerror("File Error", f"Failed to load file.\nError: {e}")
        return

    # Ask for save location and file name
    output_file = filedialog.asksaveasfilename(
        defaultextension=".xlsx",
        filetypes=[("Excel files", "*.xlsx")],
    )
    if not output_file:
        messagebox.showerror("Save Error", "Please specify a valid file name to save.")
        return

    try:
        process_excel_file(input_df, output_file)
    except Exception as e:
        messagebox.showerror(
            "Processing Error",
            f"Failed to process tickets or save the results.\nError: {e}",
        )
# Basic UI setup
def main():
    """Build the one-button window and enter the Tk main loop."""
    window = tk.Tk()
    window.title("Ticket Data Scraper")
    window.geometry("300x150")

    # Single entry point of the UI: clicking the button calls load_file.
    tk.Button(window, text="Load Excel File", command=load_file).pack(pady=20)

    window.mainloop()
# Start the GUI only when this file is run as a script.
if __name__ == "__main__":
main()
您看不到任何数据,是因为这些数据是通过 JavaScript 从另一个 URL 动态加载的,所以 BeautifulSoup 看不到它们。
您可以参考下面的示例,以 JSON 格式加载数据:
import json
import requests
def get_ticket_data(ticket, timeout=30):
    """Return the decoded JSON the ticket API serves for ``ticket``.

    Args:
        ticket: Ticket number inserted into the API URL path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://newtin-api.co811.org/api/tickets/{ticket}/responses?all=true"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error body back as
    # if it were ticket data.
    response.raise_for_status()
    return response.json()
# Example run: fetch a single ticket and dump everything that comes back.
ticket = "B420800874"
data = get_ticket_data(ticket)
# Pretty-print the returned data as JSON with a 4-space indent
print(json.dumps(data, indent=4))
打印:
{
"ticket": "B420800874",
"revision": "00B",
"completed": "2024-07-26T18:31:30.553Z",
"type": "NEW",
"priority": "NORM",
"category": "LREQ",
"lookup": "STRT",
"original_ticket": "B420800874",
"original_date": "2024-07-26T18:31:30.553Z",
"original_account": "WOODNW",
"original_channel": "WEB",
"replace_by_date": "2024-08-30T05:59:00.000Z",
"expires": "2024-08-25T18:31:30.000Z",
"reference": " ",
"account": "WOODNW",
"channel": "WEB",
"started": "2024-07-26T18:31:21.553Z",
"caller_type": "NONR",
"name": "NW ENTERPRISES GROUP LLC",
"address1": "19805 S LAKE DR",
"city": "CLAREMORE",
"cstate": "OK",
"zip": "74017",
"phone": "8122131230",
"phone_ext": " ",
"caller": "JACOB WOOD",
"caller_phone": "8122131230",
"contact": "MARKO RUIS HERNANDEZ",
"contact_phone": "5039171838",
"fax": " ",
"email": "[email protected]",
"secondary_excavator": " ",
"secondary_contact": " ",
"secondary_phone": " ",
"secondary_phone_ext": " ",
"secondary_email": " ",
"state": "CO",
"county": "EL PASO",
"place": "CIMARRON HILLS",
"st_from_address": "1998",
"st_to_address": "1998",
"street": "TEE POST LN",
"cross1": "MEADOWBROOK PKWY",
"latitude": " ",
"longitude": " ",
"legal": "Y",
"work_date": "2024-07-31T13:00:00.000Z",
"work_end": "2024-07-31T13:00:00.000Z",
"duration_days": 0,
"locate_by": "2024-07-31T05:59:00.000Z",
"response_due": "2024-07-31T05:59:00.000Z",
"hours_notice_clock": 107,
"hours_notice_business": 59,
"work_type": "BORING TO INSTALL FIBER OPTICS",
"done_for": "CB-METRONET-COCH012",
"reason_for_resend": " ",
"extent_top": 38.858641,
"extent_left": -104.677599,
"extent_bottom": 38.857706,
"extent_right": -104.676457,
"area_in_miles": 0.003002,
"blasting": false,
"boring": true,
"meet": false,
"legal_notice": true,
"request_01": false,
"emergency": false,
"damage": false,
"response_required": true,
"location": "SEE FILE ATTACHMENTS\nLOC REAR UTILITY EASE OF ADDRESS\nPLACE FLAGS ON ALL LOCATES\nCONTR CONTACT - MARKO RUIS HERNANDEZ 503-917-1838 - CALL WITH ANY QUESTIONS\n *ACCESS OPEN*\nMAP ATTACHED AS DIGITAL WHITE LINE",
"remarks": null,
"comments": null,
"bestfit_y1": 38.85824,
"bestfit_x1": -104.677746,
"bestfit_y2": 38.858889,
"bestfit_x2": -104.677085,
"bestfit_y3": 38.85811,
"bestfit_x3": -104.67632,
"bestfit_y4": 38.857462,
"bestfit_x4": -104.676981,
"centroid_y": 38.858175,
"centroid_x": -104.677032,
"responses": [
{
"revision": "00B",
"responded": "2024-07-31T21:14:12.193Z",
"response_by": "USIC",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352251&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "CCSOCO01",
"name": "COMCAST",
"status": "RESPONDED",
"phonetic": "COMCAST"
},
{
"revision": "00B",
"responded": "2024-07-31T03:06:19.690Z",
"response_by": "Cameron Thomas (",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": null,
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "CCSOCO01",
"name": "COMCAST",
"status": "RESPONDED",
"phonetic": "COMCAST"
},
{
"revision": "00B",
"responded": "2024-07-31T03:06:01.320Z",
"response_by": "USIC",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573842453&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "CCSOCO01",
"name": "COMCAST",
"status": "RESPONDED",
"phonetic": "COMCAST"
},
{
"revision": "00B",
"responded": "2024-07-26T22:14:26.270Z",
"response_by": "TMS:DBROWN",
"response": "002",
"description": "CLEAR - NO CONFLICT",
"url": null,
"comments": null,
"mbcode": "CHKWS1",
"name": "CHEROKEE METROPOLITAN DISTRICT",
"status": "RESPONDED",
"phonetic": "cherokee METROPOLITAN DISTRICT"
},
{
"revision": "00B",
"responded": "2024-07-26T18:40:57.777Z",
"response_by": "Default User",
"response": "002",
"description": "CLEAR - NO CONFLICT",
"url": null,
"comments": "No PW assets in work area",
"mbcode": "CSPPW1",
"name": "CITY OF COLORADO SPRINGS - PUBLIC WORKS",
"status": "RESPONDED",
"phonetic": "CITY OF COLORADO SPRINGS - PUBLIC WORKS"
},
{
"revision": "00B",
"responded": "2024-07-29T12:19:20.443Z",
"response_by": "Steve R",
"response": "002",
"description": "CLEAR - NO CONFLICT",
"url": null,
"comments": null,
"mbcode": "CSPU00",
"name": "COLORADO SPRINGS UTILITIES",
"status": "RESPONDED",
"phonetic": "COLORADO SPRINGS UTILITIES"
},
{
"revision": "00B",
"responded": "2024-07-26T21:57:50.033Z",
"response_by": "Trent Ortiz",
"response": "002",
"description": "CLEAR - NO CONFLICT",
"url": "https://utilisync-docs.s3.amazonaws.com:443/1b28a286-f552-4d7f-83bf-2b8fd545d685/2e9b9b88-ead7-4ecc-9fa1-66f5ee4c9150.pdf",
"comments": "b'No applicable EPC SW or TL located in the locate area.'",
"mbcode": "ELPASOPW",
"name": "EL PASO COUNTY DEPT. OF PUBLIC WORKS",
"status": "RESPONDED",
"phonetic": "EL PASO COUNTY DEPT. OF PUBLIC WORKS"
},
{
"revision": "00B",
"responded": "2024-07-31T21:14:10.717Z",
"response_by": "USIC",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": null,
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "MVEL02",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN view ELECTRIC association"
},
{
"revision": "00B",
"responded": "2024-07-31T21:14:08.277Z",
"response_by": "USIC",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352250&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "MVEL02",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN view ELECTRIC association"
},
{
"revision": "00B",
"responded": "2024-07-31T03:29:01.310Z",
"response_by": "USIC",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573846072&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "MVEL02",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN view ELECTRIC association"
},
{
"revision": "00B",
"responded": "2024-07-31T03:29:00.777Z",
"response_by": "USIC",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": null,
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "MVEL02",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN view ELECTRIC association"
},
{
"revision": "00B",
"responded": "2024-07-31T21:14:10.693Z",
"response_by": "USIC",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": null,
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "MVELFBR",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
},
{
"revision": "00B",
"responded": "2024-07-31T21:14:10.537Z",
"response_by": "USIC",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352249&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "MVELFBR",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
},
{
"revision": "00B",
"responded": "2024-07-31T03:20:02.347Z",
"response_by": "USIC",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573844444&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "MVELFBR",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
},
{
"revision": "00B",
"responded": "2024-07-31T03:19:59.817Z",
"response_by": "USIC",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": null,
"comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
"mbcode": "MVELFBR",
"name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
"status": "RESPONDED",
"phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
},
{
"revision": "00B",
"responded": "2024-07-31T21:14:19.353Z",
"response_by": "Cameron Thomas (",
"response": "004",
"description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
"url": null,
"comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
"mbcode": "QLNCC00",
"name": "CENTURYLINK",
"status": "RESPONDED",
"phonetic": "century link"
},
{
"revision": "00B",
"responded": "2024-07-31T01:14:25.353Z",
"response_by": "CPM Automation 7",
"response": "008",
"description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
"url": null,
"comments": "Please contact our ticket management team if you need additional support at (877)-366-8344.",
"mbcode": "QLNCC00",
"name": "CENTURYLINK",
"status": "RESPONDED",
"phonetic": "century link"
}
]
}