我无法从这个网站抓取内容。是因为数据是动态的吗?

问题描述 投票:0回答:1

我正在尝试抓取 https://newtin.co811.org/responsedisplay/?ticket=B420800874 页面上表格中的数据。这个网站上每张工单显示的信息都不同。我不知道问题出在网站本身,还是我没有正确使用 BeautifulSoup 和代码。我是编程新手,所以很多参数的设置我都不了解。

这就是我想要实现的目标。我会收到一个包含大量工单号的 Excel 文件,必须把每一个该死的工单号逐个复制并粘贴到该网站上才能获取工单信息,这要花好几个小时。我正在尝试构建一个应用程序:加载 Excel 电子表格后,它会用工单号打开网站,提取数据并写入电子表格,这样我就能快速查看所有信息。目前我只能让程序提取到工单(Ticket)和响应(Response)那一行,却提取不到它下面的数据。

import requests
from bs4 import BeautifulSoup
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
import os

# Define function to extract data from website
def get_ticket_data(ticket_number, timeout=10):
    """Fetch a ticket's response-display page and pull two fields from it.

    Args:
        ticket_number: Ticket identifier; its last four characters are
            dropped before it is placed in the URL. Non-string values
            (e.g. integers read from a spreadsheet) are converted with
            ``str`` first.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with keys ``"Div1"`` and ``"Div3"``. Each value is the
        stripped text of the matching element, ``"N/A"`` when the element
        is absent, or ``"Error"`` when the request fails or the response
        status is not 200.
    """
    # Remove the last 4 characters from the ticket number
    ticket_without_last_4 = str(ticket_number)[:-4]

    # Build URL
    url = f"https://newtin.co811.org/responsedisplay/?ticket={ticket_without_last_4}"

    try:
        # Without a timeout a single stalled connection would hang the
        # whole batch of tickets.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Treat network failures like a bad status so one unreachable
        # ticket does not abort the remaining ones.
        return {"Div1": "Error", "Div3": "Error"}

    if response.status_code != 200:
        return {"Div1": "Error", "Div3": "Error"}

    # Parse the page content
    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or_na(selector):
        # Look the element up once instead of once for the test and once
        # for the value.
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else "N/A"

    # NOTE: these selectors are placeholders; update them according to the
    # webpage structure.
    return {
        "Div1": text_or_na("div.some-class-for-div1"),
        "Div3": text_or_na("div.some-class-for-div3"),
    }

# Function to handle spreadsheet display and writing
def process_excel_file(input_data, output_filename):
    """Enrich every spreadsheet row with scraped data, preview it, and save it.

    Args:
        input_data: DataFrame with a ``Ticket`` column plus the pass-through
            columns listed below.
        output_filename: Path the resulting Excel workbook is written to.

    For each row the ticket is looked up with ``get_ticket_data``; the
    combined table is printed, its first five rows are shown in a message
    box, and the whole table is written to ``output_filename``.
    """
    # Columns copied unchanged from the input sheet, in output order.
    passthrough_columns = (
        "Account",
        "Completed",
        "Work Date",
        "Type",
        "Category",
        "Priority",
        "Company Name",
    )

    records = []
    for _, source_row in input_data.iterrows():
        ticket_id = source_row['Ticket']
        # Query the website before touching the remaining columns.
        scraped = get_ticket_data(ticket_id)

        record = {"Ticket": ticket_id}
        for column in passthrough_columns:
            record[column] = source_row[column]
        record["Web Div1"] = scraped['Div1']
        record["Web Div3"] = scraped['Div3']
        records.append(record)

    # Tabular form of everything collected above.
    output_df = pd.DataFrame(records)

    # Console dump of the full result.
    print(output_df)

    # Short preview (first 5 rows) in the UI.
    preview = output_df.head().to_string(index=False)
    messagebox.showinfo("Data Preview", f"First 5 rows:\n{preview}")

    # Persist under the name the user picked, then confirm.
    output_df.to_excel(output_filename, index=False)
    messagebox.showinfo("Save Complete", f"File saved as {output_filename}")

# Load your existing Excel file using file dialog
def load_file():
    """Ask for an input workbook and an output path, then run the processing.

    Shows a warning when no input file is chosen, an error when no output
    name is given, and an error dialog with the exception text when anything
    inside the load/process step raises.
    """
    excel_types = [("Excel files", "*.xlsx")]

    # Let the user pick the spreadsheet to read.
    source_path = filedialog.askopenfilename(filetypes=excel_types)
    if not source_path:
        messagebox.showwarning("No File", "No file selected.")
        return

    try:
        workbook = pd.read_excel(source_path)

        # Where should the enriched spreadsheet go?
        destination = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=excel_types)

        if not destination:
            messagebox.showerror("Save Error", "Please specify a valid file name to save.")
        else:
            process_excel_file(workbook, destination)
    except Exception as err:
        messagebox.showerror("File Error", f"Failed to load file.\nError: {err}")

# Basic UI setup
def main():
    """Build the single-button window and hand control to Tk's event loop."""
    window = tk.Tk()
    window.title("Ticket Data Scraper")
    window.geometry("300x150")  # width x height

    # The only control: choosing a spreadsheet kicks off the whole workflow.
    tk.Button(window, text="Load Excel File", command=load_file).pack(pady=20)

    # Blocks until the window is closed.
    window.mainloop()

# Start the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
python pandas database web-scraping
1个回答
0
投票

您看不到任何数据,是因为这些数据是通过 JavaScript 从另一个 URL 动态加载的。requests 只会获取初始 HTML,不会执行 JavaScript,因此 BeautifulSoup 在返回的页面中找不到这些数据。

您可以参考下面的示例,直接以 JSON 形式加载数据:

import json
import requests


def get_ticket_data(ticket, timeout=10):
    """Return the decoded JSON payload for *ticket* from the responses API.

    Args:
        ticket: Ticket identifier inserted into the API path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    url = f"https://newtin-api.co811.org/api/tickets/{ticket}/responses?all=true"
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to decode an error
    # page as JSON.
    response.raise_for_status()
    return response.json()


# Example ticket number used to demonstrate the call.
ticket = "B420800874"
# Fetch the ticket's data as returned by get_ticket_data.
data = get_ticket_data(ticket)

# Pretty-print the data as JSON with a 4-space indent.
print(json.dumps(data, indent=4))

打印:

{
    "ticket": "B420800874",
    "revision": "00B",
    "completed": "2024-07-26T18:31:30.553Z",
    "type": "NEW",
    "priority": "NORM",
    "category": "LREQ",
    "lookup": "STRT",
    "original_ticket": "B420800874",
    "original_date": "2024-07-26T18:31:30.553Z",
    "original_account": "WOODNW",
    "original_channel": "WEB",
    "replace_by_date": "2024-08-30T05:59:00.000Z",
    "expires": "2024-08-25T18:31:30.000Z",
    "reference": " ",
    "account": "WOODNW",
    "channel": "WEB",
    "started": "2024-07-26T18:31:21.553Z",
    "caller_type": "NONR",
    "name": "NW ENTERPRISES GROUP LLC",
    "address1": "19805 S LAKE DR",
    "city": "CLAREMORE",
    "cstate": "OK",
    "zip": "74017",
    "phone": "8122131230",
    "phone_ext": " ",
    "caller": "JACOB WOOD",
    "caller_phone": "8122131230",
    "contact": "MARKO RUIS HERNANDEZ",
    "contact_phone": "5039171838",
    "fax": " ",
    "email": "[email protected]",
    "secondary_excavator": " ",
    "secondary_contact": " ",
    "secondary_phone": " ",
    "secondary_phone_ext": " ",
    "secondary_email": " ",
    "state": "CO",
    "county": "EL PASO",
    "place": "CIMARRON HILLS",
    "st_from_address": "1998",
    "st_to_address": "1998",
    "street": "TEE POST LN",
    "cross1": "MEADOWBROOK PKWY",
    "latitude": " ",
    "longitude": " ",
    "legal": "Y",
    "work_date": "2024-07-31T13:00:00.000Z",
    "work_end": "2024-07-31T13:00:00.000Z",
    "duration_days": 0,
    "locate_by": "2024-07-31T05:59:00.000Z",
    "response_due": "2024-07-31T05:59:00.000Z",
    "hours_notice_clock": 107,
    "hours_notice_business": 59,
    "work_type": "BORING TO INSTALL FIBER OPTICS",
    "done_for": "CB-METRONET-COCH012",
    "reason_for_resend": " ",
    "extent_top": 38.858641,
    "extent_left": -104.677599,
    "extent_bottom": 38.857706,
    "extent_right": -104.676457,
    "area_in_miles": 0.003002,
    "blasting": false,
    "boring": true,
    "meet": false,
    "legal_notice": true,
    "request_01": false,
    "emergency": false,
    "damage": false,
    "response_required": true,
    "location": "SEE FILE ATTACHMENTS\nLOC REAR UTILITY EASE OF ADDRESS\nPLACE FLAGS ON ALL LOCATES\nCONTR CONTACT - MARKO RUIS HERNANDEZ 503-917-1838 - CALL WITH ANY QUESTIONS\n *ACCESS OPEN*\nMAP ATTACHED AS DIGITAL WHITE LINE",
    "remarks": null,
    "comments": null,
    "bestfit_y1": 38.85824,
    "bestfit_x1": -104.677746,
    "bestfit_y2": 38.858889,
    "bestfit_x2": -104.677085,
    "bestfit_y3": 38.85811,
    "bestfit_x3": -104.67632,
    "bestfit_y4": 38.857462,
    "bestfit_x4": -104.676981,
    "centroid_y": 38.858175,
    "centroid_x": -104.677032,
    "responses": [
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:12.193Z",
            "response_by": "USIC",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352251&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "CCSOCO01",
            "name": "COMCAST",
            "status": "RESPONDED",
            "phonetic": "COMCAST"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:06:19.690Z",
            "response_by": "Cameron Thomas (",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": null,
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "CCSOCO01",
            "name": "COMCAST",
            "status": "RESPONDED",
            "phonetic": "COMCAST"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:06:01.320Z",
            "response_by": "USIC",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573842453&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "CCSOCO01",
            "name": "COMCAST",
            "status": "RESPONDED",
            "phonetic": "COMCAST"
        },
        {
            "revision": "00B",
            "responded": "2024-07-26T22:14:26.270Z",
            "response_by": "TMS:DBROWN",
            "response": "002",
            "description": "CLEAR - NO CONFLICT",
            "url": null,
            "comments": null,
            "mbcode": "CHKWS1",
            "name": "CHEROKEE METROPOLITAN DISTRICT",
            "status": "RESPONDED",
            "phonetic": "cherokee METROPOLITAN DISTRICT"
        },
        {
            "revision": "00B",
            "responded": "2024-07-26T18:40:57.777Z",
            "response_by": "Default User",
            "response": "002",
            "description": "CLEAR - NO CONFLICT",
            "url": null,
            "comments": "No PW assets in work area",
            "mbcode": "CSPPW1",
            "name": "CITY OF COLORADO SPRINGS - PUBLIC WORKS",
            "status": "RESPONDED",
            "phonetic": "CITY OF COLORADO SPRINGS - PUBLIC WORKS"
        },
        {
            "revision": "00B",
            "responded": "2024-07-29T12:19:20.443Z",
            "response_by": "Steve R",
            "response": "002",
            "description": "CLEAR - NO CONFLICT",
            "url": null,
            "comments": null,
            "mbcode": "CSPU00",
            "name": "COLORADO SPRINGS UTILITIES",
            "status": "RESPONDED",
            "phonetic": "COLORADO SPRINGS UTILITIES"
        },
        {
            "revision": "00B",
            "responded": "2024-07-26T21:57:50.033Z",
            "response_by": "Trent Ortiz",
            "response": "002",
            "description": "CLEAR - NO CONFLICT",
            "url": "https://utilisync-docs.s3.amazonaws.com:443/1b28a286-f552-4d7f-83bf-2b8fd545d685/2e9b9b88-ead7-4ecc-9fa1-66f5ee4c9150.pdf",
            "comments": "b'No applicable EPC SW or TL located in the locate area.'",
            "mbcode": "ELPASOPW",
            "name": "EL PASO COUNTY DEPT. OF PUBLIC WORKS",
            "status": "RESPONDED",
            "phonetic": "EL PASO COUNTY DEPT. OF PUBLIC WORKS"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:10.717Z",
            "response_by": "USIC",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": null,
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "MVEL02",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN view ELECTRIC association"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:08.277Z",
            "response_by": "USIC",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352250&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "MVEL02",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN view ELECTRIC association"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:29:01.310Z",
            "response_by": "USIC",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573846072&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "MVEL02",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN view ELECTRIC association"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:29:00.777Z",
            "response_by": "USIC",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": null,
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "MVEL02",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN view ELECTRIC association"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:10.693Z",
            "response_by": "USIC",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": null,
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "MVELFBR",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:10.537Z",
            "response_by": "USIC",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1574352249&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "MVELFBR",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:20:02.347Z",
            "response_by": "USIC",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": "https://ticketprocluster.usicllc.com/attachments_pdf?activity_id=1573844444&ticket=6PzunQD2eKlhNkUW1yzkyQ%3D%3D",
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "MVELFBR",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T03:19:59.817Z",
            "response_by": "USIC",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": null,
            "comments": "Please contact USIC Customer Service at (800) 778-9140 or [email protected] for further info.",
            "mbcode": "MVELFBR",
            "name": "MOUNTAIN VIEW ELECTRIC ASSOC., INC.",
            "status": "RESPONDED",
            "phonetic": "MOUNTAIN VIEW ELECTRIC ASSOC., INC."
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T21:14:19.353Z",
            "response_by": "Cameron Thomas (",
            "response": "004",
            "description": "NO ACCESS, GATE OR FENCE, NEW TICKET REQUESTED",
            "url": null,
            "comments": "2024/07/31 15:12 Knocked on door and rang door bell no one answered door. No access to rear of lot, no Fence gate to access.",
            "mbcode": "QLNCC00",
            "name": "CENTURYLINK",
            "status": "RESPONDED",
            "phonetic": "century link"
        },
        {
            "revision": "00B",
            "responded": "2024-07-31T01:14:25.353Z",
            "response_by": "CPM Automation 7",
            "response": "008",
            "description": "CALL FACILITY OWNER FOR FURTHER INFO ON LOCATE",
            "url": null,
            "comments": "Please contact our ticket management team if you need additional support at (877)-366-8344.",
            "mbcode": "QLNCC00",
            "name": "CENTURYLINK",
            "status": "RESPONDED",
            "phonetic": "century link"
        }
    ]
}
© www.soinside.com 2019 - 2024. All rights reserved.