如何抓取具有相同类和ID的元素 [重复]

问题描述 投票:-1回答:1

我想抓取联系人的姓名、位置和电话号码,但这些元素都具有相同的类(class)且没有ID。这是该网页的链接:https://hipages.com.au/connect/emcoelectricalservices 。请指导我。谢谢!

这是代码:

import requests
from bs4 import BeautifulSoup


def get_page(url):
    """Download *url* and parse the response body with BeautifulSoup.

    Returns the parsed soup on success. When the server answers with an
    error status, the status code is printed and None is returned.
    """
    response = requests.get(url)

    if not response.ok:
        print('server responded:', response.status_code)
        # Return explicitly: previously `soup` was never assigned on this
        # path, so the final `return soup` raised UnboundLocalError.
        return None

    return BeautifulSoup(response.text, 'lxml')

def get_detail_data(soup):
    """Print the title, contact person and location looked up in *soup*.

    *soup* is the parsed page object (None is tolerated). Every field falls
    back to an 'Empty ...' placeholder when its element cannot be found.
    """
    try:
        title = soup.find('h1', class_="sc-AykKI", id=False).text
    except AttributeError:
        # No match (nothing to read .text from), or soup itself is None.
        title = 'Empty Title'
    print(title)

    try:
        contact_person = soup.find('span', class_="kBpGee", id=False).text
    except AttributeError:
        contact_person = 'Empty Person'
    print(contact_person)

    # NOTE(review): this lookup uses exactly the same arguments as the
    # contact_person lookup above, so both fields come from the same query.
    try:
        location = soup.find('span', class_="kBpGee", id=False).text
    except AttributeError:
        location = 'Empty location'
    print(location)



def main():
    """Fetch the hard-coded detail page and pass it to get_detail_data."""
    detail_url = "https://hipages.com.au/connect/emcoelectricalservices"
    page = get_page(detail_url)
    get_detail_data(page)



# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
python web-scraping scrape
1个回答
1
投票

你好,以下代码可以正常工作:

# -*- coding: utf-8 -*-
"""
Created on Wed Mar  4 09:52:13 2020

@author: prakh
"""

import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_page(url):
    """Download *url* and parse the response body with BeautifulSoup.

    Returns the parsed soup on success. When the server answers with an
    error status, the status code is printed and None is returned.
    """
    response = requests.get(url)

    if not response.ok:
        print('server responded:', response.status_code)
        # Return explicitly: previously `soup` was never assigned on this
        # path, so the final `return soup` raised UnboundLocalError.
        return None

    return BeautifulSoup(response.text, 'lxml')


def get_detail_data(soup):
    """Collect title, contact person and location from *soup* into a DataFrame.

    *soup* is the parsed page object (None is tolerated). Every field falls
    back to an 'Empty ...' placeholder when its element cannot be found.
    The resulting one-row DataFrame is printed and also returned.
    """
    # The contact spans are selected by this exact class string and then
    # picked by position: index 1 is used for the person, index 2 for the
    # location.
    contact_class = "Contact__Item-sc-1giw2l4-2 kBpGee"

    try:
        title = soup.find('h1', class_="sc-AykKI", id=False).text
    except AttributeError:
        # No match (nothing to read .text from), or soup itself is None.
        title = 'Empty Title'

    # Query the contact spans once instead of once per field.
    try:
        contact_items = soup.find_all('span', class_=contact_class, id=False)
    except AttributeError:
        contact_items = []

    try:
        person = contact_items[1].text
    except IndexError:
        person = 'Empty Person'

    try:
        location = contact_items[2].text
    except IndexError:
        location = 'Empty location'

    final_df = pd.DataFrame(
        {
            'Title': [title],
            'contact_person': [person],
            'location': [location],
        }
    )
    print(final_df)
    return final_df

def main():
    """Fetch the hard-coded detail page and pass it to get_detail_data."""
    detail_url = "https://hipages.com.au/connect/emcoelectricalservices"
    page = get_page(detail_url)
    get_detail_data(page)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
最新问题
© www.soinside.com 2019 - 2025. All rights reserved.