使用 Python 从 adviserinfo.sec.gov 收集数据

问题描述 投票:0回答:1

我正在尝试使用 Python 从 adviserinfo.sec.gov 提取财务顾问数据,但一直没有成功。这是一个公开披露网站,您可以在其中输入顾问姓名,点击搜索后即可获得公司名称、地址、从业经验等详细信息。我想知道能否在 Python 中以编程方式完成同样的操作:传入顾问姓名,然后获取所需的信息。任何建议或指点都将不胜感激。谢谢!

这是我迄今为止尝试过的示例代码,但运行后只会输出 "Failed to retrieve adviser information."(无法检索顾问信息):

import requests
from bs4 import BeautifulSoup


def search_adviser_by_name(first_name, last_name):
    """Search the IAPD individual-search page and return the first match.

    Issues a GET request to the search page with the full name as the
    ``INDIVIDUAL_NAME`` parameter, then scans every ``<a>`` element with
    class ``individual-summary`` in the returned HTML.

    Args:
        first_name: Adviser's first name; matched case-insensitively.
        last_name: Adviser's last name; matched case-insensitively.

    Returns:
        Whatever ``get_adviser_info`` returns for the first link whose text
        contains both names, or ``None`` when the response status is not
        200 or no link matches.
    """
    query = dict(
        ADVANCED='true',
        FIND_BY_NAME='true',
        INDIVIDUAL_NAME=f"{first_name} {last_name}",
        resultsPerPage='10',
    )
    page = requests.get(
        "https://adviserinfo.sec.gov/IAPD/Individual/Search/Search",
        params=query,
    )

    if page.status_code == 200:
        document = BeautifulSoup(page.content, 'html.parser')

        for anchor in document.find_all('a', {'class': 'individual-summary'}):
            label = anchor.text.lower()
            # Both parts of the name must appear in the link text.
            if first_name.lower() not in label or last_name.lower() not in label:
                continue
            # hrefs are joined onto the site origin to form the profile URL.
            return get_adviser_info("https://adviserinfo.sec.gov" + anchor['href'])

    return None


def get_adviser_info(adviser_url):
    """Fetch an adviser profile page and pull out name, firm and CRD number.

    Args:
        adviser_url: Absolute URL of the adviser's profile page.

    Returns:
        A dict with the keys ``'Name'``, ``'Firm'`` and ``'CRD Number'``
        (whitespace-stripped text of the matching ``<span>`` elements), or
        ``None`` when the response status is not 200.
    """
    page = requests.get(adviser_url)
    if page.status_code != 200:
        return None

    document = BeautifulSoup(page.content, 'html.parser')

    # NOTE(review): BeautifulSoup's find() returns None when nothing matches,
    # so each lookup below raises AttributeError if the page does not contain
    # the expected <span>.
    return {
        'Name': document.find('span', class_='text-lg sm:text-sm font-semibold').text.strip(),
        'Firm': document.find('span', {'class': 'firmName'}).text.strip(),
        'CRD Number': document.find('span', {'class': 'crdNumber'}).text.strip(),
    }


# Example usage: look up one adviser and print either the extracted details
# or a failure message when nothing (or an empty result) came back.
first_name = 'Kelly'
last_name = 'Demers'
info = search_adviser_by_name(first_name, last_name)
print(info if info else "Failed to retrieve adviser information.")
python web-scraping
1个回答
0
投票

我建议直接访问底层 API。

import requests
import json

def search_adviser_by_name(first, last):
    """Query the adviserinfo.sec.gov search API for individuals by name.

    Args:
        first: Adviser's first name.
        last: Adviser's last name.

    Returns:
        A list containing the ``_source`` dict of every hit in the
        response's ``hits.hits`` array (one dict per matching individual).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    params = {
        'query': f'{first} {last}',
        'includePrevious': 'true',
        'hl': 'true',
        # NOTE(review): 'nrows' and 'start' presumably control the page size
        # and offset of the paginated results - confirm against the API.
        'nrows': '12',
        'start': '0',
        'r': '25',
        # NOTE(review): requests percent-encodes '+' as %2B, so the server
        # receives the literal text "score+desc" - confirm that is intended.
        'sort': 'score+desc',
        'wt': 'json',
    }

    # Without an explicit timeout requests can wait on the server forever.
    response = requests.get(
        'https://api.adviserinfo.sec.gov/search/individual',
        params=params,
        timeout=30,
    )
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later.
    response.raise_for_status()

    records = response.json()["hits"]["hits"]

    return [record["_source"] for record in records]

# Look up the example adviser; the function returns a list of dicts,
# one per search hit.
info = search_adviser_by_name("Kelly", "Demers")

# Pretty-print the records as indented JSON.
print(json.dumps(info, indent=2))

`search_adviser_by_name()` 的返回值现在是一个字典列表,其中包含与每个顾问相关的所有信息。您可以根据需要对其进行处理,它应当已经包含您要查找的大部分信息。CRD 编号位于 `ind_source_id` 字段中。

🚨 网站上的结果(以及通过 API)是分页的。因此,如果您的查询有很多匹配项,那么您需要迭代结果页面。

指定名字和姓氏的结果:

[
  {
    "ind_source_id": "6842521",
    "ind_firstname": "KELLY",
    "ind_middlename": "MARIE",
    "ind_lastname": "DEMERS",
    "ind_other_names": [
      "KELLY  DEMERS",
      "KELLY DEMERS RATOWITZ"
    ],
    "ind_bc_scope": "Active",
    "ind_ia_scope": "Active",
    "ind_ia_disclosure_fl": "N",
    "ind_approved_finra_registration_count": 1,
    "ind_employments_count": 2,
    "ind_industry_cal_date_iapd": "2017-09-19",
    "ind_ia_current_employments": [
      {
        "firm_id": "7691",
        "firm_name": "MERRILL LYNCH, PIERCE, FENNER & SMITH INCORPORATED",
        "branch_city": "BIRMINGHAM",
        "branch_state": "AL",
        "branch_zip": "35209",
        "ia_only": "Y",
        "firm_bd_sec_number": "7221",
        "firm_bd_full_sec_number": "8-7221",
        "firm_ia_sec_number": "14235",
        "firm_ia_full_sec_number": "801-14235"
      },
      {
        "firm_id": "7691",
        "firm_name": "MERRILL LYNCH, PIERCE, FENNER & SMITH INCORPORATED",
        "branch_city": "BIRMINGHAM",
        "branch_state": "AL",
        "branch_zip": "35209",
        "ia_only": "N",
        "firm_bd_sec_number": "7221",
        "firm_bd_full_sec_number": "8-7221",
        "firm_ia_sec_number": "14235",
        "firm_ia_full_sec_number": "801-14235"
      }
    ]
  },
  {
    "ind_source_id": "4038008",
    "ind_firstname": "KELLIE",
    "ind_middlename": "G",
    "ind_lastname": "BEIERS",
    "ind_other_names": [
      "KELLIE J GIBBS",
      "KELLIE J GIBBS BEIERS",
      "KELLIE J GIBBS/BEIERS"
    ],
    "ind_bc_scope": "NotInScope",
    "ind_ia_scope": "Active",
    "ind_ia_disclosure_fl": "N",
    "ind_approved_finra_registration_count": 0,
    "ind_employments_count": 2,
    "ind_industry_cal_date_iapd": "2014-04-06",
    "ind_ia_current_employments": [
      {
        "firm_id": "307424",
        "firm_name": "ASPEN WEALTH STRATEGIES, LLC",
        "branch_city": "ARVADA",
        "branch_state": "CO",
        "branch_zip": "80002",
        "ia_only": "Y"
      },
      {
        "firm_id": "173912",
        "firm_name": "CHASEFIELD CAPITAL INC.",
        "branch_city": "LAKEWOOD",
        "branch_state": "CO",
        "branch_zip": "80235",
        "ia_only": "Y"
      }
    ]
  }
]
© www.soinside.com 2019 - 2024. All rights reserved.