반응형
웹 분석
-
[ OpenAI / WebsiteQnA tutorial ] 총정리 | Openai | 2023. 2. 28. 20:44
import requests
import re
import urllib.request
from bs4 import BeautifulSoup
from collections import deque
from html.parser import HTMLParser
from urllib.parse import urlparse
import os

# Regex pattern to match a URL
HTTP_URL_PATTERN = r'^http[s]*://.+'

# Define root domain to crawl
domain = "openai.com"
full_url = "https://openai.com/"

1. 데이터 수집 - beautifulsoup 라이브러리를 통한 크롤링

# Create a class t..