python:: urllib.request vs requests

 

import shutil
import tempfile
import urllib.request

# Stream the response body into a named temporary file. delete=False keeps
# the file on disk after it is closed so it can be reopened by name below.
with urllib.request.urlopen('http://python.org/') as response, \
        tempfile.NamedTemporaryFile(delete=False) as tmp_file:
    shutil.copyfileobj(response, tmp_file)

# Reopen the saved copy by its path; the body is a placeholder for real work.
with open(tmp_file.name) as html:
    pass

 

 

import os
from pathlib import Path
import urllib.request



def get_download_path():
    """Return the current user's default download directory as a string.

    On Windows (``os.name == 'nt'``) the location is read from the
    ``Shell Folders`` key under ``HKEY_CURRENT_USER`` in the registry.
    On every other platform the result is ``~/downloads`` with ``~``
    expanded to the user's home directory.
    """
    if os.name != 'nt':
        # NOTE(review): the folder name is lowercase here; many Linux desktops
        # use "Downloads" -- confirm which one is intended.
        return os.path.join(os.path.expanduser('~'), 'downloads')

    # winreg exists only on Windows, so it is imported on this branch only.
    import winreg

    shell_folders = r'SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
    # Registry value name under which the downloads location is stored.
    downloads_guid = '{374DE290-123F-4565-9164-39C4925E467B}'
    with winreg.OpenKey(winreg.HKEY_CURRENT_USER, shell_folders) as registry_key:
        # QueryValueEx returns a tuple; the first item is the stored value.
        return winreg.QueryValueEx(registry_key, downloads_guid)[0]


# URLs of the CSV files to save
list_csv = [
    "https://raw.githubusercontent.com/HyunchanMOON/lessons/master/lessons/gasprices.csv",
]


# Target folder for the downloaded files
downloads_path = get_download_path()


# Create the folder if it does not exist yet; exist_ok=True makes this a
# single call instead of a separate check followed by a create.
os.makedirs(downloads_path, exist_ok=True)

for url in list_csv:
    # The last segment of the URL path is used as the local file name.
    filename = url.split("/")[-1]
    # Only `Path` is imported (`from pathlib import Path`); the bare name
    # `pathlib` is never bound, so the earlier `pathlib.PurePath(...)` call
    # failed with NameError before anything was downloaded.
    urllib.request.urlretrieve(url, Path(downloads_path, filename))

 

 

 

 

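requests를 더 많이 사용하는데,

가장 큰 차이는 응답 코드가 정상(200번대)이 아닐 때의 동작인 듯하다. urllib.request는 4xx/5xx 응답에서 urllib.error.HTTPError 예외를 발생시키지만, requests는 예외 없이 Response 객체를 그대로 반환하므로 req.status_code를 직접 확인하거나 req.raise_for_status()를 호출해야 한다.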

import requests
import urllib.request
from bs4 import BeautifulSoup as bs

url = "https://thekkom.github.io/images/yellow_500px.png"
filename = "logo.png"


# Download with urllib: urlretrieve writes the response body straight to
# the given local file name.
urllib.request.urlretrieve(url, filename)


# Download with requests
req = requests.get(url)
print(req)   # e.g. <Response [200]> on success
print(dir(req))


# req.content is the raw response body as bytes; req.text is the decoded text.
# The with-statement closes the file on exit, so no explicit close() is needed.
with open(filename, 'wb') as f:
    f.write(req.content)
print("save file")


# For HTML pages, pass the decoded text to beautifulsoup4 for parsing
req = requests.get(url)
html = req.text
soup = bs(html, 'html.parser')

 

 

 

 

 

 

 

 

 

참고

 

 

https://docs.python.org/3/library/urllib.request.html

 

urllib.request — Extensible library for opening URLs

Source code: Lib/urllib/request.py The urllib.request module defines functions and classes which help in opening URLs (mostly HTTP) in a complex world — basic and digest authentication, redirection...

docs.python.org

 

 

 

 

 

https://docs.python.org/3/howto/urllib2.html#urllib-howto

 

HOWTO Fetch Internet Resources Using The urllib Package

Author, Michael Foord,. Introduction: Related Articles: You may also find useful the following article on fetching web resources with Python: Basic Authentication A tutorial on Basic Authentication...

docs.python.org

 

 

 

 

 

https://moondol-ai.tistory.com/238

 

파이썬 크롤링 requests vs urllib.request 차이는?

오늘은 파이썬 크롤링을 하면서 궁금해하셨을 request와 urllib.request의 차이에 대해 말해보겠습니다. 일단 필요한 모듈을 불러옵니다. import requests import urllib.request import re from bs4 import BeautifulSoup as

moondol-ai.tistory.com

 

 

 

 

 

 

_

반응형