import shutil
import tempfile
import urllib.request

# download http://python.org/ into a named temporary file, then reopen it by name
with urllib.request.urlopen('http://python.org/') as response:
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        shutil.copyfileobj(response, tmp_file)

with open(tmp_file.name) as html:
    pass
import os
from pathlib import Path
import urllib.request

def get_download_path():
    """Returns the default downloads path for Linux or Windows."""
    if os.name == 'nt':
        import winreg
        sub_key = r'SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
        downloads_guid = '{374DE290-123F-4565-9164-39C4925E467B}'
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, sub_key) as key:
            location = winreg.QueryValueEx(key, downloads_guid)[0]
        return location
    else:
        return os.path.join(os.path.expanduser('~'), 'downloads')

# CSV URLs to download
list_csv = [
    "https://raw.githubusercontent.com/HyunchanMOON/lessons/master/lessons/gasprices.csv",
]

# downloads folder
# downloads_path = str(pathlib.Path.home() / "Downloads")
downloads_path = get_download_path()

# create the folder if it does not exist
if not os.path.isdir(downloads_path):
    os.makedirs(downloads_path)

for url in list_csv:
    filename = url.split("/")[-1]
    urllib.request.urlretrieve(url, Path(downloads_path, filename))
requests is used more often, but the difference seems to be that urllib.request raises an error when the response is not 200, while requests.get() simply returns the Response object (an exception is only raised if you call raise_for_status()).
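A minimal sketch of that difference; the httpbin.org URL below is just a placeholder endpoint that always answers with 404.

import urllib.error
import urllib.request
import requests

bad_url = "https://httpbin.org/status/404"  # placeholder URL returning 404

# urllib.request raises HTTPError for non-2xx responses
try:
    urllib.request.urlopen(bad_url)
except urllib.error.HTTPError as e:
    print(e.code)  # 404

# requests returns the Response either way; raise_for_status() opts in to the exception
resp = requests.get(bad_url)
print(resp.status_code)   # 404
resp.raise_for_status()   # raises requests.exceptions.HTTPError here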
import requests
import urllib.request
from bs4 import BeautifulSoup as bs

url = "https://thekkom.github.io/images/yellow_500px.png"
filename = "logo.png"

# urllib: download straight to a file
urllib.request.urlretrieve(url, filename)

# requests: fetch, then write the bytes yourself
req = requests.get(url)
print(req)       # <Response [200]>
print(dir(req))  # useful attributes include req.content (bytes) and req.text (str)

with open(filename, 'wb') as f:
    f.write(req.content)  # the with block closes the file; no explicit f.close() needed
print("save file")
# for HTML pages, parse req.text with beautifulsoup4
req = requests.get(url)
html = req.text
soup = bs(html, 'html.parser')
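Note that the url above points at a PNG, so its body is not meaningful HTML. Against an actual HTML page the parsed soup can be queried; a small sketch, using python.org only as a stand-in page:

# hypothetical example: any HTML page works here
req = requests.get("https://www.python.org/")
soup = bs(req.text, 'html.parser')
print(soup.title.string)           # contents of the <title> tag
for a in soup.find_all('a')[:5]:   # first few links on the page
    print(a.get('href'))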
References
https://docs.python.org/3/library/urllib.request.html
https://docs.python.org/3/howto/urllib2.html#urllib-howto
https://moondol-ai.tistory.com/238