python:: 크롤링_네이버쇼핑01 _따라해보기

2022-08-13 update

 

크롤링 관련 글을 보면 찔끔찔끔 올려두거나 부분부분 잘라서 올려둔 것들이 많아서, 막상 따라 하면 오류를 뿜거나 작성한 지 몇 년이 지나 웹사이트 구조가 바뀐 것이 반영되지 않아 실제로는 동작하지 않는 것이 더러 있다.

  예전에 연습할 때 모아 두었다가 잘된 것만 스크랩해 두었는데, 그중에 기억에 남는 것이 현 시점에서도 잘 실행되는지 다시 따라 해 보자.

 

 

블로그 글(https://kwonkyo.tistory.com/529) 은 네이버 쇼핑항목을 따라해 보면서 처음으로? 성공하고선 기뻐했던 기억이 있다. 그리고 실행법도 자세히 설명하고 있어서 먼저 보아야 한다.  여기는 소스를 그대로 복사한 다음 검색어만 바꾸어 보았다. 현재도 여전히 실행이 잘 된다. 링크를 타고 들어가 제대로 익혀봅시다

 

 

 

 

 

 

 

여전히 잘 작동하는 소스코드, 따라서 일단 실행해 보자 

예제1, 결과가.... 5개만 보이네

아래 코드에서 9~19번 줄의 각 항목이 네이버쇼핑 검색에서 선택하는 조건(검색어, 페이지 번호, 페이지당 개수, 정렬 방식 등)이다.

이 부분의 "구조"를 이해하면 응용하거나 오류를 잡아내는데 좋다.

 

 

## 네이버 쇼핑 크롤링  * request / urllib,urlopen 사용

import requests
from urllib.request import urlopen
import urllib
from bs4 import BeautifulSoup


# Search settings: the keyword plus the paging / filter / sort / view options
searchText = "다이소 실내 슬리퍼"
startpage, display = "1", "40"    # page index to fetch, results per page
byList = "total"                  # value sent as productSet
sort = "rel"                      # original note: "Naver price order" - NOTE(review): confirm what "rel" sorts by
timestamp = ""
viewType = "list"                 # list view
# Percent-encode the keyword so it can be placed in the URL
encText = urllib.parse.quote(searchText)
# Position matters: scrap() reads these values by index (0..7)
searchOption = [encText, startpage, display, byList] + [encText, sort, timestamp, viewType]



# https://search.shopping.naver.com/search/all?
#         frm=NVSHATC
#         &origQuery=%ED%8E%B8%EB%B0%B1%EC%B0%9C%EA%B8%B0
#         &pagingIndex=1
#         &pagingSize=40
#         &productSet=total
#         &query=%ED%8E%B8%EB%B0%B1%EC%B0%9C%EA%B8%B0
#         &sort=rel
#         &timestamp=
#         &viewType=list


def scrap():
    """Fetch one Naver Shopping search page and print what its HTML contains.

    Builds the search URL from the module-level ``searchOption`` list,
    downloads the page with ``urlopen`` and parses it with BeautifulSoup.
    Prints, in order: the URL and the number of title elements found, a
    numbered list of the link texts, and finally title / price / registration
    text for every product entry.

    Product entries that lack any of the three fields are skipped instead of
    aborting the whole run with an IndexError.
    """
    # (name, value) pairs in the order the site's own search URL lists them.
    query_fields = (
        ("origQuery", searchOption[0]),    # search keyword (percent-encoded)
        ("pagingIndex", searchOption[1]),  # page number to fetch
        ("pagingSize", searchOption[2]),   # results per page
        ("productSet", searchOption[3]),   # product set, e.g. "overseas"
        ("query", searchOption[4]),        # search keyword again
        ("sort", searchOption[5]),         # sort key
        ("timestamp", searchOption[6]),
        ("viewType", searchOption[7]),     # "list" = list view
    )
    url = "https://search.shopping.naver.com/search/all?&frm=NVSHATC" + "".join(
        f"&{name}={value}" for name, value in query_fields
    )

    # Parse inside the ``with`` so the HTTP response is closed afterwards.
    with urlopen(url) as response:
        bs = BeautifulSoup(response, "html.parser")

    product_list = bs.select("li[class^='basicList_item']")

    # NOTE(review): this class name and the one below carry a hashed suffix
    # that presumably changes when the site is rebuilt - confirm before reuse.
    cnt = len(bs.find_all('div', class_='basicList_title__3P9Q7'))

    print(f'-url주소: {url}', f'-상품수: {cnt}','' ,sep="\n")

    items = bs.find_all(attrs={"class":"basicList_link__1MaTN"})
    for number, item in enumerate(items, start=1):
        print(number, item.get_text())

    for li in product_list:
        # Only direct child *tags*: text nodes have no select() method.
        for goods in li.find_all(True, recursive=False):
            title = goods.select_one("a[class^='basicList_link__']")
            price = goods.select_one("span[class^='price_num__']")
            registerdate = goods.select_one("div[class^='basicList_etc_box__'] > span")
            if title is None or price is None or registerdate is None:
                # This child does not hold a complete product record.
                continue

            print(title.text, price.text, registerdate.text)


if __name__ == "__main__":
    scrap()

 

 

그런데 문제는 결과가 많아야 하는데 5개만 표시된다. 

실제로 검색한 페이지를 보려면 파란색 url주소를 복사해서 크롬 브라우저에 붙여넣기 해본다.

 

 

 

실제로는 아래 주소로 들어가면 검색되는 항목은 엄청 많다

https://search.shopping.naver.com/search/all?&frm=NVSHATC&origQuery=%EB%8B%A4%EC%9D%B4%EC%86%8C%20%EC%8B%A4%EB%82%B4%20%EC%8A%AC%EB%A6%AC%ED%8D%BC&pagingIndex=1&pagingSize=40&productSet=total&query=%EB%8B%A4%EC%9D%B4%EC%86%8C%20%EC%8B%A4%EB%82%B4%20%EC%8A%AC%EB%A6%AC%ED%8D%BC&sort=rel&timestamp=&viewType=list

 

 

 

 

 

 

 

 

예제2, 이제야 제대로 보인다

 

https://curlconverter.com/

 

Convert curl commands to code

GitHub is matching all contributions to this project on GitHub Sponsors. Contribute Now

curlconverter.com

 

EveryX님 글에서의 설명을 잘 따라해 보자.

## 네이버 쇼핑 크롤링 예제2  * request /  curlconverter.com 사용
 
import requests
import json


def scrap():
    """Query the Naver Shopping search JSON endpoint and print each product.

    Sends one GET request to ``/api/search/all`` using cookies, headers and
    query parameters copied from a browser request (converted to Python with
    curlconverter.com). Prints the final request URL and the number of
    products returned, then one line per product with its title, price and
    open date.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON body has no ``shoppingResult.products`` entry.
    """
    # NOTE(review): these values were captured from a real browser session;
    # NID_AUT / NID_SES look like login tokens. Hard-coding them publishes the
    # session - prefer loading them from the environment and invalidate the
    # ones shown here.
    cookies = {
        'NNB': 'IRS72UA5CVSGE',
        'autocomplete': 'use',
        'AD_SHP_BID': '22',
        'nid_inf': '1664549890',
        'NID_JKL': 'oVX5xdcK9e5k2nd9c2ugMch8Emj80hDgw0XbSKVn4MM=',
        'NaverSuggestUse': 'unuse%26use',
        'nx_ssl': '2',
        'NID_AUT': 'Hmmps7vzk0b5qOfrEPlMPAfJNV5csZ3Wntjl6/2Frq/m4bvrcKg0zhDHbvkokuDS',
        '_ga': 'GA1.2.1224001448.1650726173',
        '_ga_7VKFYR6RV1': 'GS1.1.1659834690.13.0.1659834690.60',
        'ASID': '7db0ee2e000001827e28ca1800000058',
        'ncpa': '613264|l6rvnxz4|c02e828c89beb2f1bf6dc266cc77a490adf63021|s_21c5ebc27770|48d4834130422bba84845d1671ec03e82b3b0cac:1510716|l6rvp7i0|de9f40e3fecf79a7bbf72a06abdc6eac61e2dcab|s_c1ad54f15163|f339dce04935ed864c9d2500f8e901c903796663',
        'page_uid': 'hYPARdp0J1sssvlFkHdssssstdw-025477',
        'spage_uid': 'hYPARdp0J1sssvlFkHdssssstdw-025477',
        'NID_SES': 'AAABuW8YluCssMERON7C0avxrCx4me89rNSLb8/9ylgFZiLNGV1TUBflu+K9k6GTBzKdhZ+0LbMhQlUtw3uqPb2mi0r9ptpR+xtUGdhY62b1m5t7T/g7TS3U9IEqrE4lpvCpzhv7nLkb1808XtixttrNY/vzfrmVUtkKWsclu/5iJFzUm1I8ai2z8GGImmtr4NRoXrkFVlKIE/gPtf034leURsDywf941MmVMrIj8A2f8ZEgf7IXe7jgS17RyddHT7lFGfouoid+PT9aHGP/6OtNsUl2iXmIkHodZyToHLpy+oSoK3QnELRaBENRgvo31u+pxTmB6u7m6OuYwFjmDAi3WiZWD6AKIATHSWHnJ4S90rH2bnguvm8LueQSGvx//+iFHzjdObkn0RHjVFkRvoakaCPhmwdcqLKttraL/ei+ExsxH3fy6ujJTJ9gdYN+4GCoyqTupdorFgYeXQEAf6qzJkjyMl0x2ttCmk9uOOuu7knQPBBN2JE/xU4/Tlbm4WxDCuDrBWaZSht9L9vAS1KujL3qieWbryRezZXEmpep1c7WXhMR3+81AOm23uqW6/xSarCgo3fCxK4/MYs85WCFVRM=',
        'sus_val': 'Z9e1ECx58ewCN2iplpsgxIl7',
    }

    # The Cookie header itself is not listed here; the cookies travel through
    # the ``cookies=`` argument of requests.get instead.
    headers = {
        'authority': 'search.shopping.naver.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'en,ko;q=0.9,ko-KR;q=0.8',
        'dnt': '1',
        'logic': 'PART',
        'referer': 'https://search.shopping.naver.com/search/all?query=%EB%8B%A4%EC%9D%B4%EC%86%8C%20%EC%8B%A4%EB%82%B4%20%EC%8A%AC%EB%A6%AC%ED%8D%BC&frm=NVSHATC&prevQuery=%EB%8B%A4%EC%9D%B4%EC%86%8C%20%EC%8B%A4%EB%82%B4%20%EC%8A%AC%EB%A6%AC%ED%8D%BC',
        'sec-ch-ua': '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
    }

    params = {
        'sort': 'rel',
        'pagingIndex': '1',
        'pagingSize': '40',
        'viewType': 'list',
        'productSet': 'total',
        'deliveryFee': '',
        'deliveryTypeValue': '',
        'frm': 'NVSHATC',
        'query': '다이소 실내 슬리퍼',
        'origQuery': '다이소 실내 슬리퍼',
        'iq': '',
        'eq': '',
        'xq': '',
    }

    response = requests.get(
        'https://search.shopping.naver.com/api/search/all',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=10,  # do not hang forever if the server never answers
    )
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()

    products = json.loads(response.text)['shoppingResult']['products']

    # Count the products themselves, not the top-level keys of the JSON body.
    print(f'-url주소: {response.url}', f'-상품수: {len(products)}','' ,sep="\n")

    for product in products:
        # Plain values (no trailing commas) so they do not print as 1-tuples.
        title = product['productTitle']
        price = product['price']
        registerdate = product['openDate']

        print(title, price, registerdate)


if __name__ == "__main__":
    scrap()

 

실행하면 값이 잘 나온다

 

 

 

 

 

 

 

 

 

예제3,  조금 더 편의성을 고려해 보자

그런데 위처럼 하면 검색어가 바뀌면 매번 curlconverter.com에 들어가서 소스코드를 복사한 다음, 그것을 내 코드에 복사해 두고 수정을 해야 한다. 조금 번거롭네...

 

그래서 예제1번의 검색항목과 조건을 [리스트]로 만들어 둔 것을 활용해서 약간 손을 봐봅시다.

requests.get 함수를 실행할 때 들어가는 headers / params 값을, scrap() 함수 첫머리에 모아 둔 검색 조건(searchText 등)에서 불러오도록 수정해 보았다. 그러면 매번 curlconverter.com을 거치지 않고 searchText 항목의 검색어만 변경하면 빠르게 찾을 수 있다.

 

## 네이버 쇼핑 크롤링 예제3   * request /  curlconverter.com 사용

import json     #추가1
import urllib   #추가2
import requests


def scrap(searchText="다이소 실내 슬리퍼", startpage="1", display="40",
          byList="total", sort="rel", viewType="list"):
    """Query the Naver Shopping search JSON endpoint and print each product.

    Same request as the curlconverter.com-generated example, but the search
    conditions are parameters, so a different keyword or page needs no new
    capture. Calling ``scrap()`` with no arguments runs the original search.

    Args:
        searchText: Search keyword, sent as ``query`` / ``origQuery`` and
            (percent-encoded) inside the ``referer`` header.
        startpage: Page number, sent as ``pagingIndex``.
        display: Results per page, sent as ``pagingSize``.
        byList: Product-set filter, sent as ``productSet``.
        sort: Sort key, sent as ``sort``.
        viewType: View type, sent as ``viewType``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON body has no ``shoppingResult.products`` entry.
    """
    # The referer mimics the search page URL, so the keyword must be encoded.
    encText = urllib.parse.quote(searchText)
    encTextforURL = encText + "&frm=NVSHATC&prevQuery=" + encText

    # NOTE(review): these values were captured from a real browser session;
    # NID_AUT / NID_SES look like login tokens. Hard-coding them publishes the
    # session - prefer loading them from the environment and invalidate the
    # ones shown here.
    cookies = {
        'NNB': 'IRS72UA5CVSGE',
        'autocomplete': 'use',
        'AD_SHP_BID': '22',
        'nid_inf': '1664549890',
        'NID_JKL': 'oVX5xdcK9e5k2nd9c2ugMch8Emj80hDgw0XbSKVn4MM=',
        'NaverSuggestUse': 'unuse%26use',
        'nx_ssl': '2',
        'NID_AUT': 'Hmmps7vzk0b5qOfrEPlMPAfJNV5csZ3Wntjl6/2Frq/m4bvrcKg0zhDHbvkokuDS',
        '_ga': 'GA1.2.1224001448.1650726173',
        '_ga_7VKFYR6RV1': 'GS1.1.1659834690.13.0.1659834690.60',
        'ASID': '7db0ee2e000001827e28ca1800000058',
        'ncpa': '613264|l6rvnxz4|c02e828c89beb2f1bf6dc266cc77a490adf63021|s_21c5ebc27770|48d4834130422bba84845d1671ec03e82b3b0cac:1510716|l6rvp7i0|de9f40e3fecf79a7bbf72a06abdc6eac61e2dcab|s_c1ad54f15163|f339dce04935ed864c9d2500f8e901c903796663',
        'page_uid': 'hYPARdp0J1sssvlFkHdssssstdw-025477',
        'spage_uid': 'hYPARdp0J1sssvlFkHdssssstdw-025477',
        'NID_SES': 'AAABuW8YluCssMERON7C0avxrCx4me89rNSLb8/9ylgFZiLNGV1TUBflu+K9k6GTBzKdhZ+0LbMhQlUtw3uqPb2mi0r9ptpR+xtUGdhY62b1m5t7T/g7TS3U9IEqrE4lpvCpzhv7nLkb1808XtixttrNY/vzfrmVUtkKWsclu/5iJFzUm1I8ai2z8GGImmtr4NRoXrkFVlKIE/gPtf034leURsDywf941MmVMrIj8A2f8ZEgf7IXe7jgS17RyddHT7lFGfouoid+PT9aHGP/6OtNsUl2iXmIkHodZyToHLpy+oSoK3QnELRaBENRgvo31u+pxTmB6u7m6OuYwFjmDAi3WiZWD6AKIATHSWHnJ4S90rH2bnguvm8LueQSGvx//+iFHzjdObkn0RHjVFkRvoakaCPhmwdcqLKttraL/ei+ExsxH3fy6ujJTJ9gdYN+4GCoyqTupdorFgYeXQEAf6qzJkjyMl0x2ttCmk9uOOuu7knQPBBN2JE/xU4/Tlbm4WxDCuDrBWaZSht9L9vAS1KujL3qieWbryRezZXEmpep1c7WXhMR3+81AOm23uqW6/xSarCgo3fCxK4/MYs85WCFVRM=',
        'sus_val': 'Z9e1ECx58ewCN2iplpsgxIl7',
    }

    # The Cookie header itself is not listed here; the cookies travel through
    # the ``cookies=`` argument of requests.get instead.
    headers = {
        'authority': 'search.shopping.naver.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'en,ko;q=0.9,ko-KR;q=0.8',
        # Fixed value: this header must not follow the page number.
        'dnt': '1',
        'logic': 'PART',
        'referer': 'https://search.shopping.naver.com/search/all?query='+ encTextforURL,
        'sec-ch-ua': '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
    }

    # requests percent-encodes these values itself, so the raw keyword is used.
    params = {
        'sort': sort,
        'pagingIndex': startpage,
        'pagingSize': display,
        'viewType': viewType,
        'productSet': byList,
        'deliveryFee': '',
        'deliveryTypeValue': '',
        'frm': 'NVSHATC',
        'query': searchText,
        'origQuery': searchText,
        'iq': '',
        'eq': '',
        'xq': '',
    }

    response = requests.get(
        'https://search.shopping.naver.com/api/search/all',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=10,  # do not hang forever if the server never answers
    )
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    response.raise_for_status()

    products = json.loads(response.text)['shoppingResult']['products']

    # Count the products themselves, not the top-level keys of the JSON body.
    print(f'-url주소: {response.url}', f'-상품수: {len(products)}','' ,sep="\n")

    for product in products:
        # Plain values (no trailing commas) so they do not print as 1-tuples.
        title = product['productTitle']
        price = product['price']
        registerdate = product['openDate']

        print(title, price, registerdate)


if __name__ == "__main__":
    scrap()

 

 

 

 

 

 

 

_

 
반응형