오늘의 영어단어 업로드 프로그램

Programming/Project

오늘의 영어단어 업로드 프로그램

khj1999 2021. 8. 18. 11:35

요즘 네이버의 오늘의 영어단어라는 서비스를 통해 매일 영어 단어를 5개씩 외우고 있는데
매일 수기로 공책에 받아 적고 공부를 하다 보니, 이걸 프로그램으로 만들어 자동으로 데이터를 긁어와서
내 블로그에 업로드해주면 일일이 매일 검색할 필요도 없고 나름 인생 첫 프로젝트도 시작을 해봐야 할 거 같아서
시작하게 되었다.

처음에는 무작정 파이썬의 BeautifulSoup를 사용해서 데이터를 긁어왔는데, 필요 없는 부분까지 같이 긁어 와서
데이터를 처리해줘야 했다.

import requests
from bs4 import BeautifulSoup

def get_data(before_data):
    """Turn the scraped word-list text into HTML-flavoured post content.

    The text is scanned character by character. A character counts as a
    "letter" only if it is an ASCII letter: ``str.encode()`` yields UTF-8
    bytes and ``bytes.isalpha()`` is ASCII-only, so Korean characters are
    never treated as letters.

    * A space directly before a letter is replaced by an opening
      ``<p data-ke-size="size16">`` tag.
    * A letter directly followed by a non-letter gets the separator
      ``&nbsp;  -&nbsp;  `` appended to it.

    The final character is only ever looked at as the "next" character; it
    is never marked up itself.

    Args:
        before_data: Plain text extracted from the word list.

    Returns:
        The text with the paragraph tags and separators inserted.
    """
    tmp = list(before_data)
    for i in range(len(tmp) - 1):
        if not tmp[i].encode().isalpha():
            continue
        # Guard i > 0: at index 0, tmp[i - 1] would be tmp[-1], so a text that
        # starts with a letter and ends with a space would have its *last*
        # character overwritten by the paragraph tag.
        if i > 0 and tmp[i - 1] == ' ':
            tmp[i - 1] = '<p data-ke-size="size16">'
        if not tmp[i + 1].encode().isalpha():
            tmp[i] += '&nbsp;  -&nbsp;  '
    return ''.join(tmp)

# Naver search results page that embeds the daily-word widget.
url = 'https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=%EC%98%A4%EB%8A%98%EC%9D%98+%EB%8B%A8%EC%96%B4&oquery=%EC%98%A4%EB%8A%98%EC%9D%98+%EC%98%81%EC%96%B4%EB%8B%A8%EC%96%B4&tqi=hecBksprvxsssj%2FGJxGssssstgC-317616'
# requests has no default timeout; without one a stalled connection hangs forever.
response = requests.get(url, timeout=10)
# Fail early on an HTTP error instead of parsing an error page.
response.raise_for_status()

html = response.text
soup = BeautifulSoup(html, 'html.parser')
word_list = soup.select_one('#_languageStudy > div > div.dicstudy_area > div.dicstudy_cont > div > div > ul')
if word_list is None:
    # select_one returns None when nothing matches (e.g. the page layout changed).
    raise RuntimeError('word list element not found in the Naver search page')
# Drop the '발음듣기' button label that get_text() picks up with every word.
data = word_list.get_text().replace('발음듣기','')
data = get_data(data)

가져온 데이터를 get_text() 함수를 사용해서 텍스트만 긁어 온 다음 '발음듣기'라는 문자열을 빈 문자열로 치환해서 제거해주었다.

그런 다음 문자열을 파싱 해주었는데 보면 알 수 있듯이 티스토리는 글을 작성할 때 html로 작성되기 때문에
글이 작성되는 방식을 보고 그에 맞춰서 데이터를 바꿔 주었다

사실 여기까지는 쉬웠다. 그런데 문제는 내가 API라는 걸 한 번도 사용해본 적이 없기 때문에 여기서 조금 막혔었다.
내가 운영하는 블로그는 tistory이니 tistory API를 사용해야 해서 App ID와 Secret Key를 획득했다.
그리고 Access Token을 획득한 다음 tistory API 가이드에 따라 코드를 작성해주었다.

import requests
from bs4 import BeautifulSoup
    
    def posting(content, title, access_token, blogName, category):
        """Send one post to the Tistory write endpoint and print the reply.

        Args:
            content: Post body (HTML).
            title: Post title.
            access_token: Tistory API access token.
            blogName: Name of the target blog.
            category: Category value forwarded to the API unchanged.

        Returns:
            None. The parsed response is printed to stdout.
        """
        tistory_url = 'https://www.tistory.com/apis/post/write?'
        parameters = {
            'access_token' : access_token,
            'output' : 'txt',
            'blogName' : blogName,
            'title' : title,
            'content' : content,
            # NOTE(review): presumably '0' means a private post - confirm
            # against the Tistory API guide.
            'visibility' : '0',
            'category' : category,
        }
        # requests has no default timeout; bound the wait so the caller cannot hang.
        result = requests.post(tistory_url, params=parameters, timeout=10)
        # Name the parser explicitly, as the scraping code does; leaving it out
        # makes BeautifulSoup guess one and emit a warning.
        result = BeautifulSoup(result.text, 'html.parser')
        print(result.prettify())

각각의 정보를 양식에 맞게 대입해 준 다음 포스팅을 해준다.

https://tistory.github.io/document-tistory-apis/

소개 · GitBook

No results matching ""

tistory.github.io

여기서 title에는 time을 import해서 언제 공부한 단어인지를 알려줄 수 있게 날짜를 넣어준다.

import time

def get_title():
    """Return the post title: today's local date plus a fixed Korean suffix.

    The date is rendered as ``YYYY  -  MM  -  DD`` (two spaces on each side
    of every hyphen) and is immediately followed by the suffix.
    """
    # With no time tuple, strftime formats the current local time.
    date_part = time.strftime('%Y  -  %m  -  %d')
    return date_part + '일 오늘의 영어단어'

이다음 코드는 작성이 끝났으니 Access Token과 BlogName, Category 정보를 입력하면
글을 작성할 수 있게 GUI를 만들어 주었다.

나는 GUI를 사용해본 적이 없어서 이 부분에서 제일 많은 시간을 소비하였다.

from tkinter import *

# Fixed-size main window.
root = Tk()
root.title("Today's word")
root.geometry('360x100')
root.resizable(False, False)

# One label/entry pair per value the user has to supply.
AT_Label = Label(root, text='Access Token', width=13)
AT_insert = Entry(root, width=35)
BN_Label = Label(root, text='Blog Name', width=13)
BN_insert = Entry(root, width=35)
CT_Label = Label(root, text='Category', width=13)
CT_insert = Entry(root, width=35)

# Place each pair on its own grid row: label in column 0, entry in column 1.
for grid_row, (label, entry) in zip(
    (0, 2, 4),
    ((AT_Label, AT_insert), (BN_Label, BN_insert), (CT_Label, CT_insert)),
):
    label.grid(row=grid_row, column=0)
    entry.grid(row=grid_row, column=1)

# The Run button invokes Upload and stretches across its grid cell.
Run_Btn = Button(root, text='Run', command=Upload)
Run_Btn.grid(row=6, column=1, sticky=N+W+E+S)

# Enter the Tk event loop.
root.mainloop()

참고로 mainloop()는 창이 닫힐 때까지 반환되지 않으므로 코드의 마지막에 작성해주어야 한다.

이것을 조합해서 완성하면 이번 프로젝트의 끝이라고 할 수 있다

import requests
import time
from bs4 import BeautifulSoup
from tkinter import *

def get_title():
    """Return the post title: today's local date plus a fixed Korean suffix.

    The date is rendered as ``YYYY  -  MM  -  DD`` (two spaces on each side
    of every hyphen) and is immediately followed by the suffix.
    """
    # With no time tuple, strftime formats the current local time.
    date_part = time.strftime('%Y  -  %m  -  %d')
    return date_part + '일 오늘의 영어단어'

def get_data(before_data):
    """Turn the scraped word-list text into HTML-flavoured post content.

    The text is scanned character by character. A character counts as a
    "letter" only if it is an ASCII letter: ``str.encode()`` yields UTF-8
    bytes and ``bytes.isalpha()`` is ASCII-only, so Korean characters are
    never treated as letters.

    * A space directly before a letter is replaced by an opening
      ``<p data-ke-size="size16">`` tag.
    * A letter directly followed by a non-letter gets the separator
      ``&nbsp;  -&nbsp;  `` appended to it.

    The final character is only ever looked at as the "next" character; it
    is never marked up itself.

    Args:
        before_data: Plain text extracted from the word list.

    Returns:
        The text with the paragraph tags and separators inserted.
    """
    tmp = list(before_data)
    for i in range(len(tmp) - 1):
        if not tmp[i].encode().isalpha():
            continue
        # Guard i > 0: at index 0, tmp[i - 1] would be tmp[-1], so a text that
        # starts with a letter and ends with a space would have its *last*
        # character overwritten by the paragraph tag.
        if i > 0 and tmp[i - 1] == ' ':
            tmp[i - 1] = '<p data-ke-size="size16">'
        if not tmp[i + 1].encode().isalpha():
            tmp[i] += '&nbsp;  -&nbsp;  '
    return ''.join(tmp)

def RUN():
    """Open the uploader window and run its event loop.

    The window has three entries (access token, blog name, category) and a
    Run button. Pressing Run scrapes the word list from Naver, converts it
    with ``get_data`` and posts it to Tistory under the title from
    ``get_title``. The function returns only after the Tk main loop ends.
    """
    def Upload():
        """Scrape the word list and post it, using the values typed in the entries."""
        url = 'https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=%EC%98%A4%EB%8A%98%EC%9D%98+%EB%8B%A8%EC%96%B4&oquery=%EC%98%A4%EB%8A%98%EC%9D%98+%EC%98%81%EC%96%B4%EB%8B%A8%EC%96%B4&tqi=hecBksprvxsssj%2FGJxGssssstgC-317616'
        # requests has no default timeout; without one a stalled connection
        # would freeze the GUI indefinitely.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(response.status_code)
            return

        soup = BeautifulSoup(response.text, 'html.parser')
        word_list = soup.select_one('#_languageStudy > div > div.dicstudy_area > div.dicstudy_cont > div > div > ul')
        if word_list is None:
            # select_one returns None when nothing matches (e.g. the page
            # layout changed); report it instead of raising AttributeError.
            print('word list element not found in the Naver search page')
            return
        # Drop the '발음듣기' button label that get_text() picks up with every word.
        data = word_list.get_text().replace('발음듣기','')
        posting(get_data(data), get_title(), AT_insert.get(), BN_insert.get(), CT_insert.get())

    def posting(content, title, access_token, blogName, category):
        """Send one post to the Tistory write endpoint and print the reply."""
        tistory_url = 'https://www.tistory.com/apis/post/write?'
        parameters = {
            'access_token' : access_token,
            'output' : 'txt',
            'blogName' : blogName,
            'title' : title,
            'content' : content,
            # NOTE(review): presumably '0' means a private post - confirm
            # against the Tistory API guide.
            'visibility' : '0',
            'category' : category,
        }
        result = requests.post(tistory_url, params=parameters, timeout=10)
        # Name the parser explicitly, as Upload does; leaving it out makes
        # BeautifulSoup guess one and emit a warning.
        result = BeautifulSoup(result.text, 'html.parser')
        print(result.prettify())

    root = Tk()
    root.title("Today's word")
    root.geometry('360x100')
    root.resizable(False, False)

    # Access Token
    AT_Label = Label(root, text='Access Token', width=13)
    AT_Label.grid(row=0, column=0)
    AT_insert = Entry(root, width=35)
    AT_insert.grid(row=0, column=1)

    # Blog Name
    BN_Label = Label(root, text='Blog Name', width=13)
    BN_Label.grid(row=2, column=0)
    BN_insert = Entry(root, width=35)
    BN_insert.grid(row=2, column=1)

    # Category
    CT_Label = Label(root, text='Category', width=13)
    CT_Label.grid(row=4, column=0)
    CT_insert = Entry(root, width=35)
    CT_insert.grid(row=4, column=1)

    # Upload reads the three entries above through the enclosing scope.
    Run_Btn = Button(root, text='Run', command=Upload)
    Run_Btn.grid(row=6, column=1, sticky=N+W+E+S)

    root.mainloop()
    
# Script entry point: build the window and enter the Tk main loop.
RUN()

GUI 부분이 마음에 들지는 않지만 이 정도면 못 봐줄 정도는 아니라고 생각해서 프로젝트를 완료했다.

마지막으로 exe 파일로 만들었는데 왜인지 윈도우 디펜더에서는 이걸 트로이 목마(악성코드)로 인식해서 계속 지워버린다.
해결 방법이 있을 거 같은데 지금 당장 중요한 건 아니니 다음에 기회가 되면 하자고 생각했다.