본문 바로가기
카테고리 없음

DART_to_HWP(jupyter notebook test)

by 회사원코딩 2020. 4. 4.
반응형

 

 

In [17]:
from time import sleep

import pyperclip as cb
import pandas as pd
from selenium import webdriver
import win32com.client as win32
In [18]:
# Start Chrome through the chromedriver binary located next to the notebook,
# then open the DART disclosure list page.
chromedriver_path = r"./chromedriver.exe"
dart_list_url = 'http://dart.fss.or.kr/dsac001/mainAll.do'

driver = webdriver.Chrome(chromedriver_path)
driver.get(dart_list_url)
In [20]:
# Find every anchor whose href starts with "/dsaf001/main.do?rcpNo=" and keep
# the JavaScript stored in its onclick attribute (one entry per matching anchor).
report_anchors = driver.find_elements_by_css_selector('a[href^="/dsaf001/main.do?rcpNo="]')
onclick_list = [anchor.get_attribute('onclick') for anchor in report_anchors]

# Last expression of the cell: the notebook displays the collected list.
onclick_list
Out[20]:
["openReportViewer('20191025000531'); return false;",
 "openReportViewer('20191025000530'); return false;",
 "openReportViewer('20191025000529'); return false;",
 "openReportViewer('20191025000528'); return false;",
 "openReportViewer('20191025900590'); return false;",
 "openReportViewer('20191025000511'); return false;",
 "openReportViewer('20191025000509'); return false;",
 "openReportViewer('20191025000508'); return false;",
 "openReportViewer('20191025000507'); return false;",
 "openReportViewer('20191025900561'); return false;",
 "openReportViewer('20191025000500'); return false;",
 "openReportViewer('20191025900563'); return false;",
 "openReportViewer('20191025000503'); return false;",
 "openReportViewer('20191025000504'); return false;",
 "openReportViewer('20191025900559'); return false;",
 "openReportViewer('20191025000524'); return false;",
 "openReportViewer('20191025000502'); return false;",
 "openReportViewer('20191025000501'); return false;",
 "openReportViewer('20191025000499'); return false;",
 "openReportViewer('20191025900553'); return false;",
 "openReportViewer('20191025000498'); return false;",
 "openReportViewer('20191025900550'); return false;",
 "openReportViewer('20191025000484'); return false;",
 "openReportViewer('20191018000061'); return false;",
 "openReportViewer('20191018000132'); return false;",
 "openReportViewer('20191025900547'); return false;",
 "openReportViewer('20191025000472'); return false;",
 "openReportViewer('20191025000479'); return false;",
 "openReportViewer('20191025900539'); return false;",
 "openReportViewer('20191025000495'); return false;",
 "openReportViewer('20191025000494'); return false;",
 "openReportViewer('20191025000493'); return false;",
 "openReportViewer('20191025800540'); return false;",
 "openReportViewer('20191025000487'); return false;",
 "openReportViewer('20191025900534'); return false;",
 "openReportViewer('20191025600530'); return false;",
 "openReportViewer('20191025800535'); return false;",
 "openReportViewer('20191025000474'); return false;",
 "openReportViewer('20191025000481'); return false;",
 "openReportViewer('20191025000480'); return false;",
 "openReportViewer('20191025800531'); return false;",
 "openReportViewer('20191025000477'); return false;",
 "openReportViewer('20191025900527'); return false;",
 "openReportViewer('20191025800525'); return false;",
 "openReportViewer('20191025000466'); return false;",
 "openReportViewer('20191025900514'); return false;",
 "openReportViewer('20191025000462'); return false;",
 "openReportViewer('20191025900517'); return false;",
 "openReportViewer('20191025900515'); return false;",
 "openReportViewer('20191025800523'); return false;",
 "openReportViewer('20191025900518'); return false;",
 "openReportViewer('20191025000470'); return false;",
 "openReportViewer('20191025900484'); return false;",
 "openReportViewer('20191025900509'); return false;",
 "openReportViewer('20191025800510'); return false;",
 "openReportViewer('20191025800489'); return false;",
 "openReportViewer('20191025800503'); return false;",
 "openReportViewer('20191025800482'); return false;",
 "openReportViewer('20191025800505'); return false;",
 "openReportViewer('20191025000459'); return false;",
 "openReportViewer('20191025000458'); return false;",
 "openReportViewer('20191025800497'); return false;",
 "openReportViewer('20191025800499'); return false;",
 "openReportViewer('20191025800495'); return false;",
 "openReportViewer('20191025900478'); return false;",
 "openReportViewer('20191025900459'); return false;",
 "openReportViewer('20191025000451'); return false;",
 "openReportViewer('20191025000450'); return false;",
 "openReportViewer('20191025000446'); return false;",
 "openReportViewer('20191025000443'); return false;",
 "openReportViewer('20191025800488'); return false;",
 "openReportViewer('20191025800486'); return false;",
 "openReportViewer('20191025000444'); return false;",
 "openReportViewer('20191025900475'); return false;",
 "openReportViewer('20191025800476'); return false;",
 "openReportViewer('20191025900472'); return false;",
 "openReportViewer('20191025000435'); return false;",
 "openReportViewer('20191025000431'); return false;",
 "openReportViewer('20191025000423'); return false;",
 "openReportViewer('20191025000439'); return false;",
 "openReportViewer('20191025000437'); return false;",
 "openReportViewer('20191025000436'); return false;",
 "openReportViewer('20191025900461'); return false;",
 "openReportViewer('20191025800454'); return false;",
 "openReportViewer('20191025900456'); return false;",
 "openReportViewer('20191025000433'); return false;",
 "openReportViewer('20191025000432'); return false;",
 "openReportViewer('20191025000430'); return false;",
 "openReportViewer('20191025000429'); return false;",
 "openReportViewer('20191025000428'); return false;",
 "openReportViewer('20191025000426'); return false;",
 "openReportViewer('20191025900455'); return false;",
 "openReportViewer('20191025800457'); return false;",
 "openReportViewer('20191025000399'); return false;",
 "openReportViewer('20191025900450'); return false;",
 "openReportViewer('20191025000421'); return false;",
 "openReportViewer('20191025000422'); return false;",
 "openReportViewer('20191025000420'); return false;",
 "openReportViewer('20191025000398'); return false;",
 "openReportViewer('20191025900430'); return false;"]
In [21]:
link_list = []

# Only the first five entries are processed in this cell. For each one, two
# extra browser windows are opened, read, and closed again.
for viewer_script in onclick_list[:5]:
    # Execute the row's onclick JavaScript and move to the second window handle
    # (presumably the report viewer popup that the script opens).
    driver.execute_script(viewer_script)
    driver.switch_to.window(driver.window_handles[1])

    # Trigger the "#download" anchor by running its own onclick JavaScript,
    # then move to the third window handle.
    download_anchor = driver.find_element_by_css_selector('a[href="#download"]')
    driver.execute_script(download_anchor.get_attribute('onclick'))
    driver.switch_to.window(driver.window_handles[2])

    # Record the href of the first anchor whose href starts with "/pdf".
    # NOTE: list.append() is synchronous, so the link is available right away
    # and no polling on the list length is needed before printing it.
    pdf_anchor = driver.find_element_by_css_selector('a[href^="/pdf"]')
    link_list.append(pdf_anchor.get_attribute('href'))
    print(link_list[-1])

    # Close the third window, then the second, and return to the first window
    # so the next iteration starts from the same state.
    driver.close()
    driver.switch_to.window(driver.window_handles[1])
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
 
http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000531&dcm_no=6937294
http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000530&dcm_no=6937291
http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000529&dcm_no=6937288
http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000528&dcm_no=6937285
http://dart.fss.or.kr/pdf/download/zip.do?rcp_no=20191025900590&dcm_no=6937277
In [22]:
# Because it takes too long... load the link list from a pickle file instead.
import pickle

link_pickle_path = r"C:\Users\smj02\Desktop\hwp-파이썬관련코드\link_list.pickle"
with open(link_pickle_path, 'rb') as pickle_file:
    link_list = pickle.load(pickle_file)
In [24]:
link_list[:10]
Out[24]:
['http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000531&dcm_no=6937294',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000530&dcm_no=6937291',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000529&dcm_no=6937288',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000528&dcm_no=6937285',
 'http://dart.fss.or.kr/pdf/download/zip.do?rcp_no=20191025900590&dcm_no=6937277',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000511&dcm_no=6937245',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000509&dcm_no=6937236',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000508&dcm_no=6937231',
 'http://dart.fss.or.kr/pdf/download/pdf.do?rcp_no=20191025000507&dcm_no=6937228',
 'http://dart.fss.or.kr/pdf/download/zip.do?rcp_no=20191025900561&dcm_no=6937227']
In [31]:
# pd.read_html returns one DataFrame per HTML table found in the page source;
# only the first one is kept.
table = pd.read_html(driver.page_source)[0]

# The browser is not used after this point. quit() ends the whole WebDriver
# session (all windows plus the driver process), whereas close() would only
# close the current window and could leave the driver process running.
driver.quit()
In [32]:
table.head()
Out[32]:
  시간 공시대상회사 보고서명 제출인 접수일자 비고
0 18:51 와이디온라인 [기재정정]주요사항보고서(유상증자결정) 와이디온라인 2019.10.28 NaN
1 18:48 와이디온라인 [기재정정]주요사항보고서(감자결정) 와이디온라인 2019.10.28 NaN
2 18:46 와이디온라인 [기재정정]주요사항보고서(유상증자결정) 와이디온라인 2019.10.28 NaN
3 18:42 와이디온라인 [기재정정]주요사항보고서(감자결정) 와이디온라인 2019.10.28 NaN
4 18:34 와이디온라인 [기재정정]회생계획인가 와이디온라인 2019.10.25 NaN
In [33]:
len(table)
Out[33]:
100
In [34]:
len(table.columns)
Out[34]:
6
In [35]:
# Create the HWP automation object through COM using its ProgID.
hwp_prog_id = 'HWPFrame.HwpObject'
hwp = win32.Dispatch(hwp_prog_id)

# Register the security module. This is the cell's last expression, so the
# notebook displays the call's return value.
hwp.RegisterModule("FilePathCheckDLL", "SecurityModule")
Out[35]:
True
In [36]:
# Open the template document; as the cell's last expression, the return value
# of Open() is displayed by the notebook.
template_path = r"C:\Users\smj02\Desktop\dart_template.hwp"
hwp.Open(template_path)
Out[36]:
True
In [38]:
def hwp_insert_hyperlink(text, url):
    """Insert a hyperlink through the HWP "InsertHyperlink" action.

    Runs the "TableCellBlock" action first, loads the action's default
    parameters into the HHyperLink parameter set, overrides its ``Text`` and
    ``Command`` fields, and executes the action. Uses the module-level ``hwp``
    object. Returns None.

    Args:
        text: Value assigned to the parameter set's ``Text`` field.
        url: Value assigned to the parameter set's ``Command`` field.
    """
    action = hwp.HAction
    action.Run("TableCellBlock")

    link_params = hwp.HParameterSet.HHyperLink
    action.GetDefault("InsertHyperlink", link_params.HSet)
    link_params.Text = text
    link_params.Command = url
    action.Execute("InsertHyperlink", link_params.HSet)
In [37]:
def hwp_insert_text(text):
    """Insert text through the HWP "InsertText" action.

    Loads the action's default parameters into the HInsertText parameter set,
    overrides its ``Text`` field, and executes the action. Uses the
    module-level ``hwp`` object. Returns None.

    Args:
        text: Value assigned to the parameter set's ``Text`` field.
    """
    action = hwp.HAction
    text_params = hwp.HParameterSet.HInsertText
    action.GetDefault("InsertText", text_params.HSet)
    text_params.Text = text
    action.Execute("InsertText", text_params.HSet)
In [ ]:
# for i in len(table):
#     for j in table.loc[i]:
#         cb.copy(j)
#         hwp.Run('Paste')
#         hwp.Run('TableRightCellAppend')
#     hwp.Run('TableAppendRow')
In [39]:
# Refining it a bit more: walk the DataFrame cell by cell and fill the HWP table.
REPORT_NAME_COL = 2  # report-name column: needs a hyperlink instead of plain text
REMARKS_COL = 5      # right-most remarks column: all NaN, so it is left empty

for row_no in range(len(table)):
    for col_no, cell in enumerate(table.loc[row_no]):
        if row_no == 0 and col_no == 0:
            # Very first cell: type the text without moving to another cell first.
            hwp_insert_text(cell)
            continue

        # Every other cell is reached by running 'TableRightCellAppend' first.
        hwp.Run('TableRightCellAppend')

        if col_no == REPORT_NAME_COL:
            # Each "?" in the link is prefixed with a backslash before the
            # link is combined with the ";1;0;0" suffix.
            escaped_link = link_list[row_no].replace("?", "\\?")
            hwp_insert_hyperlink(text=cell, url='{};1;0;0'.format(escaped_link))
        elif col_no != REMARKS_COL:
            # All remaining columns are inserted as plain text.
            hwp_insert_text(cell)
In [ ]:
 

 

 

 

 


회사원코딩이 추천하는 오늘자 파이썬 입문교재 3종

 

1. 모두의 파이썬 X 알고리즘(합본호):누구나 쉽게 배우는 프로그래밍 기초

 

모두의 파이썬 X 알고리즘(합본호):누구나 쉽게 배우는 프로그래밍 기초

COUPANG

www.coupang.com

파이썬으로 프로그래밍에 처음 입문하기엔 아주 재미있고 쉽게 배울 수 있음.
다른언어 사용자가 파이썬을 배우는 교재로는 비추천. 다소 가벼운 느낌.

 

2. 초보자를 위한 파이썬 200제

 

초보자를 위한 파이썬 200제

COUPANG

www.coupang.com

깔끔한 예제와 소스에 대한 자세한 설명이 돋보이는 교재.
입문-초급-중급-고급 난이도로 단계별 도전이 가능한 꽤 괜찮은 입문서.

 

3. 혼자 공부하는 첫 프로그래밍 with 파이썬:1:1 과외하듯 배우는 왕초보 코딩 입문서

 

혼자 공부하는 첫 프로그래밍 with 파이썬:1:1 과외하듯 배우는 왕초보 코딩 입문서

COUPANG

www.coupang.com

초유의 베스트셀러 "혼공파"마저도 당신에게 어렵게 느껴진다면?
유튜브 무료강의, 온라인실습과 다양한 이미지를 통해
제목대로 "1:1과외하듯" 파이썬을 배워볼 수 있음.

반응형

댓글1