728x90

preprocessing.ipynb

- finaldata.pkl : shape (3690789, 144)

EXCC_CONF_DE
USE_DE
RECHCT_EXCC_ORG_REG_YN
TRSC_EVDC_SE_CD
TRSC_EVDC_NO
TRSC_PFMC_REG_DT
TRSC_PFMC_CNCL_DT
TRSC_PFMC_STT_CD
CNCL_FORM_SE_CD
CNCL_BEF_TRSC_PFMC_STT_CD
TRSC_DE
SPLY_AMT_DFFR_RSN_CN
VAT_HDLG_SE_CD
SPLR_BCMP_NM
SPLR_BSNSR_REG_NO
SPLR_BSTP_NM
SPLR_BUCDT_NM
SPLR_ADDR
SPLR_BSNS_REG_STT_CD
SPLR_TAXT_TP_SE_CD
CLSBS_SPLR_EXCT_RSN_CN
RECHCT_EXCT_ID
ANNL
AGRT_ORGN_ID
GVCTM_PTTN_PAY_YN
TTL_BSNS_AMT
GOV_CTRB_AMT
UPAY_GOV_CTRB_AMT
MM_TRSF_BNDS_AMT
LCGVN_ALOT_CASH_AMT
LCGVN_ALOT_SPOT_AMT
PRVT_ALOT_CASH_AMT
PRVT_ALOT_SPOT_AMT
AGRT_VLID_YN
EXCT_LMT_YN
NAT_RND_BSNS_YN
DVLM_DTRS_YN
MRS_DTRS_YN
AGRT_ORGN_ROLE_SE_CD
ORGN_TTL_BSNS_AMT
AGRT_BNDS_AMT
ODY_TRSF_BNDS_AMT
DBT_TRSF_BNDS_AMT
SPOT_CROV_AMT
PRYY_CASH_CROV_AMT
PRYY_SPOT_CROV_AMT
TEMP_MM_TRSF_BNDS_AMT
TEMP_ODY_TRSF_BNDS_AMT
TEMP_DBT_TRSF_BNDS_AMT
PRVT_ALOT_AMT_BEF_YN
AGRT_ORGN_CHNG_SE_CD
BEXP_DVS_EXCT_YN
LCGVN_PAYM_WAY_SE_CD
EXCT_LMT_AMT
CRMT_ALLW_YN
TESSR_ECPT_SE_CD
SBJT_KIND_SE_CD
SBJT_SE_CD
CCLT_TFEE_AMT
TECL_CD
HIRK_RECHCT_USE_ITEPD_YN
PTC_ITEPD_CD
DPTC_ITEPD_CD_x
CASH_SPOT_SE_CD
USE_SPLY_AMT
USE_VAT_AMT
USE_CROV_SPLY_AMT
USE_CROV_VAT_AMT
EVDC_PPS_ATCH_DOC_ID
VAT_HDLG_RSTO_AMT
RSTO_CONF_SPLY_AMT
RSTO_CONF_VAT_AMT
RSTO_CONF_CROV_SPLY_AMT
RSTO_CONF_CROV_VAT_AMT
ORGN_CNLK_DTRS_YN
ETC_EVDC_ECTN_CD
ETC_EVDC_ECTN_RSN_CN
USE_AMT_SE_CD
AST_AMT
CRAST_AMT
NCAS_AMT
DEBT_AMT
CRLBT_AMT
NCL_AMT
CPTL_TTL_SUM_AMT
CPTL_AMT
SALE_AMT
SALE_TTL_GAIN_AMT
OPRFT_AMT
ROS_AMT
CEOS_AMT
COTA_COST_DEDU_BEF_NPRFT_AMT
COTA_COST_AMT
TTM_NPRFT_AMT
EQMT_UNQ_NO
CNTR_STT_CD
EQMT_SE_CD
ACQS_DE
ACQS_AMT
CEDC_YN
EQMT_REG_AMT
REGTM_ACQS_AMT
RECHCT_USE_ITEPD_ID
RECHCT_USE_TRSC_PFMC_ID
AGORG_BZEXC_ID
DSAPVL_HDLG_STT_CD
DSAPVL_RSN_SE_CD
DSAPVL_RSN_DTL_CN
LCOST_DTL_SN
PAY_AMT
LCOST_PAY_SE_CD
LCOST_OBJT_SE_CD
PAY_AGRT_ORGN_ID
PAY_OBJT_PTCPR_ID
PAY_OBJT_USER_ID
PAY_OBJT_RCHSP_SN
TXIVC_APRB_NO
ISSU_DE
WRT_DE
SPPNR_BSNSR_REG_NO
SPPNR_BSTP_NM
SPPNR_BUCDT_NM
SPPNR_ZIP_NO
SPPNR_ADDR
SUM_AMT
ART_REG_IT
FSU_AMT
FAT_AMT
FCO_AMT
FEC_AMT
FPU_SUM_AMT
FRCS_TAXT_TP_INFO_CN
FRCS_ADDR
USE_DT
TXMT
SECH_AMT
DDC_YN_DCSN_SE_CD
CHAC_TRSF_RQS_SN
DPTC_ITEPD_CD_y
CHAC_TRSF_RQS_DTIT_SN
RQS_AMT
ART_NM
EVDC_DOC_ID
BSNSR_REG_NO

- df=dummies(df, "DSAPVL_RSN_SE_CD") :  159열로 확장

data collection.ipynb

- mergedata.pkl 을 EXCC_CONF_DE>='20160101' 해서 shape (3172234, 247)

df = pd.merge(data, eqpt, how = 'left', on="RECHCT_USE_ITEPD_ID")

df.shape = (3178356, 279)

lcost.shape = (2110515, 27) // lcost가 뭔데?(data1~data33)

df = pd.merge(df, lcost, how = 'left', on="RECHCT_USE_ITEPD_ID")

Tdata를 templtdata.pkl로 떨굼 // Tdata가 뭔데?(T1~T22)

cdata를 templcdata.pkl 로 떨굼 // cdata가 뭔데?(C1~C44)

rdata = pd.read_excel('/home/grusls/backupdata/rawdata/R.xls')

rdata를 templrdata.pkl로 떨굼

ddata = pd.read_excel('/home/grusls/backupdata/rawdata/DD.xls')

ddata를 templddata.pkl로 떨굼

df = pd.merge(df, Tdata, how = 'left', on="RECHCT_USE_TRSC_PFMC_ID")

df = pd.merge(df, cdata, how = 'left', on="RECHCT_USE_TRSC_PFMC_ID")

orgdata = pd.read_excel('/home/grusls/backupdata/rawdata/BSNSRNO.xls')

orgdata = orgdata.rename(columns = {"ORGN_ID" : "AGRT_ORGN_ID"})

orgdata를 temporg.pkl 로 떨굼

df.shape = (3690375, 378)

df = pd.merge(df, rdata, how = 'left', on="RECHCT_USE_TRSC_PFMC_ID")

df.shape = (3690375, 396)

df = pd.merge(df, ddata, how = 'left', on="RECHCT_USE_TRSC_PFMC_ID")

df = pd.merge(df, orgdata, how = 'left', on="AGRT_ORGN_ID")

df를 finaldata.pkl로 떨굼

test notebook.ipynb

 

 

model_others.json

model_others_test.json

model_VAT.json

model_VAT_test.json

 

 

 

 

728x90

+ Recent posts