@@ -339,7 +339,6 @@ def get_pop(
339339 infmort_rates = None ,
340340 imm_rates = None ,
341341 initial_pop = None ,
342- pre_pop_dist = None ,
343342 country_id = UN_COUNTRY_CODE ,
344343 start_year = START_YEAR ,
345344 end_year = END_YEAR ,
@@ -368,42 +367,18 @@ def get_pop(
368367 and model age
369368 initial_pop_data (Pandas DataFrame): initial population data
370369 for the first year of model calibration (start_year)
371- pre_pop_dist (Numpy array): population distribution for the year
372- before the initial year for calibration
373370 country_id (str): country id for UN data
374371 start_year (int): start year data
375372 end_year (int): end year for data
376373 download_path (str): path to save population distribution data
377374
378375 Returns:
379376 pop_2D (Numpy array): population distribution over T0 periods
380- pre_pop (Numpy array): population distribution one year before
381- initial year for calibration of omega_S_preTP
382377 """
383378 # Generate time path of the nonstationary population distribution
384379 # Get path up to end of data year
385380 pop_2D = np .zeros ((end_year + 2 - start_year , E + S ))
386381 if infer_pop :
387- if pre_pop_dist is None :
388- pre_pop_data = get_un_data (
389- "47" ,
390- country_id = country_id ,
391- start_year = start_year ,
392- end_year = start_year ,
393- )
394- if download_path :
395- pre_pop_data .to_csv (
396- os .path .join (download_path , "raw_pre_pop_data_UN.csv" ),
397- index = False ,
398- )
399- pre_pop_sample = pre_pop_data [
400- (pre_pop_data ["age" ] >= min_age )
401- & (pre_pop_data ["age" ] <= max_age )
402- ]
403- pre_pop = pre_pop_sample .value .values
404- pre_pop_dist = pop_rebin (pre_pop , E + S )
405- else :
406- pre_pop = pre_pop_dist
407382 if initial_pop is None :
408383 initial_pop_data = get_un_data (
409384 "47" ,
@@ -458,38 +433,17 @@ def get_pop(
458433 pop = pop_data_sample .value .values
459434 # Generate the current population distribution given that E+S might
460435 # be less than max_age-min_age+1
461- # age_per_EpS = np.arange(1, E + S + 1)
462436 pop_EpS = pop_rebin (pop , E + S )
463437 pop_2D [y - start_year , :] = pop_EpS
464438
465- # get population distribution one year before initial year for
466- # calibration of omega_S_preTP
467- pre_pop_data = get_un_data (
468- "47" ,
469- country_id = country_id ,
470- start_year = start_year - 1 ,
471- end_year = start_year - 1 ,
472- )
473- pre_pop_sample = pre_pop_data [
474- (pre_pop_data ["age" ] >= min_age ) & (pre_pop_data ["age" ] <= max_age )
475- ]
476- pre_pop = pre_pop_sample .value .values
477-
478439 if download_path :
479440 np .savetxt (
480441 os .path .join (download_path , "population_distribution.csv" ),
481442 pop_2D ,
482443 delimiter = "," ,
483444 )
484- np .savetxt (
485- os .path .join (
486- download_path , "pre_period_population_distribution.csv"
487- ),
488- pre_pop ,
489- delimiter = "," ,
490- )
491445
492- return pop_2D , pre_pop
446+ return pop_2D
493447
494448
495449def pop_rebin (curr_pop_dist , totpers_new ):
@@ -715,7 +669,6 @@ def get_pop_objs(
715669 imm_rates = None ,
716670 infer_pop = False ,
717671 pop_dist = None ,
718- pre_pop_dist = None ,
719672 country_id = UN_COUNTRY_CODE ,
720673 initial_data_year = START_YEAR - 1 ,
721674 final_data_year = START_YEAR + 2 ,
@@ -746,9 +699,6 @@ def get_pop_objs(
746699 infer_pop (bool): =True if want to infer the population
747700 pop_dist (array_like): user provided population distribution,
748701 dimensions are T0+1 x E+S
749- pre_pop_dist (array_like): user provided population distribution
750- for the year before the initial year for calibration,
751- length E+S
752702 country_id (str): country id for UN data
753703 initial_data_year (int): initial year of data to use
754704 (not relevant if have user provided data)
@@ -880,7 +830,7 @@ def get_pop_objs(
880830 initial_pop = pop_dist [0 , :].reshape (1 , pop_dist .shape [- 1 ])
881831 else :
882832 initial_pop = None
883- pop_2D , pre_pop = get_pop (
833+ pop_2D = get_pop (
884834 E ,
885835 S ,
886836 min_age ,
@@ -891,14 +841,13 @@ def get_pop_objs(
891841 infmort_rates ,
892842 imm_rates ,
893843 initial_pop ,
894- pre_pop_dist ,
895844 country_id ,
896- initial_data_year , # TODO: should this be start_data_year?
845+ initial_data_year ,
897846 final_data_year ,
898847 download_path = download_path ,
899848 )
900849 else :
901- pop_2D , pre_pop = get_pop (
850+ pop_2D = get_pop (
902851 E ,
903852 S ,
904853 min_age ,
@@ -910,21 +859,14 @@ def get_pop_objs(
910859 )
911860 else :
912861 # Check first dims of pop_dist as input by user
913- print ("T0 = " , T0 )
914862 assert pop_dist .shape [0 ] == T0 + 1 # population needs to be
915863 # one year longer in order to find immigration rates
916864 assert pop_dist .shape [- 1 ] == E + S
917- # Check that pre_pop specified
918- assert pre_pop_dist is not None
919- assert pre_pop_dist .shape [0 ] == pop_dist .shape [1 ]
920- pre_pop = pre_pop_dist
921865 # Create 2D array of population distribution
922866 pop_2D = np .zeros ((T0 + 1 , E + S ))
923867 for t in range (T0 + 1 ):
924868 pop_EpS = pop_rebin (pop_dist [t , :], E + S )
925869 pop_2D [t , :] = pop_EpS
926- # Get percentage distribution for S periods for pre-TP period
927- pre_pop_EpS = pop_rebin (pre_pop , E + S )
928870 # Get immigration rates if not provided
929871 if imm_rates is None :
930872 imm_rates_orig = get_imm_rates (
@@ -960,26 +902,13 @@ def get_pop_objs(
960902 )
961903 # If the population distribution was given, check it for consistency
962904 # with the fertility, mortality, and immigration rates
963- # if pop_dist is not None and not infer_pop:
964- # len_pop_dist = pop_dist.shape[0]
965- # pop_counter_2D = np.zeros((len_pop_dist, E + S))
966905 len_pop_dist = pop_2D .shape [0 ]
967906 pop_counter_2D = np .zeros ((len_pop_dist , E + S ))
968907 # set initial population distribution in the counterfactual to
969908 # the first year of the user provided distribution
970- # pop_counter_2D[0, :] = pop_dist[0, :]
971909 pop_counter_2D [0 , :] = pop_2D [0 , :]
972910 for t in range (1 , len_pop_dist ):
973911 # find newborns next period
974- # newborns = np.dot(fert_rates[t - 1, :], pop_counter_2D[t - 1, :])
975-
976- # pop_counter_2D[t, 0] = (
977- # 1 - infmort_rates[t - 1]
978- # ) * newborns + imm_rates[t - 1, 0] * pop_counter_2D[t - 1, 0]
979- # pop_counter_2D[t, 1:] = (
980- # pop_counter_2D[t - 1, :-1] * (1 - mort_rates[t - 1, :-1])
981- # + pop_counter_2D[t - 1, 1:] * imm_rates_orig[t - 1, 1:]
982- # )
983912 newborns = np .dot (fert_rates [t - 1 , :], pop_counter_2D [t - 1 , :])
984913
985914 pop_counter_2D [t , 0 ] = (
@@ -990,98 +919,8 @@ def get_pop_objs(
990919 + pop_counter_2D [t - 1 , 1 :] * imm_rates_orig [t - 1 , 1 :]
991920 )
992921 # Check that counterfactual pop dist is close to pop dist given
993- # assert np.allclose(pop_counter_2D, pop_dist)
994922 assert np .allclose (pop_counter_2D , pop_2D )
995923
996- # """"
997- # CHANGE - in OG-Core, we are implicitly assuming pre-TP rates of mortality,
998- # fertility, and immigration are the same as the period 0 rates.
999-
1000- # So let's just infer the pre-pop_dist from those.
1001- # """
1002- # pop1 = pop_2D[0, :]
1003- # fert0 = fert_rates[0, :]
1004- # mort0 = mort_rates[0, :]
1005- # infmort0 = infmort_rates[0]
1006- # imm0 = imm_rates_orig[0, :]
1007- # pre_pop_guess = pop1.copy()
1008-
1009- # # I can't solve this analytically, so set up a system of equation
1010- # # to solve
1011- # def pre_pop_solve(pre_pop_guess, pop1, fert0, mort0, infmort0, imm0):
1012- # pre_pop = pre_pop_guess
1013- # errors = np.zeros(E + S)
1014- # errors[0] = pop1[0] - (
1015- # (1 - infmort0) * (fert0 * pre_pop).sum() + imm0[0] * pre_pop[0]
1016- # )
1017- # errors[1:] = pop1[1:] - (
1018- # pre_pop[:-1] * (1 - mort0[:-1]) + pre_pop[1:] * imm0[1:]
1019- # )
1020- # # print("Max error = ", np.abs(errors).max())
1021- # return errors
1022-
1023- # opt_res = opt.root(
1024- # pre_pop_solve,
1025- # pre_pop_guess,
1026- # args=(pop1, fert0, mort0, infmort0, imm0),
1027- # method="lm",
1028- # )
1029- # pre_pop = opt_res.x
1030- # print(
1031- # "Success? ",
1032- # opt_res.success,
1033- # ", Max diff = ",
1034- # np.abs(opt_res.fun).max(),
1035- # )
1036- # pre_pop_EpS = pop_rebin(pre_pop, E + S)
1037-
1038- # # Check result
1039- # initial_pop_counter = np.zeros(E + S)
1040- # newborns = (fert_rates[0, :] * pre_pop[:]).sum()
1041- # initial_pop_counter[0] = (
1042- # 1 - infmort_rates[0]
1043- # ) * newborns + imm_rates_orig[0, 0] * pre_pop[0]
1044- # initial_pop_counter[1:] = (
1045- # pre_pop[:-1] * (1 - mort_rates[0, :-1])
1046- # + pre_pop[1:] * imm_rates_orig[0, 1:]
1047- # )
1048- # # Test that using pre pop get to pop in period 1
1049- # print("Max diff = ", np.abs(pop_2D[0, :] - initial_pop_counter).max())
1050- # # assert np.allclose(initial_pop_counter, pop_2D[0, :])
1051-
1052- """
1053- NEW CODE - use the actual UN historical data instead of solving backwards
1054- """
1055- # Get percentage distribution for S periods for pre-TP period
1056- # Use the actual UN historical data instead of solving backwards
1057- # pre_pop_EpS = pop_rebin(pre_pop, E + S) -- this assignment is
1058- # above on line 924, but keep for clarity
1059-
1060- # Check result
1061- # Verify that the UN pre-period data is reasonably consistent
1062- # with the period 0 population using the demographic transition equations
1063- initial_pop_counter = np .zeros (E + S )
1064- newborns = (fert_rates [0 , :] * pre_pop_EpS [:]).sum ()
1065- initial_pop_counter [0 ] = (
1066- 1 - infmort_rates [0 ]
1067- ) * newborns + imm_rates_orig [0 , 0 ] * pre_pop_EpS [0 ]
1068- initial_pop_counter [1 :] = (
1069- pre_pop_EpS [:- 1 ] * (1 - mort_rates [0 , :- 1 ])
1070- + pre_pop_EpS [1 :] * imm_rates_orig [0 , 1 :]
1071- )
1072-
1073- max_diff = np .abs (pop_2D [0 , :] - initial_pop_counter ).max ()
1074- print ("Pre-period population verification: Max diff = " , max_diff )
1075-
1076- if max_diff > 100_000 :
1077- print (
1078- "WARNING: Large difference between UN pre-period population "
1079- + "and period 0 population ({:.2f}). " .format (max_diff )
1080- + "This may indicate inconsistencies in the data or "
1081- + "immigration rate calculations, but using UN historical "
1082- + "data as it is more reliable than backward-solved estimates."
1083- )
1084-
1085924 # Create the transition matrix for the population distribution
1086925 # from T0 going forward (i.e., past when we have data on forecasts)
1087926 OMEGA_orig = np .zeros ((E + S , E + S ))
@@ -1115,7 +954,8 @@ def get_pop_objs(
1115954 # steady-state distribution by adjusting immigration rates, holding
1116955 # constant mortality, fertility, and SS growth rates
1117956 imm_tol = 1e-14
1118- fixper = int (1.5 * S + T0 )
957+ fixper = int (1.5 * S )
958+ assert fixper > T0 # ensure that we are fixing period after data
1119959 omega_SSfx = omega_path_lev [fixper , :] / omega_path_lev [fixper , :].sum ()
1120960 imm_objs = (
1121961 fert_rates [fixper , :],
@@ -1140,13 +980,10 @@ def get_pop_objs(
1140980 omega_path_S [fixper , :].reshape ((1 , S )), (T + S - fixper , 1 )
1141981 )
1142982 g_n_path = np .zeros (T + S )
1143- g_n_path [1 : ] = (
983+ g_n_path [: - 1 ] = (
1144984 omega_path_lev [1 :, - S :].sum (axis = 1 )
1145985 - omega_path_lev [:- 1 , - S :].sum (axis = 1 )
1146986 ) / omega_path_lev [:- 1 , - S :].sum (axis = 1 )
1147- g_n_path [0 ] = (
1148- omega_path_lev [0 , - S :].sum () - pre_pop_EpS [- S :].sum ()
1149- ) / pre_pop_EpS [- S :].sum ()
1150987 g_n_path [fixper + 1 :] = g_n_SS
1151988 imm_rates_mat = np .concatenate (
1152989 (
@@ -1158,10 +995,6 @@ def get_pop_objs(
1158995 ),
1159996 axis = 0 ,
1160997 )
1161- # compute pop objects for the year before the model start year
1162- # population distribution
1163- # TODO: can delete this and proobalby any "pre_pop" calculations above
1164- omega_S_preTP = pre_pop_EpS [- S :] / pre_pop_EpS [- S :].sum ()
1165998
1166999 if GraphDiag :
11671000 # Check whether original SS population distribution is close to
0 commit comments