Skip to content

Commit 891f170

Browse files
committed
fix fixper, remove pre_pop references
1 parent 5c27e96 commit 891f170

File tree

2 files changed

+24
-199
lines changed

2 files changed

+24
-199
lines changed

ogcore/demographics.py

Lines changed: 7 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,6 @@ def get_pop(
339339
infmort_rates=None,
340340
imm_rates=None,
341341
initial_pop=None,
342-
pre_pop_dist=None,
343342
country_id=UN_COUNTRY_CODE,
344343
start_year=START_YEAR,
345344
end_year=END_YEAR,
@@ -368,42 +367,18 @@ def get_pop(
368367
and model age
369368
initial_pop_data (Pandas DataFrame): initial population data
370369
for the first year of model calibration (start_year)
371-
pre_pop_dist (Numpy array): population distribution for the year
372-
before the initial year for calibration
373370
country_id (str): country id for UN data
374371
start_year (int): start year data
375372
end_year (int): end year for data
376373
download_path (str): path to save population distribution data
377374
378375
Returns:
379376
pop_2D (Numpy array): population distribution over T0 periods
380-
pre_pop (Numpy array): population distribution one year before
381-
initial year for calibration of omega_S_preTP
382377
"""
383378
# Generate time path of the nonstationary population distribution
384379
# Get path up to end of data year
385380
pop_2D = np.zeros((end_year + 2 - start_year, E + S))
386381
if infer_pop:
387-
if pre_pop_dist is None:
388-
pre_pop_data = get_un_data(
389-
"47",
390-
country_id=country_id,
391-
start_year=start_year,
392-
end_year=start_year,
393-
)
394-
if download_path:
395-
pre_pop_data.to_csv(
396-
os.path.join(download_path, "raw_pre_pop_data_UN.csv"),
397-
index=False,
398-
)
399-
pre_pop_sample = pre_pop_data[
400-
(pre_pop_data["age"] >= min_age)
401-
& (pre_pop_data["age"] <= max_age)
402-
]
403-
pre_pop = pre_pop_sample.value.values
404-
pre_pop_dist = pop_rebin(pre_pop, E + S)
405-
else:
406-
pre_pop = pre_pop_dist
407382
if initial_pop is None:
408383
initial_pop_data = get_un_data(
409384
"47",
@@ -458,38 +433,17 @@ def get_pop(
458433
pop = pop_data_sample.value.values
459434
# Generate the current population distribution given that E+S might
460435
# be less than max_age-min_age+1
461-
# age_per_EpS = np.arange(1, E + S + 1)
462436
pop_EpS = pop_rebin(pop, E + S)
463437
pop_2D[y - start_year, :] = pop_EpS
464438

465-
# get population distribution one year before initial year for
466-
# calibration of omega_S_preTP
467-
pre_pop_data = get_un_data(
468-
"47",
469-
country_id=country_id,
470-
start_year=start_year - 1,
471-
end_year=start_year - 1,
472-
)
473-
pre_pop_sample = pre_pop_data[
474-
(pre_pop_data["age"] >= min_age) & (pre_pop_data["age"] <= max_age)
475-
]
476-
pre_pop = pre_pop_sample.value.values
477-
478439
if download_path:
479440
np.savetxt(
480441
os.path.join(download_path, "population_distribution.csv"),
481442
pop_2D,
482443
delimiter=",",
483444
)
484-
np.savetxt(
485-
os.path.join(
486-
download_path, "pre_period_population_distribution.csv"
487-
),
488-
pre_pop,
489-
delimiter=",",
490-
)
491445

492-
return pop_2D, pre_pop
446+
return pop_2D
493447

494448

495449
def pop_rebin(curr_pop_dist, totpers_new):
@@ -715,7 +669,6 @@ def get_pop_objs(
715669
imm_rates=None,
716670
infer_pop=False,
717671
pop_dist=None,
718-
pre_pop_dist=None,
719672
country_id=UN_COUNTRY_CODE,
720673
initial_data_year=START_YEAR - 1,
721674
final_data_year=START_YEAR + 2,
@@ -746,9 +699,6 @@ def get_pop_objs(
746699
infer_pop (bool): =True if want to infer the population
747700
pop_dist (array_like): user provided population distribution,
748701
dimensions are T0+1 x E+S
749-
pre_pop_dist (array_like): user provided population distribution
750-
for the year before the initial year for calibration,
751-
length E+S
752702
country_id (str): country id for UN data
753703
initial_data_year (int): initial year of data to use
754704
(not relevant if have user provided data)
@@ -880,7 +830,7 @@ def get_pop_objs(
880830
initial_pop = pop_dist[0, :].reshape(1, pop_dist.shape[-1])
881831
else:
882832
initial_pop = None
883-
pop_2D, pre_pop = get_pop(
833+
pop_2D = get_pop(
884834
E,
885835
S,
886836
min_age,
@@ -891,14 +841,13 @@ def get_pop_objs(
891841
infmort_rates,
892842
imm_rates,
893843
initial_pop,
894-
pre_pop_dist,
895844
country_id,
896-
initial_data_year, # TODO: should this be start_data_year?
845+
initial_data_year,
897846
final_data_year,
898847
download_path=download_path,
899848
)
900849
else:
901-
pop_2D, pre_pop = get_pop(
850+
pop_2D = get_pop(
902851
E,
903852
S,
904853
min_age,
@@ -910,21 +859,14 @@ def get_pop_objs(
910859
)
911860
else:
912861
# Check first dims of pop_dist as input by user
913-
print("T0 = ", T0)
914862
assert pop_dist.shape[0] == T0 + 1 # population needs to be
915863
# one year longer in order to find immigration rates
916864
assert pop_dist.shape[-1] == E + S
917-
# Check that pre_pop specified
918-
assert pre_pop_dist is not None
919-
assert pre_pop_dist.shape[0] == pop_dist.shape[1]
920-
pre_pop = pre_pop_dist
921865
# Create 2D array of population distribution
922866
pop_2D = np.zeros((T0 + 1, E + S))
923867
for t in range(T0 + 1):
924868
pop_EpS = pop_rebin(pop_dist[t, :], E + S)
925869
pop_2D[t, :] = pop_EpS
926-
# Get percentage distribution for S periods for pre-TP period
927-
pre_pop_EpS = pop_rebin(pre_pop, E + S)
928870
# Get immigration rates if not provided
929871
if imm_rates is None:
930872
imm_rates_orig = get_imm_rates(
@@ -960,26 +902,13 @@ def get_pop_objs(
960902
)
961903
# If the population distribution was given, check it for consistency
962904
# with the fertility, mortality, and immigration rates
963-
# if pop_dist is not None and not infer_pop:
964-
# len_pop_dist = pop_dist.shape[0]
965-
# pop_counter_2D = np.zeros((len_pop_dist, E + S))
966905
len_pop_dist = pop_2D.shape[0]
967906
pop_counter_2D = np.zeros((len_pop_dist, E + S))
968907
# set initial population distribution in the counterfactual to
969908
# the first year of the user provided distribution
970-
# pop_counter_2D[0, :] = pop_dist[0, :]
971909
pop_counter_2D[0, :] = pop_2D[0, :]
972910
for t in range(1, len_pop_dist):
973911
# find newborns next period
974-
# newborns = np.dot(fert_rates[t - 1, :], pop_counter_2D[t - 1, :])
975-
976-
# pop_counter_2D[t, 0] = (
977-
# 1 - infmort_rates[t - 1]
978-
# ) * newborns + imm_rates[t - 1, 0] * pop_counter_2D[t - 1, 0]
979-
# pop_counter_2D[t, 1:] = (
980-
# pop_counter_2D[t - 1, :-1] * (1 - mort_rates[t - 1, :-1])
981-
# + pop_counter_2D[t - 1, 1:] * imm_rates_orig[t - 1, 1:]
982-
# )
983912
newborns = np.dot(fert_rates[t - 1, :], pop_counter_2D[t - 1, :])
984913

985914
pop_counter_2D[t, 0] = (
@@ -990,98 +919,8 @@ def get_pop_objs(
990919
+ pop_counter_2D[t - 1, 1:] * imm_rates_orig[t - 1, 1:]
991920
)
992921
# Check that counterfactual pop dist is close to pop dist given
993-
# assert np.allclose(pop_counter_2D, pop_dist)
994922
assert np.allclose(pop_counter_2D, pop_2D)
995923

996-
# """"
997-
# CHANGE - in OG-Core, we are implicitly assuming pre-TP rates of mortality,
998-
# fertility, and immigration are the same as the period 0 rates.
999-
1000-
# So let's just infer the pre-pop_dist from those.
1001-
# """
1002-
# pop1 = pop_2D[0, :]
1003-
# fert0 = fert_rates[0, :]
1004-
# mort0 = mort_rates[0, :]
1005-
# infmort0 = infmort_rates[0]
1006-
# imm0 = imm_rates_orig[0, :]
1007-
# pre_pop_guess = pop1.copy()
1008-
1009-
# # I can't solve this analytically, so set up a system of equation
1010-
# # to solve
1011-
# def pre_pop_solve(pre_pop_guess, pop1, fert0, mort0, infmort0, imm0):
1012-
# pre_pop = pre_pop_guess
1013-
# errors = np.zeros(E + S)
1014-
# errors[0] = pop1[0] - (
1015-
# (1 - infmort0) * (fert0 * pre_pop).sum() + imm0[0] * pre_pop[0]
1016-
# )
1017-
# errors[1:] = pop1[1:] - (
1018-
# pre_pop[:-1] * (1 - mort0[:-1]) + pre_pop[1:] * imm0[1:]
1019-
# )
1020-
# # print("Max error = ", np.abs(errors).max())
1021-
# return errors
1022-
1023-
# opt_res = opt.root(
1024-
# pre_pop_solve,
1025-
# pre_pop_guess,
1026-
# args=(pop1, fert0, mort0, infmort0, imm0),
1027-
# method="lm",
1028-
# )
1029-
# pre_pop = opt_res.x
1030-
# print(
1031-
# "Success? ",
1032-
# opt_res.success,
1033-
# ", Max diff = ",
1034-
# np.abs(opt_res.fun).max(),
1035-
# )
1036-
# pre_pop_EpS = pop_rebin(pre_pop, E + S)
1037-
1038-
# # Check result
1039-
# initial_pop_counter = np.zeros(E + S)
1040-
# newborns = (fert_rates[0, :] * pre_pop[:]).sum()
1041-
# initial_pop_counter[0] = (
1042-
# 1 - infmort_rates[0]
1043-
# ) * newborns + imm_rates_orig[0, 0] * pre_pop[0]
1044-
# initial_pop_counter[1:] = (
1045-
# pre_pop[:-1] * (1 - mort_rates[0, :-1])
1046-
# + pre_pop[1:] * imm_rates_orig[0, 1:]
1047-
# )
1048-
# # Test that using pre pop get to pop in period 1
1049-
# print("Max diff = ", np.abs(pop_2D[0, :] - initial_pop_counter).max())
1050-
# # assert np.allclose(initial_pop_counter, pop_2D[0, :])
1051-
1052-
"""
1053-
NEW CODE - use the actual UN historical data instead of solving backwards
1054-
"""
1055-
# Get percentage distribution for S periods for pre-TP period
1056-
# Use the actual UN historical data instead of solving backwards
1057-
# pre_pop_EpS = pop_rebin(pre_pop, E + S) -- this assignment is
1058-
# above on line 924, but keep for clarity
1059-
1060-
# Check result
1061-
# Verify that the UN pre-period data is reasonably consistent
1062-
# with the period 0 population using the demographic transition equations
1063-
initial_pop_counter = np.zeros(E + S)
1064-
newborns = (fert_rates[0, :] * pre_pop_EpS[:]).sum()
1065-
initial_pop_counter[0] = (
1066-
1 - infmort_rates[0]
1067-
) * newborns + imm_rates_orig[0, 0] * pre_pop_EpS[0]
1068-
initial_pop_counter[1:] = (
1069-
pre_pop_EpS[:-1] * (1 - mort_rates[0, :-1])
1070-
+ pre_pop_EpS[1:] * imm_rates_orig[0, 1:]
1071-
)
1072-
1073-
max_diff = np.abs(pop_2D[0, :] - initial_pop_counter).max()
1074-
print("Pre-period population verification: Max diff = ", max_diff)
1075-
1076-
if max_diff > 100_000:
1077-
print(
1078-
"WARNING: Large difference between UN pre-period population "
1079-
+ "and period 0 population ({:.2f}). ".format(max_diff)
1080-
+ "This may indicate inconsistencies in the data or "
1081-
+ "immigration rate calculations, but using UN historical "
1082-
+ "data as it is more reliable than backward-solved estimates."
1083-
)
1084-
1085924
# Create the transition matrix for the population distribution
1086925
# from T0 going forward (i.e., past when we have data on forecasts)
1087926
OMEGA_orig = np.zeros((E + S, E + S))
@@ -1115,7 +954,8 @@ def get_pop_objs(
1115954
# steady-state distribution by adjusting immigration rates, holding
1116955
# constant mortality, fertility, and SS growth rates
1117956
imm_tol = 1e-14
1118-
fixper = int(1.5 * S + T0)
957+
fixper = int(1.5 * S)
958+
assert fixper > T0 # ensure that we are fixing period after data
1119959
omega_SSfx = omega_path_lev[fixper, :] / omega_path_lev[fixper, :].sum()
1120960
imm_objs = (
1121961
fert_rates[fixper, :],
@@ -1140,13 +980,10 @@ def get_pop_objs(
1140980
omega_path_S[fixper, :].reshape((1, S)), (T + S - fixper, 1)
1141981
)
1142982
g_n_path = np.zeros(T + S)
1143-
g_n_path[1:] = (
983+
g_n_path[:-1] = (
1144984
omega_path_lev[1:, -S:].sum(axis=1)
1145985
- omega_path_lev[:-1, -S:].sum(axis=1)
1146986
) / omega_path_lev[:-1, -S:].sum(axis=1)
1147-
g_n_path[0] = (
1148-
omega_path_lev[0, -S:].sum() - pre_pop_EpS[-S:].sum()
1149-
) / pre_pop_EpS[-S:].sum()
1150987
g_n_path[fixper + 1 :] = g_n_SS
1151988
imm_rates_mat = np.concatenate(
1152989
(
@@ -1158,10 +995,6 @@ def get_pop_objs(
1158995
),
1159996
axis=0,
1160997
)
1161-
# compute pop objects for the year before the model start year
1162-
# population distribution
1163-
# TODO: can delete this and proobalby any "pre_pop" calculations above
1164-
omega_S_preTP = pre_pop_EpS[-S:] / pre_pop_EpS[-S:].sum()
1165998

1166999
if GraphDiag:
11671000
# Check whether original SS population distribution is close to

0 commit comments

Comments
 (0)