Skip to content

Commit 3f6e4e6

Browse files
authored
Merge pull request #2 from ds2010/main
update to v0.3
2 parents c784b90 + bddbf42 commit 3f6e4e6

File tree

8 files changed

+231
-26
lines changed

8 files changed

+231
-26
lines changed

README.md

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Stochastic Frontier Analysis (SFA)
22

3-
43
## Installation
54

65
The [`pySFA`](https://pypi.org/project/pysfa/) package is now available on PyPI and the latest development version can be installed from the GitHub repository [`pySFA`](https://github.com/gEAPA/pySFA). Please feel free to download and test it. We welcome any bug reports and feedback.
@@ -12,3 +11,40 @@ The [`pySFA`](https://pypi.org/project/pysfa/) package is now avaiable on PyPI a
1211
#### GitHub
1312

1413
pip install -U git+https://github.com/gEAPA/pySFA
14+
15+
16+
## Authors
17+
18+
- [Sheng Dai](https://daisheng.io), PhD, Turku School of Economics, University of Turku, Finland.
19+
- [Zhiqiang Liao](https://liaozhiqiang.com), Doctoral Researcher, Aalto University School of Business, Finland.
20+
21+
22+
## Demo: Estimating a production function by `pySFA`
23+
24+
```python
25+
import numpy as np
26+
import pandas as pd
27+
from pysfa import SFA
28+
from pysfa.dataset import load_Tim_Coelli_frontier
29+
30+
31+
# import the data from Tim Coelli Frontier 4.1
32+
df = load_Tim_Coelli_frontier(x_select=['labour', 'capital'],
33+
y_select=['output'])
34+
y = np.log(df.y)
35+
x = np.log(df.x)
36+
37+
# Estimate SFA model
38+
res = SFA.SFA(y, x, fun=SFA.FUN_PROD, lamda0=1, method=SFA.TE_teJ)
39+
40+
# print estimates
41+
print(res.get_beta())
42+
print(res.get_lambda())
43+
print(res.get_sigma2())
44+
print(res.get_sigmau2())
45+
print(res.get_sigmav2())
46+
47+
# print TE
48+
print(res.get_technical_efficiency())
49+
```
50+

pysfa/SFA.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,20 @@ def __init__(self, y, x, fun=FUN_PROD, lamda0=1, method=TE_teJ):
1919
x (float) of shape (n, d): input variables.
2020
fun (String, optional): FUN_PROD (production frontier) or FUN_COST (cost frontier). Defaults to FUN_PROD.
2121
"""
22-
self.y, self.x= tools.assert_valid_basic_data(y, x, fun)
23-
22+
self.y, self.x = tools.assert_valid_basic_data(y, x, fun)
2423
self.fun, self.lamda0, self.method = fun, lamda0, method
2524

2625
def __mle(self):
2726

2827
# initial OLS regression
2928
reg = LinearRegression().fit(X=self.x, y=self.y)
3029
beta0 = np.concatenate(([reg.intercept_], reg.coef_), axis=0)
31-
print(beta0)
3230
parm = np.concatenate((beta0, [self.lamda0]), axis=0)
3331

3432
# Maximum Likelihood Estimation
3533
def __loglik(parm):
3634
''' Log-likelihood function'''
37-
N, K = len(self.x[0]), len(self.x[1]) + 1
35+
N, K = len(self.x), len(self.x[0]) + 1
3836
beta0, lamda0 = parm[0:K], parm[K]
3937
e = self.__resfun(beta0)
4038
s = np.sum(e**2)/N
@@ -45,7 +43,7 @@ def __loglik(parm):
4543
fit = opt.minimize(__loglik, parm, method='BFGS').x
4644

4745
# beta, residuals, lambda, sigma^2
48-
K = len(self.x[1]) + 1
46+
K = len(self.x[0]) + 1
4947
self.beta = fit[0:K]
5048
self.residuals = self.__resfun(self.beta)
5149
self.lamda = fit[K]
@@ -62,7 +60,10 @@ def __teJ(self):
6260
'''Efficiencies estimates using the conditional mean approach
6361
Jondrow et al. (1982, 235)'''
6462

65-
self.sign = 1
63+
if self.fun == FUN_COST:
64+
self.sign = -1
65+
else:
66+
self.sign = 1
6667
self.ustar = - self.sign * self.residuals * \
6768
self.lamda**2/(1+self.lamda**2)
6869
self.sstar = self.lamda/(1+self.lamda**2)*sqrt(self.sigma2)
@@ -73,7 +74,10 @@ def __te(self):
7374
'''Efficiencies estimated by minimizing the mean square error;
7475
Eq. (7.21) in Bogetoft and Otto (2011, 219) and Battese and Coelli (1988, 392)'''
7576

76-
self.sign = 1
77+
if self.fun == FUN_COST:
78+
self.sign = -1
79+
else:
80+
self.sign = 1
7781
self.ustar = - self.sign * self.residuals * \
7882
self.lamda**2/(1+self.lamda**2)
7983
self.sstar = self.lamda/(1+self.lamda**2)*sqrt(self.sigma2)
@@ -85,7 +89,10 @@ def __teMod(self):
8589
'''Efficiencies estimates using the conditional mode approach;
8690
Bogetoft and Otto (2011, 219), Jondrow et al. (1982, 235)'''
8791

88-
self.sign = 1
92+
if self.fun == FUN_COST:
93+
self.sign = -1
94+
else:
95+
self.sign = 1
8996
self.ustar = - self.sign * self.residuals * \
9097
self.lamda**2/(1+self.lamda**2)
9198
return np.exp(np.minimum(0, -self.ustar))
@@ -105,7 +112,7 @@ def get_technical_efficiency(self):
105112
elif self.method == TE_teMod:
106113
return self.__teMod()
107114
else:
108-
raise ValueError("Undefined estimation technique.")
115+
raise ValueError("Undefined decomposition technique.")
109116

110117
def get_beta(self):
111118
'''Return the estimated coefficients'''

pysfa/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
from . import constant
2+
from . import dataset
13
from . import SFA
24

35
__all__ = [
6+
'constant',
7+
'dataset',
48
'SFA',
59
]

pysfa/constant.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,38 @@
1-
# Frontier
1+
# function
22
FUN_PROD = "prod"
33
"""
4-
FUN_PROD: Production frontier.
4+
FUN_PROD: Production function.
55
"""
66

77
FUN_COST = "cost"
88
"""
9-
FUN_COST: Cost frontier.
9+
FUN_COST: Cost function.
1010
"""
1111

1212
FUN_Categories = {
13-
FUN_PROD: "Production frontier",
14-
FUN_COST: "Cost frontier"
13+
FUN_PROD: "Production function",
14+
FUN_COST: "Cost function"
1515
}
1616

1717

1818
# Technical inefficiency
1919
TE_teJ = "teJ"
2020
"""
21-
RED_MOM: Method of moments.
21+
TE_teJ: Using conditional mean approach.
2222
"""
2323

2424
TE_te = "te"
2525
"""
26-
RED_QLE: Quassi-likelihood estimation.
26+
TE_te: Minimizing the mean square error.
2727
"""
2828

2929
TE_teMod = "teMod"
3030
"""
31-
RED_KDE: Kernel deconvolution estimation.
31+
TE_teMod: Using conditional mode approach.
3232
"""
3333

3434
RED_Categories = {
35-
TE_teJ: "Method of moments",
36-
TE_te: "Quassi-likelihood estimation",
37-
TE_teMod: "Kernel deconvolution estimation"
35+
TE_teJ: "Conditional mean",
36+
TE_te: "Mean square error",
37+
TE_teMod: "Conditional mode"
3838
}
39-

pysfa/data/electricityFirms.csv

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
OPEX,CAPEX,TOTEX,Energy,Length,Customers,PerUndGr
2+
681,729,1612,75,878,4933,0.11
3+
559,673,1659,62,964,6149,0.21
4+
836,851,1708,78,676,6098,0.75
5+
7559,8384,18918,683,12522,55226,0.13
6+
424,562,1167,27,697,1670,0.03
7+
1483,1587,3395,295,953,22949,0.65
8+
658,570,1333,44,917,3599,0.11
9+
1433,1311,3518,171,1580,11081,0.16
10+
850,564,1415,98,116,377,1
11+
1155,1108,2469,203,740,10134,0.64
12+
14235,11594,28750,2203,7007,167239,0.61
13+
44481,50321,117554,6600,67611,420473,0.23
14+
1116,766,1925,117,436,7176,0.61
15+
1604,946,2747,135,902,8614,0.46
16+
27723,19818,48605,3601,6007,334757,0.92
17+
2480,2420,5486,409,2773,14953,0.19
18+
494,476,1091,43,506,3156,0.32
19+
801,466,1297,61,541,4296,0.05
20+
875,555,1691,62,1081,6044,0.07
21+
2133,1913,4605,256,2540,23361,0.31
22+
1139,1635,3102,197,1817,6071,0.05
23+
907,1127,2260,200,1106,14936,0.49
24+
120,106,341,17,133,772,0.06
25+
3454,2428,6100,489,1312,44594,0.87
26+
535,479,1440,53,789,3391,0.05
27+
974,754,1958,95,971,6806,0.37
28+
929,853,1976,75,869,5165,0.24
29+
9842,13925,29722,985,25611,95367,0.09
30+
548,412,1254,123,51,24,0.44
31+
1456,1136,2665,165,875,14646,0.71
32+
725,569,1376,73,716,5069,0.39
33+
2525,388,3121,540,70,58,0.31
34+
2002,1442,3864,300,1301,20325,0.47
35+
1846,1112,3221,207,429,16878,0.73
36+
982,1094,2561,99,1618,8566,0.2
37+
2727,2151,5779,164,3330,12231,0.12
38+
1799,2073,4380,171,3736,15217,0.06
39+
604,675,1423,73,989,5711,0.25
40+
400,430,907,40,646,2968,0.2
41+
4092,3173,7915,482,3294,42952,0.4
42+
3362,3078,6639,456,1375,48140,0.66
43+
390,438,868,23,589,2227,0.18
44+
10852,9366,25556,1233,12512,98650,0.13
45+
688,700,1540,85,866,6022,0.36
46+
761,701,1564,100,800,7193,0.4
47+
453,576,1229,25,1078,3342,0.04
48+
4076,4007,9807,494,4696,43911,0.29
49+
308,297,669,17,432,1752,0.03
50+
2746,2529,6097,315,4042,26265,0.2
51+
5614,5509,12154,1042,4296,75870,0.6
52+
400,519,1186,39,614,2211,0.01
53+
1821,1753,4020,223,2117,12945,0.09
54+
794,747,1589,98,418,5146,0.66
55+
2269,2795,6414,348,2127,21072,0.37
56+
711,556,1515,77,762,4513,0.16
57+
4609,5342,10600,993,3205,80702,0.7
58+
1766,2338,5431,402,3207,25994,0.13
59+
813,666,1872,130,905,5394,0.19
60+
884,1104,2206,138,1423,9015,0.26
61+
1662,1358,3767,117,2532,9930,0.24
62+
81,106,268,22,133,1467,0.22
63+
11776,11864,28295,988,20934,84445,0.04
64+
4021,3767,9689,749,3225,47572,0.53
65+
2597,3224,7226,378,3567,30801,0.21
66+
995,848,1871,95,340,7812,0.89
67+
548,587,1280,43,977,4272,0.02
68+
1573,1780,3539,237,882,19455,0.52
69+
4129,4001,9853,440,6330,26798,0.22
70+
2151,1450,3758,266,772,21662,0.83
71+
2438,2496,5499,316,4117,22313,0.28
72+
14064,15175,37368,1601,24485,106336,0.08
73+
2058,1521,3735,268,928,19899,0.74
74+
8643,6819,16141,1654,3567,124661,0.59
75+
483,367,987,37,730,2611,0.08
76+
1018,939,2067,158,822,10537,0.56
77+
1593,2326,5105,196,3470,13391,0.09
78+
7501,4734,12687,1141,2360,67456,0.62
79+
305,411,861,19,520,1207,0.17
80+
5426,6446,12831,787,5808,60239,0.41
81+
2618,2795,6055,293,3741,23446,0.2
82+
1033,951,2156,137,902,11654,0.39
83+
6786,6638,13794,1281,3009,93769,0.75
84+
2169,2172,5054,210,3693,17129,0.16
85+
40787,45434,108310,4825,60659,378089,0.18
86+
2741,2475,6162,310,3381,19059,0.16
87+
307,225,594,28,351,2078,0.07
88+
321,281,672,30,338,2008,0.32
89+
300,289,616,15,318,1364,0.01
90+
891,693,1776,105,575,9084,0.59

pysfa/dataset.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import pandas as pd
2+
import numpy as np
3+
import os
4+
5+
file_path = os.path.dirname(__file__)
6+
7+
8+
class production_data:
    """Plain container for one of the example datasets shipped with pySFA."""

    def __init__(self, dmu, x, y, b=None, z=None):
        """Store the dataset components on the instance without copying.

        Args:
            dmu (String): decision making unit; kept as
                ``decision_making_unit``.
            x (Numbers): input variables.
            y (Numbers): output variables.
            b (Numbers, optional): bad output variables. Defaults to None.
            z (Numbers, optional): contextual variables. Defaults to None.
        """
        self.decision_making_unit = dmu
        self.x = x
        self.y = y
        self.b = b
        self.z = z
24+
25+
26+
def load_Finnish_electricity_firm(x_select=['Energy', 'Length', 'Customers'], y_select=['OPEX', 'CAPEX', 'TOTEX'], z_select=['PerUndGr']):
    """Load the bundled Finnish electricity firm data.

    Reads ``data/electricityFirms.csv`` from the package directory and
    stacks the requested columns side by side, one column per selected name.

    Args:
        x_select (list, optional): input variables. Defaults to ['Energy', 'Length', 'Customers'].
        y_select (list, optional): output variable. Defaults to ['OPEX', 'CAPEX', 'TOTEX'].
        z_select (list, optional): contextual variable. Defaults to ['PerUndGr'].
            Pass None to load no contextual variables.

    Returns:
        production_data: selected input-output data; its ``z`` is None when
        ``z_select`` is None. The decision making units are the row index
        of the CSV file.
    """
    dataframe = pd.read_csv(
        file_path+"/data/electricityFirms.csv")

    def stack(columns):
        # One 1-D array per selected column, joined column-wise.
        return np.column_stack(
            [np.asanyarray(dataframe[selected]) for selected in columns])

    dmu = np.asanyarray(dataframe.index.tolist())
    x = stack(x_select)
    y = stack(y_select)
    # z has to be bound on both paths: it used to be assigned only inside
    # the `if`, so z_select=None raised NameError at the return statement.
    z = stack(z_select) if z_select is not None else None
    return production_data(dmu, x, y, z=z)
48+
49+
50+
def load_Tim_Coelli_frontier(x_select=['capital', 'labour'], y_select=['output']):
    """Load the bundled Tim Coelli Frontier 4.1 data.

    Reads ``data/41Firm.csv`` from the package directory; the ``firm``
    column supplies the decision making units.

    Args:
        x_select (list, optional): input variables. Defaults to ['capital', 'labour'].
        y_select (list, optional): output variable. Defaults to ['output'].

    Returns:
        production_data: selected input-output data, one column per
        selected name.
    """
    table = pd.read_csv(file_path+"/data/41Firm.csv")
    firms = np.asanyarray(table['firm']).T

    input_columns = []
    for name in x_select:
        input_columns.append(np.asanyarray(table[name]).T)

    output_columns = []
    for name in y_select:
        output_columns.append(np.asanyarray(table[name]).T)

    inputs = np.column_stack(input_columns)
    outputs = np.column_stack(output_columns)
    return production_data(firms, inputs, outputs)

pysfa/utils/tools.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@ def assert_valid_basic_data(y, x, fun):
2525

2626
if len(y_shape) == 2 and y_shape[1] != 1:
2727
raise ValueError(
28-
"The multidimensional output data is supported by direciontal based models.")
28+
"The output must be one dimensional array.")
2929

3030
if y_shape[0] != x_shape[0]:
3131
raise ValueError(
3232
"x and y must have the same length.")
3333

3434
return y, x
3535

36+
3637
def trans_list(li):
3738
if type(li) == list:
3839
return li
@@ -56,4 +57,4 @@ def to_2d_list(li):
5657
for value in li:
5758
rl.append([value])
5859
return rl
59-
return li
60+
return li

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55

66
setup_args = dict(
77
name='pysfa',
8-
version='0.2',
8+
version='0.3',
99
description='A Python Package for Stochastic Frontier Analysis',
1010
long_description_content_type="text/markdown",
1111
long_description=README,
1212
license='MIT',
1313
packages=find_packages(),
14-
author='Sheng Dai',
14+
author='Sheng Dai, Zhiqiang Liao',
1515
author_email='[email protected]',
1616
keywords=['SFA', 'MLE', 'TE'],
1717
url='https://github.com/gEAPA/pySFA',
@@ -30,6 +30,7 @@
3030

3131
install_requires = [
3232
'numpy>=1.19.2',
33+
'pandas>=1.1.3',
3334
'scipy>=1.5.2',
3435
'scikit-learn>=1.2.2',
3536
]

0 commit comments

Comments
 (0)