Merge pull request #2 from ds2010/main

ds2010 · web-flow · commit 3f6e4e6026f3 · 2023-05-09T20:00:03.000+03:00
update to v0.3
diff --git a/README.md b/README.md
@@ -1,6 +1,5 @@
 # Stochastic Frontier Analysis (SFA)
 
-
 ## Installation
 
 The [`pySFA`](https://pypi.org/project/pysfa/) package is now available on PyPI and the latest development version can be installed from the GitHub repository [`pySFA`](https://github.com/gEAPA/pySFA). Please feel free to download and test it. We welcome any bug reports and feedback.
@@ -12,3 +11,40 @@ The [`pySFA`](https://pypi.org/project/pysfa/) package is now avaiable on PyPI a
 #### GitHub
 
     pip install -U git+https://github.com/gEAPA/pySFA
+
+
+## Authors
+
+- [Sheng Dai](https://daisheng.io), PhD, Turku School of Economics, University of Turku, Finland.
+- [Zhiqiang Liao](https://liaozhiqiang.com), Doctoral Researcher, Aalto University School of Business, Finland.
+
+
+## Demo: Estimating a production function by `pySFA`
+
+```python
+import numpy as np
+import pandas as pd
+from pysfa import SFA
+from pysfa.dataset import load_Tim_Coelli_frontier
+
+
+# import the data from Tim Coelli Frontier 4.1
+df = load_Tim_Coelli_frontier(x_select=['labour', 'capital'],
+                              y_select=['output'])
+y = np.log(df.y)
+x = np.log(df.x)
+
+# Estimate SFA model
+res = SFA.SFA(y, x, fun=SFA.FUN_PROD, lamda0=1, method=SFA.TE_teJ)
+
+# print estimates
+print(res.get_beta())
+print(res.get_lambda())
+print(res.get_sigma2())
+print(res.get_sigmau2())
+print(res.get_sigmav2())
+
+# print TE
+print(res.get_technical_efficiency())
+```
+
diff --git a/pysfa/SFA.py b/pysfa/SFA.py
@@ -19,22 +19,20 @@ def __init__(self, y, x, fun=FUN_PROD, lamda0=1, method=TE_teJ):
               x (float) of shape (n, d): input variables.
               fun (String, optional): FUN_PROD (production frontier) or FUN_COST (cost frontier). Defaults to FUN_PROD.
           """
-        self.y, self.x= tools.assert_valid_basic_data(y, x, fun)
-
+        self.y, self.x = tools.assert_valid_basic_data(y, x, fun)
         self.fun, self.lamda0, self.method = fun, lamda0, method
 
     def __mle(self):
 
         # initial OLS regression
         reg = LinearRegression().fit(X=self.x, y=self.y)
         beta0 = np.concatenate(([reg.intercept_], reg.coef_), axis=0)
-        print(beta0)
         parm = np.concatenate((beta0, [self.lamda0]), axis=0)
 
         # Maximum Likelihood Estimation
         def __loglik(parm):
             ''' Log-likelihood function'''
-            N, K = len(self.x[0]), len(self.x[1]) + 1
+            N, K = len(self.x), len(self.x[0]) + 1
             beta0, lamda0 = parm[0:K], parm[K]
             e = self.__resfun(beta0)
             s = np.sum(e**2)/N
@@ -45,7 +43,7 @@ def __loglik(parm):
         fit = opt.minimize(__loglik, parm, method='BFGS').x
 
         # beta, residuals, lambda, sigma^2
-        K = len(self.x[1]) + 1
+        K = len(self.x[0]) + 1
         self.beta = fit[0:K]
         self.residuals = self.__resfun(self.beta)
         self.lamda = fit[K]
@@ -62,7 +60,10 @@ def __teJ(self):
         '''Efficiencies estimates using the conditional mean approach 
             Jondrow et al. (1982, 235)'''
 
-        self.sign = 1
+        if self.fun == FUN_COST:
+            self.sign = -1  # cost frontier: opposite residual sign in ustar below
+        else:
+            self.sign = 1
         self.ustar = - self.sign * self.residuals * \
             self.lamda**2/(1+self.lamda**2)
         self.sstar = self.lamda/(1+self.lamda**2)*sqrt(self.sigma2)
@@ -73,7 +74,10 @@ def __te(self):
         '''Efficiencies estimated by minimizing the mean square error; 
             Eq. (7.21) in Bogetoft and Otto (2011, 219) and Battese and Coelli (1988, 392)'''
 
-        self.sign = 1
+        if self.fun == FUN_COST:
+            self.sign = -1  # cost frontier: opposite residual sign in ustar below
+        else:
+            self.sign = 1
         self.ustar = - self.sign * self.residuals * \
             self.lamda**2/(1+self.lamda**2)
         self.sstar = self.lamda/(1+self.lamda**2)*sqrt(self.sigma2)
@@ -85,7 +89,10 @@ def __teMod(self):
         '''Efficiencies estimates using the conditional mode approach;
             Bogetoft and Otto (2011, 219), Jondrow et al. (1982, 235)'''
 
-        self.sign = 1
+        if self.fun == FUN_COST:
+            self.sign = -1  # cost frontier: opposite residual sign in ustar below
+        else:
+            self.sign = 1
         self.ustar = - self.sign * self.residuals * \
             self.lamda**2/(1+self.lamda**2)
         return np.exp(np.minimum(0, -self.ustar))
@@ -105,7 +112,7 @@ def get_technical_efficiency(self):
         elif self.method == TE_teMod:
             return self.__teMod()
         else:
-            raise ValueError("Undefined estimation technique.")
+            raise ValueError("Undefined decomposition technique.")
 
     def get_beta(self):
         '''Return the estimated coefficients'''
diff --git a/pysfa/__init__.py b/pysfa/__init__.py
@@ -1,5 +1,9 @@
+from . import constant
+from . import dataset
 from . import SFA
 
 __all__ = [
+    'constant',
+    'dataset',
     'SFA',
 ]
diff --git a/pysfa/constant.py b/pysfa/constant.py
@@ -1,39 +1,38 @@
-# Frontier
+# function
 FUN_PROD = "prod"
 """
-FUN_PROD: Production frontier.
+FUN_PROD: Production function.
 """
 
 FUN_COST = "cost"
 """
-FUN_COST: Cost frontier.
+FUN_COST: Cost function.
 """
 
 FUN_Categories = {
-    FUN_PROD: "Production frontier",
-    FUN_COST: "Cost frontier"
+    FUN_PROD: "Production function",
+    FUN_COST: "Cost function"
 }
 
 
 # Technical inefficiency
 TE_teJ = "teJ"
 """
-RED_MOM: Method of moments.
+TE_teJ: Using conditional mean approach.
 """
 
 TE_te = "te"
 """
-RED_QLE: Quassi-likelihood estimation.
+TE_te: Minimizing the mean square error.
 """
 
 TE_teMod = "teMod"
 """
-RED_KDE: Kernel deconvolution estimation.
+TE_teMod: Using conditional mode approach.
 """
 
 RED_Categories = {
-    TE_teJ: "Method of moments",
-    TE_te: "Quassi-likelihood estimation",
-    TE_teMod: "Kernel deconvolution estimation"
+    TE_teJ: "Conditional mean",
+    TE_te: "Mean square error",
+    TE_teMod: "Conditional mode"
 }
-
diff --git a/pysfa/data/electricityFirms.csv b/pysfa/data/electricityFirms.csv
@@ -0,0 +1,90 @@
+OPEX,CAPEX,TOTEX,Energy,Length,Customers,PerUndGr
+681,729,1612,75,878,4933,0.11
+559,673,1659,62,964,6149,0.21
+836,851,1708,78,676,6098,0.75
+7559,8384,18918,683,12522,55226,0.13
+424,562,1167,27,697,1670,0.03
+1483,1587,3395,295,953,22949,0.65
+658,570,1333,44,917,3599,0.11
+1433,1311,3518,171,1580,11081,0.16
+850,564,1415,98,116,377,1
+1155,1108,2469,203,740,10134,0.64
+14235,11594,28750,2203,7007,167239,0.61
+44481,50321,117554,6600,67611,420473,0.23
+1116,766,1925,117,436,7176,0.61
+1604,946,2747,135,902,8614,0.46
+27723,19818,48605,3601,6007,334757,0.92
+2480,2420,5486,409,2773,14953,0.19
+494,476,1091,43,506,3156,0.32
+801,466,1297,61,541,4296,0.05
+875,555,1691,62,1081,6044,0.07
+2133,1913,4605,256,2540,23361,0.31
+1139,1635,3102,197,1817,6071,0.05
+907,1127,2260,200,1106,14936,0.49
+120,106,341,17,133,772,0.06
+3454,2428,6100,489,1312,44594,0.87
+535,479,1440,53,789,3391,0.05
+974,754,1958,95,971,6806,0.37
+929,853,1976,75,869,5165,0.24
+9842,13925,29722,985,25611,95367,0.09
+548,412,1254,123,51,24,0.44
+1456,1136,2665,165,875,14646,0.71
+725,569,1376,73,716,5069,0.39
+2525,388,3121,540,70,58,0.31
+2002,1442,3864,300,1301,20325,0.47
+1846,1112,3221,207,429,16878,0.73
+982,1094,2561,99,1618,8566,0.2
+2727,2151,5779,164,3330,12231,0.12
+1799,2073,4380,171,3736,15217,0.06
+604,675,1423,73,989,5711,0.25
+400,430,907,40,646,2968,0.2
+4092,3173,7915,482,3294,42952,0.4
+3362,3078,6639,456,1375,48140,0.66
+390,438,868,23,589,2227,0.18
+10852,9366,25556,1233,12512,98650,0.13
+688,700,1540,85,866,6022,0.36
+761,701,1564,100,800,7193,0.4
+453,576,1229,25,1078,3342,0.04
+4076,4007,9807,494,4696,43911,0.29
+308,297,669,17,432,1752,0.03
+2746,2529,6097,315,4042,26265,0.2
+5614,5509,12154,1042,4296,75870,0.6
+400,519,1186,39,614,2211,0.01
+1821,1753,4020,223,2117,12945,0.09
+794,747,1589,98,418,5146,0.66
+2269,2795,6414,348,2127,21072,0.37
+711,556,1515,77,762,4513,0.16
+4609,5342,10600,993,3205,80702,0.7
+1766,2338,5431,402,3207,25994,0.13
+813,666,1872,130,905,5394,0.19
+884,1104,2206,138,1423,9015,0.26
+1662,1358,3767,117,2532,9930,0.24
+81,106,268,22,133,1467,0.22
+11776,11864,28295,988,20934,84445,0.04
+4021,3767,9689,749,3225,47572,0.53
+2597,3224,7226,378,3567,30801,0.21
+995,848,1871,95,340,7812,0.89
+548,587,1280,43,977,4272,0.02
+1573,1780,3539,237,882,19455,0.52
+4129,4001,9853,440,6330,26798,0.22
+2151,1450,3758,266,772,21662,0.83
+2438,2496,5499,316,4117,22313,0.28
+14064,15175,37368,1601,24485,106336,0.08
+2058,1521,3735,268,928,19899,0.74
+8643,6819,16141,1654,3567,124661,0.59
+483,367,987,37,730,2611,0.08
+1018,939,2067,158,822,10537,0.56
+1593,2326,5105,196,3470,13391,0.09
+7501,4734,12687,1141,2360,67456,0.62
+305,411,861,19,520,1207,0.17
+5426,6446,12831,787,5808,60239,0.41
+2618,2795,6055,293,3741,23446,0.2
+1033,951,2156,137,902,11654,0.39
+6786,6638,13794,1281,3009,93769,0.75
+2169,2172,5054,210,3693,17129,0.16
+40787,45434,108310,4825,60659,378089,0.18
+2741,2475,6162,310,3381,19059,0.16
+307,225,594,28,351,2078,0.07
+321,281,672,30,338,2008,0.32
+300,289,616,15,318,1364,0.01
+891,693,1776,105,575,9084,0.59
diff --git a/pysfa/dataset.py b/pysfa/dataset.py
@@ -0,0 +1,67 @@
+import pandas as pd
+import numpy as np
+import os
+
+file_path = os.path.dirname(__file__)
+
+
+class production_data:
+    """Plain container for one of the example datasets shipped with pySFA.
+    """
+
+    def __init__(self, dmu, x, y, b=None, z=None):
+        """Store the dataset components as attributes, unchanged.
+
+        Args:
+            dmu (String): decision making unit; kept as ``decision_making_unit``.
+            x (Numbers): input variables; kept as ``x``.
+            y (Numbers): output variables; kept as ``y``.
+            b (Numbers, optional): bad output variables; kept as ``b``. Defaults to None.
+            z (Numbers, optional): contextual variables; kept as ``z``. Defaults to None.
+        """
+        self.decision_making_unit = dmu
+        self.x = x
+        self.y = y
+        self.b = b
+        self.z = z
+
+
+def load_Finnish_electricity_firm(x_select=['Energy', 'Length', 'Customers'], y_select=['OPEX', 'CAPEX', 'TOTEX'], z_select=['PerUndGr']):
+    """Load the bundled Finnish electricity firm data (data/electricityFirms.csv).
+
+    Args:
+        x_select (list, optional): columns stacked into ``x`` (input variables). Defaults to ['Energy', 'Length', 'Customers'].
+        y_select (list, optional): columns stacked into ``y`` (output variables). Defaults to ['OPEX', 'CAPEX', 'TOTEX'].
+        z_select (list, optional): columns stacked into ``z`` (contextual variables); pass None to load none. Defaults to ['PerUndGr'].
+
+    Returns:
+        production_data: the row index of the CSV as ``decision_making_unit``,
+        plus ``x``, ``y`` and ``z`` with one column per selected name
+        (``z`` is None when ``z_select`` is None).
+    """
+    dataframe = pd.read_csv(
+        os.path.join(file_path, "data", "electricityFirms.csv"))
+    # The CSV has no identifier column, so the row index serves as the DMU label.
+    dmu = np.asanyarray(dataframe.index.tolist()).T
+    x = np.column_stack(
+        [np.asanyarray(dataframe[selected]).T for selected in x_select])
+    y = np.column_stack(
+        [np.asanyarray(dataframe[selected]).T for selected in y_select])
+    # Bind z up front: with z_select=None the name would otherwise be unbound
+    # at the return statement and raise UnboundLocalError.
+    z = None
+    if z_select is not None:
+        z = np.column_stack(
+            [np.asanyarray(dataframe[selected]).T for selected in z_select])
+    return production_data(dmu, x, y, z=z)
+
+
+def load_Tim_Coelli_frontier(x_select=['capital', 'labour'], y_select=['output']):
+    """Load the bundled Tim Coelli Frontier 4.1 example data (data/41Firm.csv).
+
+    Args:
+        x_select (list, optional): columns stacked into ``x`` (input variables). Defaults to ['capital', 'labour'].
+        y_select (list, optional): columns stacked into ``y`` (output variable). Defaults to ['output'].
+
+    Returns:
+        production_data: the ``firm`` column as ``decision_making_unit``, plus
+        ``x`` and ``y`` with one column per selected name.
+    """
+    frame = pd.read_csv(file_path+"/data/41Firm.csv")
+
+    def stack(columns):
+        # One result column per requested name, in the order given.
+        return np.column_stack(
+            [np.asanyarray(frame[name]).T for name in columns])
+
+    firms = np.asanyarray(frame['firm']).T
+    inputs = stack(x_select)
+    outputs = stack(y_select)
+    return production_data(firms, inputs, outputs)
diff --git a/pysfa/utils/tools.py b/pysfa/utils/tools.py
@@ -25,14 +25,15 @@ def assert_valid_basic_data(y, x, fun):
 
     if len(y_shape) == 2 and y_shape[1] != 1:
         raise ValueError(
-            "The multidimensional output data is supported by direciontal based models.")
+            "The output must be one dimensional array.")
 
     if y_shape[0] != x_shape[0]:
         raise ValueError(
             "x and y must have the same length.")
 
     return y, x
 
+
 def trans_list(li):
     if type(li) == list:
         return li
@@ -56,4 +57,4 @@ def to_2d_list(li):
         for value in li:
             rl.append([value])
         return rl
-    return li      
+    return li
diff --git a/setup.py b/setup.py
@@ -5,13 +5,13 @@
 
 setup_args = dict(
     name='pysfa',
-    version='0.2',
+    version='0.3',
     description='A Python Package for Stochastic Frontier Analysis',
     long_description_content_type="text/markdown",
     long_description=README,
     license='MIT',
     packages=find_packages(),
-    author='Sheng Dai',
+    author='Sheng Dai, Zhiqiang Liao',
     author_email='sheng.dai@utu.fi',
     keywords=['SFA', 'MLE', 'TE'],
     url='https://github.com/gEAPA/pySFA',
@@ -30,6 +30,7 @@
 
 install_requires = [
     'numpy>=1.19.2',
+    'pandas>=1.1.3',
     'scipy>=1.5.2',
     'scikit-learn>=1.2.2',
 ]

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,9 @@`
	`1`	`+from . import constant`
	`2`	`+from . import dataset`
`1`	`3`	`from . import SFA`
`2`	`4`
`3`	`5`	`__all__ = [`
	`6`	`+ 'constant',`
	`7`	`+ 'dataset',`
`4`	`8`	`'SFA',`
`5`	`9`	`]`