diff options
author | Igor Radovanovic <74266147+IgorWounds@users.noreply.github.com> | 2024-02-08 20:01:38 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-08 19:01:38 +0000 |
commit | b286800c2313bccbbaa7bc34ee44dd3e7399e34f (patch) | |
tree | 4813afc137ac4d325ddc0ef05e0a1a1a3d8b4522 | |
parent | 6088c98ceb290ed8c9f843455050a2bee5560331 (diff) |
Improve the docstrings (#6057)
-rw-r--r-- | openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py | 141 |
1 files changed, 138 insertions, 3 deletions
diff --git a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py index 248f57f8040..f733ef07fd0 100644 --- a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py +++ b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py @@ -33,6 +33,11 @@ router = Router(prefix="") def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]: """Get the correlation matrix of an input dataset. + The correlation matrix provides a view of how different variables in your dataset relate to one another. + By quantifying the degree to which variables move in relation to each other, this matrix can help identify patterns, + trends, and potential areas for deeper analysis. The correlation score ranges from -1 to 1, with -1 indicating a + perfect negative correlation, 0 indicating no correlation, and 1 indicating a perfect positive correlation. + Parameters ---------- data : List[Data] @@ -42,6 +47,12 @@ def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]: ------- OBBject[List[Data]]: Correlation matrix. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.correlation_matrix(data=stock_data) """ df = basemodel_to_df(data) # remove non float columns from the dataframe to perform the correlation @@ -65,7 +76,12 @@ def ols_regression( y_column: str, x_columns: List[str], ) -> OBBject[Dict]: - """Perform OLS regression. This returns the model and results objects from statsmodels. + """Perform Ordinary Least Squares (OLS) regression. + + OLS regression is a fundamental statistical method to explore and model the relationship between a + dependent variable and one or more independent variables. 
By fitting the best possible linear equation to the data, + it helps uncover how changes in the independent variables are associated with changes in the dependent variable. + This returns the model and results objects from the statsmodels library. Parameters ---------- @@ -80,6 +96,12 @@ def ols_regression( ------- OBBject[Dict]: OBBject with the results being model and results objects. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.ols_regression(data=stock_data, y_column="close", x_columns=["open", "high", "low"]) """ X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns)) y = get_target_column(basemodel_to_df(data), y_column) @@ -94,7 +116,9 @@ def ols_regression_summary( y_column: str, x_columns: List[str], ) -> OBBject[Data]: - """Perform OLS regression. This returns the summary object from statsmodels. + """Perform Ordinary Least Squares (OLS) regression. + + This returns the summary object from statsmodels. Parameters ---------- @@ -109,6 +133,12 @@ def ols_regression_summary( ------- OBBject[Data]: OBBject with the results being summary object. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.ols_regression_summary(data=stock_data, y_column="close", x_columns=["open", "high", "low"]) """ X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns)) y = get_target_column(basemodel_to_df(data), y_column) @@ -161,6 +191,14 @@ def autocorrelation( ) -> OBBject[Dict]: """Perform Durbin-Watson test for autocorrelation. + The Durbin-Watson test is a widely used method for detecting the presence of autocorrelation in the residuals + from a statistical or econometric model. 
Autocorrelation occurs when past values in the data series influence + future values, which can be a critical issue in time-series analysis, affecting the reliability of + model predictions. The test provides a statistic that ranges from 0 to 4, where a value around 2 suggests + no autocorrelation, values towards 0 indicate positive autocorrelation, and values towards 4 suggest + negative autocorrelation. Understanding the degree of autocorrelation helps in refining models to better capture + the underlying dynamics of the data, ensuring more accurate and trustworthy results. + Parameters ---------- data: List[Data] @@ -174,6 +212,12 @@ def autocorrelation( ------- OBBject[Dict]: OBBject with the results being the score from the test. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"]) """ X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns)) y = get_target_column(basemodel_to_df(data), y_column) @@ -190,6 +234,14 @@ def residual_autocorrelation( ) -> OBBject[Data]: """Perform Breusch-Godfrey Lagrange Multiplier tests for residual autocorrelation. + The Breusch-Godfrey Lagrange Multiplier test is a sophisticated tool for uncovering autocorrelation within the + residuals of a regression model. Autocorrelation in residuals can indicate that a model fails to capture some + aspect of the underlying data structure, possibly leading to biased or inefficient estimates. + By specifying the number of lags, you can control the depth of the test to check for autocorrelation, + allowing for a tailored analysis that matches the specific characteristics of your data. + This test is particularly valuable in econometrics and time-series analysis, where understanding the independence + of errors is crucial for model validity. 
+ Parameters ---------- data: List[Data] @@ -205,6 +257,12 @@ def residual_autocorrelation( ------- OBBject[Data]: OBBject with the results being the score from the test. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.residual_autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"]) """ X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns)) y = get_target_column(basemodel_to_df(data), y_column) @@ -229,6 +287,15 @@ def cointegration( ) -> OBBject[Data]: """Show co-integration between two timeseries using the two step Engle-Granger test. + The two-step Engle-Granger test is a method designed to detect co-integration between two time series. + Co-integration is a statistical property indicating that two or more time series move together over the long term, + even if they are individually non-stationary. This concept is crucial in economics and finance, where identifying + pairs or groups of assets that share a common stochastic trend can inform long-term investment strategies + and risk management practices. The Engle-Granger test first checks for a stable, long-term relationship by + regressing one time series on the other and then tests the residuals for stationarity. + If the residuals are found to be stationary, it suggests that despite any short-term deviations, + the series are bound by an equilibrium relationship over time. + Parameters ---------- data: List[Data] @@ -242,6 +309,12 @@ def cointegration( ------- OBBject[Data]: OBBject with the results being the score from the test. 
+ + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.cointegration(data=stock_data, columns=["open", "close"]) """ pairs = list(combinations(columns, 2)) dataset = get_target_columns(basemodel_to_df(data), columns) @@ -275,6 +348,14 @@ def causality( ) -> OBBject[Data]: """Perform Granger causality test to determine if X "causes" y. + The Granger causality test is a statistical hypothesis test to determine if one time series is useful in + forecasting another. While "causality" in this context does not imply a cause-and-effect relationship in + the philosophical sense, it does test whether changes in one variable are systematically followed by changes + in another variable, suggesting a predictive relationship. By specifying a lag, you set the number of periods to + look back in the time series to assess this relationship. This test is particularly useful in economic and + financial data analysis, where understanding the lead-lag relationship between indicators can inform investment + decisions and policy making. + Parameters ---------- data: List[Data] @@ -290,6 +371,12 @@ def causality( ------- OBBject[Data]: OBBject with the results being the score from the test. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.causality(data=stock_data, y_column="close", x_column="open") """ X = get_target_column(basemodel_to_df(data), x_column) y = get_target_column(basemodel_to_df(data), y_column) @@ -315,7 +402,16 @@ def unit_root( column: str, regression: Literal["c", "ct", "ctt"] = "c", ) -> OBBject[Data]: - """Perform Augmented Dickey-Fuller unit root test. + """Perform Augmented Dickey-Fuller (ADF) unit root test. 
+ + The ADF test is a popular method for testing the presence of a unit root in a time series. + A unit root indicates that the series may be non-stationary, meaning its statistical properties such as mean, + variance, and autocorrelation can change over time. The presence of a unit root suggests that the time series might + be influenced by a random walk process, making it unpredictable and challenging for modeling and forecasting. + The 'regression' parameter allows you to specify the model used in the test: 'c' for a constant term, + 'ct' for a constant and trend term, and 'ctt' for a constant, linear, and quadratic trend. + This flexibility helps tailor the test to the specific characteristics of your data, providing a more accurate + assessment of its stationarity. Parameters ---------- @@ -331,6 +427,13 @@ def unit_root( ------- OBBject[Data]: OBBject with the results being the score from the test. + + Examples + -------- + >>> from openbb import obb + >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df() + >>> obb.econometrics.unit_root(data=stock_data, column="close") + >>> obb.econometrics.unit_root(data=stock_data, column="close", regression="ct") """ dataset = get_target_column(basemodel_to_df(data), column) adfstat, pvalue, usedlag, nobs, _, icbest = adfuller(dataset, regression=regression) @@ -352,6 +455,11 @@ def panel_random_effects( ) -> OBBject[Dict]: """Perform One-way Random Effects model for panel data. + The One-way Random Effects model for panel data offers a nuanced approach to analyzing data that spans across both + time and entities (such as individuals, companies, countries, etc.). By acknowledging and modeling the random + variation that exists within these entities, this method provides insights into the general patterns that + emerge across the dataset. 
+ Parameters ---------- data: List[Data] @@ -381,6 +489,11 @@ def panel_between( ) -> OBBject[Dict]: """Perform a Between estimator regression on panel data. + The Between estimator for regression analysis on panel data focuses on the differences between entities + (such as individuals, companies, or countries) over time. By aggregating the data for each entity and analyzing the + average outcomes, this method provides insights into the overall impact of explanatory variables (x_columns) on + the dependent variable (y_column) across all entities. + Parameters ---------- data: List[Data] @@ -410,6 +523,12 @@ def panel_pooled( ) -> OBBject[Dict]: """Perform a Pooled coefficient estimator regression on panel data. + The Pooled coefficient estimator for regression analysis on panel data treats the data as a large + cross-section without distinguishing between variations across time or entities + (such as individuals, companies, or countries). By assuming that the explanatory variables (x_columns) have a + uniform effect on the dependent variable (y_column) across all entities and time periods, this method simplifies + the analysis and provides a generalized view of the relationships within the data. + Parameters ---------- data: List[Data] @@ -439,6 +558,11 @@ def panel_fixed( ) -> OBBject[Dict]: """One- and two-way fixed effects estimator for panel data. + The Fixed Effects estimator for panel data enables a focused analysis on the unique characteristics of entities + (such as individuals, companies, or countries) and/or time periods. By controlling for entity-specific and/or + time-specific influences, this method isolates the effect of explanatory variables (x_columns) on the dependent + variable (y_column), under the assumption that these entity or time effects capture unobserved heterogeneity. 
+ Parameters ---------- data: List[Data] @@ -468,6 +592,11 @@ def panel_first_difference( ) -> OBBject[Dict]: """Perform a first-difference estimate for panel data. + The First-Difference estimator for panel data analysis focuses on the changes between consecutive observations + for each entity (such as individuals, companies, or countries). By differencing the data, this method effectively + removes entity-specific effects that are constant over time, allowing for the examination of the impact of changes + in explanatory variables (x_columns) on the change in the dependent variable (y_column). + Parameters ---------- data: List[Data] @@ -497,6 +626,12 @@ def panel_fmac( ) -> OBBject[Dict]: """Fama-MacBeth estimator for panel data. + The Fama-MacBeth estimator is a two-step procedure renowned for its application in finance to estimate the risk + premiums and evaluate the capital asset pricing model. By first estimating cross-sectional regressions for each + time period and then averaging the regression coefficients over time, this method provides insights into the + relationship between the dependent variable (y_column) and explanatory variables (x_columns) across different + entities (such as individuals, companies, or countries). + Parameters ---------- data: List[Data] |