summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIgor Radovanovic <74266147+IgorWounds@users.noreply.github.com>2024-02-08 20:01:38 +0100
committerGitHub <noreply@github.com>2024-02-08 19:01:38 +0000
commitb286800c2313bccbbaa7bc34ee44dd3e7399e34f (patch)
tree4813afc137ac4d325ddc0ef05e0a1a1a3d8b4522
parent6088c98ceb290ed8c9f843455050a2bee5560331 (diff)
Improve the docstrings (#6057)
-rw-r--r--openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py141
1 files changed, 138 insertions, 3 deletions
diff --git a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
index 248f57f8040..f733ef07fd0 100644
--- a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
+++ b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
@@ -33,6 +33,11 @@ router = Router(prefix="")
def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]:
"""Get the correlation matrix of an input dataset.
+ The correlation matrix provides a view of how different variables in your dataset relate to one another.
+ By quantifying the degree to which variables move in relation to each other, this matrix can help identify patterns,
+ trends, and potential areas for deeper analysis. The correlation score ranges from -1 to 1, with -1 indicating a
+ perfect negative correlation, 0 indicating no correlation, and 1 indicating a perfect positive correlation.
+
Parameters
----------
data : List[Data]
@@ -42,6 +47,12 @@ def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]:
-------
OBBject[List[Data]]:
Correlation matrix.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.correlation_matrix(data=stock_data)
"""
df = basemodel_to_df(data)
# remove non float columns from the dataframe to perform the correlation
@@ -65,7 +76,12 @@ def ols_regression(
y_column: str,
x_columns: List[str],
) -> OBBject[Dict]:
- """Perform OLS regression. This returns the model and results objects from statsmodels.
+ """Perform Ordinary Least Squares (OLS) regression.
+
+ OLS regression is a fundamental statistical method to explore and model the relationship between a
+ dependent variable and one or more independent variables. By fitting the best possible linear equation to the data,
+ it helps uncover how changes in the independent variables are associated with changes in the dependent variable.
+ This returns the model and results objects from the statsmodels library.
Parameters
----------
@@ -80,6 +96,12 @@ def ols_regression(
-------
OBBject[Dict]:
OBBject with the results being model and results objects.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.ols_regression(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
"""
X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
y = get_target_column(basemodel_to_df(data), y_column)
@@ -94,7 +116,9 @@ def ols_regression_summary(
y_column: str,
x_columns: List[str],
) -> OBBject[Data]:
- """Perform OLS regression. This returns the summary object from statsmodels.
+ """Perform Ordinary Least Squares (OLS) regression.
+
+ This returns the summary object from statsmodels.
Parameters
----------
@@ -109,6 +133,12 @@ def ols_regression_summary(
-------
OBBject[Data]:
OBBject with the results being summary object.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.ols_regression_summary(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
"""
X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
y = get_target_column(basemodel_to_df(data), y_column)
@@ -161,6 +191,14 @@ def autocorrelation(
) -> OBBject[Dict]:
"""Perform Durbin-Watson test for autocorrelation.
+ The Durbin-Watson test is a widely used method for detecting the presence of autocorrelation in the residuals
+ from a statistical or econometric model. Autocorrelation occurs when past values in the data series influence
+ future values, which can be a critical issue in time-series analysis, affecting the reliability of
+ model predictions. The test provides a statistic that ranges from 0 to 4, where a value around 2 suggests
+ no autocorrelation, values towards 0 indicate positive autocorrelation, and values towards 4 suggest
+ negative autocorrelation. Understanding the degree of autocorrelation helps in refining models to better capture
+ the underlying dynamics of the data, ensuring more accurate and trustworthy results.
+
Parameters
----------
data: List[Data]
@@ -174,6 +212,12 @@ def autocorrelation(
-------
OBBject[Dict]:
OBBject with the results being the score from the test.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
"""
X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
y = get_target_column(basemodel_to_df(data), y_column)
@@ -190,6 +234,14 @@ def residual_autocorrelation(
) -> OBBject[Data]:
"""Perform Breusch-Godfrey Lagrange Multiplier tests for residual autocorrelation.
+ The Breusch-Godfrey Lagrange Multiplier test is a sophisticated tool for uncovering autocorrelation within the
+ residuals of a regression model. Autocorrelation in residuals can indicate that a model fails to capture some
+ aspect of the underlying data structure, possibly leading to biased or inefficient estimates.
+ By specifying the number of lags, you can control the depth of the test to check for autocorrelation,
+ allowing for a tailored analysis that matches the specific characteristics of your data.
+ This test is particularly valuable in econometrics and time-series analysis, where understanding the independence
+ of errors is crucial for model validity.
+
Parameters
----------
data: List[Data]
@@ -205,6 +257,12 @@ def residual_autocorrelation(
-------
OBBject[Data]:
OBBject with the results being the score from the test.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.residual_autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
"""
X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
y = get_target_column(basemodel_to_df(data), y_column)
@@ -229,6 +287,15 @@ def cointegration(
) -> OBBject[Data]:
"""Show co-integration between two timeseries using the two step Engle-Granger test.
+ The two-step Engle-Granger test is a method designed to detect co-integration between two time series.
+ Co-integration is a statistical property indicating that two or more time series move together over the long term,
+ even if they are individually non-stationary. This concept is crucial in economics and finance, where identifying
+ pairs or groups of assets that share a common stochastic trend can inform long-term investment strategies
+ and risk management practices. The Engle-Granger test first checks for a stable, long-term relationship by
+ regressing one time series on the other and then tests the residuals for stationarity.
+ If the residuals are found to be stationary, it suggests that despite any short-term deviations,
+ the series are bound by an equilibrium relationship over time.
+
Parameters
----------
data: List[Data]
@@ -242,6 +309,12 @@ def cointegration(
-------
OBBject[Data]:
OBBject with the results being the score from the test.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.cointegration(data=stock_data, columns=["open", "close"])
"""
pairs = list(combinations(columns, 2))
dataset = get_target_columns(basemodel_to_df(data), columns)
@@ -275,6 +348,14 @@ def causality(
) -> OBBject[Data]:
"""Perform Granger causality test to determine if X "causes" y.
+ The Granger causality test is a statistical hypothesis test to determine if one time series is useful in
+ forecasting another. While "causality" in this context does not imply a cause-and-effect relationship in
+ the philosophical sense, it does test whether changes in one variable are systematically followed by changes
+ in another variable, suggesting a predictive relationship. By specifying a lag, you set the number of periods to
+ look back in the time series to assess this relationship. This test is particularly useful in economic and
+ financial data analysis, where understanding the lead-lag relationship between indicators can inform investment
+ decisions and policy making.
+
Parameters
----------
data: List[Data]
@@ -290,6 +371,12 @@ def causality(
-------
OBBject[Data]:
OBBject with the results being the score from the test.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.causality(data=stock_data, y_column="close", x_column="open")
"""
X = get_target_column(basemodel_to_df(data), x_column)
y = get_target_column(basemodel_to_df(data), y_column)
@@ -315,7 +402,16 @@ def unit_root(
column: str,
regression: Literal["c", "ct", "ctt"] = "c",
) -> OBBject[Data]:
- """Perform Augmented Dickey-Fuller unit root test.
+ """Perform Augmented Dickey-Fuller (ADF) unit root test.
+
+ The ADF test is a popular method for testing the presence of a unit root in a time series.
+ A unit root indicates that the series may be non-stationary, meaning its statistical properties such as mean,
+ variance, and autocorrelation can change over time. The presence of a unit root suggests that the time series might
+ be influenced by a random walk process, making it unpredictable and challenging for modeling and forecasting.
+ The 'regression' parameter allows you to specify the model used in the test: 'c' for a constant term,
+ 'ct' for a constant and trend term, and 'ctt' for a constant, linear, and quadratic trend.
+ This flexibility helps tailor the test to the specific characteristics of your data, providing a more accurate
+ assessment of its stationarity.
Parameters
----------
@@ -331,6 +427,13 @@ def unit_root(
-------
OBBject[Data]:
OBBject with the results being the score from the test.
+
+ Examples
+ --------
+ >>> from openbb import obb
+ >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+ >>> obb.econometrics.unit_root(data=stock_data, column="close")
+ >>> obb.econometrics.unit_root(data=stock_data, column="close", regression="ct")
"""
dataset = get_target_column(basemodel_to_df(data), column)
adfstat, pvalue, usedlag, nobs, _, icbest = adfuller(dataset, regression=regression)
@@ -352,6 +455,11 @@ def panel_random_effects(
) -> OBBject[Dict]:
"""Perform One-way Random Effects model for panel data.
+ The One-way Random Effects model for panel data offers a nuanced approach to analyzing data that spans both
+ time and entities (such as individuals, companies, countries, etc.). By acknowledging and modeling the random
+ variation that exists within these entities, this method provides insights into the general patterns that
+ emerge across the dataset.
+
Parameters
----------
data: List[Data]
@@ -381,6 +489,11 @@ def panel_between(
) -> OBBject[Dict]:
"""Perform a Between estimator regression on panel data.
+ The Between estimator for regression analysis on panel data focuses on the differences between entities
+ (such as individuals, companies, or countries) over time. By aggregating the data for each entity and analyzing the
+ average outcomes, this method provides insights into the overall impact of explanatory variables (x_columns) on
+ the dependent variable (y_column) across all entities.
+
Parameters
----------
data: List[Data]
@@ -410,6 +523,12 @@ def panel_pooled(
) -> OBBject[Dict]:
"""Perform a Pooled coefficient estimator regression on panel data.
+ The Pooled coefficient estimator for regression analysis on panel data treats the data as a large
+ cross-section without distinguishing between variations across time or entities
+ (such as individuals, companies, or countries). By assuming that the explanatory variables (x_columns) have a
+ uniform effect on the dependent variable (y_column) across all entities and time periods, this method simplifies
+ the analysis and provides a generalized view of the relationships within the data.
+
Parameters
----------
data: List[Data]
@@ -439,6 +558,11 @@ def panel_fixed(
) -> OBBject[Dict]:
"""One- and two-way fixed effects estimator for panel data.
+ The Fixed Effects estimator for panel data enables a focused analysis of the unique characteristics of entities
+ (such as individuals, companies, or countries) and/or time periods. By controlling for entity-specific and/or
+ time-specific influences, this method isolates the effect of explanatory variables (x_columns) on the dependent
+ variable (y_column), under the assumption that these entity or time effects capture unobserved heterogeneity.
+
Parameters
----------
data: List[Data]
@@ -468,6 +592,11 @@ def panel_first_difference(
) -> OBBject[Dict]:
"""Perform a first-difference estimate for panel data.
+ The First-Difference estimator for panel data analysis focuses on the changes between consecutive observations
+ for each entity (such as individuals, companies, or countries). By differencing the data, this method effectively
+ removes entity-specific effects that are constant over time, allowing for the examination of the impact of changes
+ in explanatory variables (x_columns) on the change in the dependent variable (y_column).
+
Parameters
----------
data: List[Data]
@@ -497,6 +626,12 @@ def panel_fmac(
) -> OBBject[Dict]:
"""Fama-MacBeth estimator for panel data.
+ The Fama-MacBeth estimator is a two-step procedure renowned for its application in finance to estimate risk
+ premiums and evaluate the capital asset pricing model. By first estimating cross-sectional regressions for each
+ time period and then averaging the regression coefficients over time, this method provides insights into the
+ relationship between the dependent variable (y_column) and explanatory variables (x_columns) across different
+ entities (such as individuals, companies, or countries).
+
Parameters
----------
data: List[Data]