Improve the docstrings (#6057)

author: Igor Radovanovic <74266147+IgorWounds@users.noreply.github.com> 2024-02-08 20:01:38 +0100
committer: GitHub <noreply@github.com> 2024-02-08 19:01:38 +0000
commit: b286800c2313bccbbaa7bc34ee44dd3e7399e34f (patch)
tree: 4813afc137ac4d325ddc0ef05e0a1a1a3d8b4522
parent: 6088c98ceb290ed8c9f843455050a2bee5560331 (diff)
1 files changed, 138 insertions, 3 deletions
diff --git a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
index 248f57f8040..f733ef07fd0 100644
--- a/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
+++ b/openbb_platform/extensions/econometrics/openbb_econometrics/econometrics_router.py
@@ -33,6 +33,11 @@ router = Router(prefix="")
 def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]:
     """Get the correlation matrix of an input dataset.
 
+    The correlation matrix provides a view of how different variables in your dataset relate to one another.
+    By quantifying the degree to which variables move in relation to each other, this matrix can help identify patterns,
+    trends, and potential areas for deeper analysis. The correlation score ranges from -1 to 1, with -1 indicating a
+    perfect negative correlation, 0 indicating no correlation, and 1 indicating a perfect positive correlation.
+
     Parameters
     ----------
     data : List[Data]
@@ -42,6 +47,12 @@ def correlation_matrix(data: List[Data]) -> OBBject[List[Data]]:
     -------
     OBBject[List[Data]]:
         Correlation matrix.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.correlation_matrix(data=stock_data)
     """
     df = basemodel_to_df(data)
     # remove non float columns from the dataframe to perform the correlation
@@ -65,7 +76,12 @@ def ols_regression(
     y_column: str,
     x_columns: List[str],
 ) -> OBBject[Dict]:
-    """Perform OLS regression.  This returns the model and results objects from statsmodels.
+    """Perform Ordinary Least Squares (OLS) regression.
+
+    OLS regression is a fundamental statistical method to explore and model the relationship between a
+    dependent variable and one or more independent variables. By fitting the best possible linear equation to the data,
+    it helps uncover how changes in the independent variables are associated with changes in the dependent variable.
+    This returns the model and results objects from the statsmodels library.
 
     Parameters
     ----------
@@ -80,6 +96,12 @@ def ols_regression(
     -------
     OBBject[Dict]:
         OBBject with the results being model and results objects.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.ols_regression(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
     """
     X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
     y = get_target_column(basemodel_to_df(data), y_column)
@@ -94,7 +116,9 @@ def ols_regression_summary(
     y_column: str,
     x_columns: List[str],
 ) -> OBBject[Data]:
-    """Perform OLS regression. This returns the summary object from statsmodels.
+    """Perform Ordinary Least Squares (OLS) regression.
+
+    This returns the summary object from statsmodels.
 
     Parameters
     ----------
@@ -109,6 +133,12 @@ def ols_regression_summary(
     -------
     OBBject[Data]:
         OBBject with the results being summary object.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.ols_regression_summary(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
     """
     X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
     y = get_target_column(basemodel_to_df(data), y_column)
@@ -161,6 +191,14 @@ def autocorrelation(
 ) -> OBBject[Dict]:
     """Perform Durbin-Watson test for autocorrelation.
 
+    The Durbin-Watson test is a widely used method for detecting the presence of autocorrelation in the residuals
+    from a statistical or econometric model. Autocorrelation occurs when past values in the data series influence
+    future values, which can be a critical issue in time-series analysis, affecting the reliability of
+    model predictions. The test provides a statistic that ranges from 0 to 4, where a value around 2 suggests
+    no autocorrelation, values towards 0 indicate positive autocorrelation, and values towards 4 suggest
+    negative autocorrelation. Understanding the degree of autocorrelation helps in refining models to better capture
+    the underlying dynamics of the data, ensuring more accurate and trustworthy results.
+
     Parameters
     ----------
     data: List[Data]
@@ -174,6 +212,12 @@ def autocorrelation(
     -------
     OBBject[Dict]:
         OBBject with the results being the score from the test.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
     """
     X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
     y = get_target_column(basemodel_to_df(data), y_column)
@@ -190,6 +234,14 @@ def residual_autocorrelation(
 ) -> OBBject[Data]:
     """Perform Breusch-Godfrey Lagrange Multiplier tests for residual autocorrelation.
 
+    The Breusch-Godfrey Lagrange Multiplier test is a sophisticated tool for uncovering autocorrelation within the
+    residuals of a regression model. Autocorrelation in residuals can indicate that a model fails to capture some
+    aspect of the underlying data structure, possibly leading to biased or inefficient estimates.
+    By specifying the number of lags, you can control the depth of the test to check for autocorrelation,
+    allowing for a tailored analysis that matches the specific characteristics of your data.
+    This test is particularly valuable in econometrics and time-series analysis, where understanding the independence
+    of errors is crucial for model validity.
+
     Parameters
     ----------
     data: List[Data]
@@ -205,6 +257,12 @@ def residual_autocorrelation(
     -------
     OBBject[Data]:
         OBBject with the results being the score from the test.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.residual_autocorrelation(data=stock_data, y_column="close", x_columns=["open", "high", "low"])
     """
     X = sm.add_constant(get_target_columns(basemodel_to_df(data), x_columns))
     y = get_target_column(basemodel_to_df(data), y_column)
@@ -229,6 +287,15 @@ def cointegration(
 ) -> OBBject[Data]:
     """Show co-integration between two timeseries using the two step Engle-Granger test.
 
+    The two-step Engle-Granger test is a method designed to detect co-integration between two time series.
+    Co-integration is a statistical property indicating that two or more time series move together over the long term,
+    even if they are individually non-stationary. This concept is crucial in economics and finance, where identifying
+    pairs or groups of assets that share a common stochastic trend can inform long-term investment strategies
+    and risk management practices. The Engle-Granger test first checks for a stable, long-term relationship by
+    regressing one time series on the other and then tests the residuals for stationarity.
+    If the residuals are found to be stationary, it suggests that despite any short-term deviations,
+    the series are bound by an equilibrium relationship over time.
+
     Parameters
     ----------
     data: List[Data]
@@ -242,6 +309,12 @@ def cointegration(
     -------
     OBBject[Data]:
         OBBject with the results being the score from the test.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.cointegration(data=stock_data, columns=["open", "close"])
     """
     pairs = list(combinations(columns, 2))
     dataset = get_target_columns(basemodel_to_df(data), columns)
@@ -275,6 +348,14 @@ def causality(
 ) -> OBBject[Data]:
     """Perform Granger causality test to determine if X "causes" y.
 
+    The Granger causality test is a statistical hypothesis test to determine if one time series is useful in
+    forecasting another. While "causality" in this context does not imply a cause-and-effect relationship in
+    the philosophical sense, it does test whether changes in one variable are systematically followed by changes
+    in another variable, suggesting a predictive relationship. By specifying a lag, you set the number of periods to
+    look back in the time series to assess this relationship. This test is particularly useful in economic and
+    financial data analysis, where understanding the lead-lag relationship between indicators can inform investment
+    decisions and policy making.
+
     Parameters
     ----------
     data: List[Data]
@@ -290,6 +371,12 @@ def causality(
     -------
     OBBject[Data]:
         OBBject with the results being the score from the test.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.causality(data=stock_data, y_column="close", x_column="open")
     """
     X = get_target_column(basemodel_to_df(data), x_column)
     y = get_target_column(basemodel_to_df(data), y_column)
@@ -315,7 +402,16 @@ def unit_root(
     column: str,
     regression: Literal["c", "ct", "ctt"] = "c",
 ) -> OBBject[Data]:
-    """Perform Augmented Dickey-Fuller unit root test.
+    """Perform Augmented Dickey-Fuller (ADF) unit root test.
+
+    The ADF test is a popular method for testing the presence of a unit root in a time series.
+    A unit root indicates that the series may be non-stationary, meaning its statistical properties such as mean,
+    variance, and autocorrelation can change over time. The presence of a unit root suggests that the time series might
+    be influenced by a random walk process, making it unpredictable and challenging for modeling and forecasting.
+    The 'regression' parameter allows you to specify the model used in the test: 'c' for a constant term,
+    'ct' for a constant and trend term, and 'ctt' for a constant, linear, and quadratic trend.
+    This flexibility helps tailor the test to the specific characteristics of your data, providing a more accurate
+    assessment of its stationarity.
 
     Parameters
     ----------
@@ -331,6 +427,13 @@ def unit_root(
     -------
     OBBject[Data]:
         OBBject with the results being the score from the test.
+
+    Examples
+    --------
+    >>> from openbb import obb
+    >>> stock_data = obb.equity.price.historical(symbol="TSLA", start_date="2023-01-01", provider="fmp").to_df()
+    >>> obb.econometrics.unit_root(data=stock_data, column="close")
+    >>> obb.econometrics.unit_root(data=stock_data, column="close", regression="ct")
     """
     dataset = get_target_column(basemodel_to_df(data), column)
     adfstat, pvalue, usedlag, nobs, _, icbest = adfuller(dataset, regression=regression)
@@ -352,6 +455,11 @@ def panel_random_effects(
 ) -> OBBject[Dict]:
     """Perform One-way Random Effects model for panel data.
 
+    The One-way Random Effects model for panel data offers a nuanced approach to analyzing data that spans across both
+    time and entities (such as individuals, companies, countries, etc.). By acknowledging and modeling the random
+    variation that exists within these entities, this method provides insights into the general patterns that
+    emerge across the dataset.
+
     Parameters
     ----------
     data: List[Data]
@@ -381,6 +489,11 @@ def panel_between(
 ) -> OBBject[Dict]:
     """Perform a Between estimator regression on panel data.
 
+    The Between estimator for regression analysis on panel data focuses on the differences between entities
+    (such as individuals, companies, or countries) over time. By aggregating the data for each entity and analyzing the
+    average outcomes, this method provides insights into the overall impact of explanatory variables (x_columns) on
+    the dependent variable (y_column) across all entities.
+
     Parameters
     ----------
     data: List[Data]
@@ -410,6 +523,12 @@ def panel_pooled(
 ) -> OBBject[Dict]:
     """Perform a Pooled coefficient estimator regression on panel data.
 
+    The Pooled coefficient estimator for regression analysis on panel data treats the data as a large
+    cross-section without distinguishing between variations across time or entities
+    (such as individuals, companies, or countries). By assuming that the explanatory variables (x_columns) have a
+    uniform effect on the dependent variable (y_column) across all entities and time periods, this method simplifies
+    the analysis and provides a generalized view of the relationships within the data.
+
     Parameters
     ----------
     data: List[Data]
@@ -439,6 +558,11 @@ def panel_fixed(
 ) -> OBBject[Dict]:
     """One- and two-way fixed effects estimator for panel data.
 
+    The Fixed Effects estimator for panel data enables a focused analysis on the unique characteristics of entities
+    (such as individuals, companies, or countries) and/or time periods. By controlling for entity-specific and/or
+    time-specific influences, this method isolates the effect of explanatory variables (x_columns) on the dependent
+    variable (y_column), under the assumption that these entity or time effects capture unobserved heterogeneity.
+
     Parameters
     ----------
     data: List[Data]
@@ -468,6 +592,11 @@ def panel_first_difference(
 ) -> OBBject[Dict]:
     """Perform a first-difference estimate for panel data.
 
+    The First-Difference estimator for panel data analysis focuses on the changes between consecutive observations
+    for each entity (such as individuals, companies, or countries). By differencing the data, this method effectively
+    removes entity-specific effects that are constant over time, allowing for the examination of the impact of changes
+    in explanatory variables (x_columns) on the change in the dependent variable (y_column).
+
     Parameters
     ----------
     data: List[Data]
@@ -497,6 +626,12 @@ def panel_fmac(
 ) -> OBBject[Dict]:
     """Fama-MacBeth estimator for panel data.
 
+    The Fama-MacBeth estimator is a two-step procedure renowned for its application in finance to estimate the risk
+    premiums and evaluate the capital asset pricing model. By first estimating cross-sectional regressions for each
+    time period and then averaging the regression coefficients over time, this method provides insights into the
+    relationship between the dependent variable (y_column) and explanatory variables (x_columns) across different
+    entities (such as individuals, companies, or countries).
+
     Parameters
     ----------
     data: List[Data]
author	Igor Radovanovic <74266147+IgorWounds@users.noreply.github.com>	2024-02-08 20:01:38 +0100
committer	GitHub <noreply@github.com>	2024-02-08 19:01:38 +0000
commit	b286800c2313bccbbaa7bc34ee44dd3e7399e34f (patch)
tree	4813afc137ac4d325ddc0ef05e0a1a1a3d8b4522
parent	6088c98ceb290ed8c9f843455050a2bee5560331 (diff)