summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornorthern-64bit <75195383+northern-64bit@users.noreply.github.com>2023-05-25 16:54:11 +0200
committerGitHub <noreply@github.com>2023-05-25 14:54:11 +0000
commit8e21b340e0b657e359de0e63a1a8032153a6c89b (patch)
treebb74eca55a3b5413a6caeab9b0258be76a4c519a
parente30da799aa5aa0900cbffc1ce455c28fd55cc2ca (diff)
Adds test for collinearity to the `econometrics` menu (#5018)
* Add `vif` command to the econometrics menu Adds `vif` command, which tests collinearity by calculating the variance inflation factor. * Fix spelling * Fix ruff * Fix ruff * Fix mypy + add sdk example * Fix mypy * Fix pylint * Fix input + error handling * Remove rogue print + error handling for one column * fix linting
-rw-r--r--openbb_terminal/core/sdk/models/econometrics_sdk_model.py2
-rw-r--r--openbb_terminal/core/sdk/trail_map.csv1
-rw-r--r--openbb_terminal/econometrics/econometrics_controller.py106
-rw-r--r--openbb_terminal/econometrics/econometrics_model.py50
-rw-r--r--openbb_terminal/econometrics/econometrics_view.py44
-rw-r--r--openbb_terminal/miscellaneous/i18n/en.yml2
-rw-r--r--openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics.openbb1
-rw-r--r--openbb_terminal/sdk.py1
8 files changed, 194 insertions, 13 deletions
diff --git a/openbb_terminal/core/sdk/models/econometrics_sdk_model.py b/openbb_terminal/core/sdk/models/econometrics_sdk_model.py
index 690dd7ca2d3..018d5de9fcf 100644
--- a/openbb_terminal/core/sdk/models/econometrics_sdk_model.py
+++ b/openbb_terminal/core/sdk/models/econometrics_sdk_model.py
@@ -39,6 +39,7 @@ class EconometricsRoot(Category):
`re`: The random effects model is virtually identical to the pooled OLS model except that is accounts for the\n
`root`: Calculate test statistics for unit roots\n
`root_chart`: Determine the normality of a timeseries.\n
+ `vif`: Determine the vif, which tests for collinearity.\n
"""
_location_path = "econometrics"
@@ -75,3 +76,4 @@ class EconometricsRoot(Category):
self.re = lib.econometrics_regression_model.get_re
self.root = lib.econometrics_model.get_root
self.root_chart = lib.econometrics_view.display_root
+ self.vif = lib.econometrics_model.get_vif
diff --git a/openbb_terminal/core/sdk/trail_map.csv b/openbb_terminal/core/sdk/trail_map.csv
index eb85b6579d6..e339b496bbf 100644
--- a/openbb_terminal/core/sdk/trail_map.csv
+++ b/openbb_terminal/core/sdk/trail_map.csv
@@ -183,6 +183,7 @@ econometrics.panel,econometrics_regression_model.get_regressions_results,econome
econometrics.pols,econometrics_regression_model.get_pols,
econometrics.re,econometrics_regression_model.get_re,
econometrics.root,econometrics_model.get_root,econometrics_view.display_root
+econometrics.vif,econometrics_model.get_vif,
econometrics.garch,econometrics_model.get_garch,econometrics_view.display_garch
economy.available_indices,economy_yfinance_model.get_available_indices,
economy.balance,economy_oecd_model.get_balance,economy_oecd_view.plot_balance
diff --git a/openbb_terminal/econometrics/econometrics_controller.py b/openbb_terminal/econometrics/econometrics_controller.py
index 8602eeabec2..03b1a9a538d 100644
--- a/openbb_terminal/econometrics/econometrics_controller.py
+++ b/openbb_terminal/econometrics/econometrics_controller.py
@@ -77,6 +77,7 @@ class EconometricsController(BaseController):
"garch",
"granger",
"coint",
+ "vif",
]
CHOICES_MENUS: List[str] = [
"qa",
@@ -189,6 +190,7 @@ class EconometricsController(BaseController):
"corr",
"season",
"lag",
+ "vif",
]:
choices[feature] = dict()
@@ -235,12 +237,14 @@ class EconometricsController(BaseController):
]:
self.choices[feature] = {c: {} for c in self.files}
- self.choices["type"] = {
- c: {} for c in self.files + list(dataset_columns.keys())
- }
- self.choices["desc"] = {
- c: {} for c in self.files + list(dataset_columns.keys())
- }
+ for feature in ["type", "desc", "vif"]:
+ self.choices[feature] = {
+ c: {} for c in self.files + list(dataset_columns.keys())
+ }
+ self.choices["vif"] = dict(
+ self.choices["vif"],
+ **{"-d": self.choices["vif"], "--data": self.choices["vif"]},
+ )
pairs_timeseries = list()
for dataset_col in list(dataset_columns.keys()):
@@ -286,20 +290,21 @@ class EconometricsController(BaseController):
mt.add_cmd("lag", self.files)
mt.add_cmd("ret", self.files)
mt.add_cmd("export", self.files)
- mt.add_info("time_series_")
+ mt.add_info("_assumption_testing_")
mt.add_cmd("norm", self.files)
- mt.add_cmd("ols", self.files)
mt.add_cmd("granger", self.files)
mt.add_cmd("root", self.files)
mt.add_cmd("coint", self.files)
+ mt.add_cmd("vif", self.files)
+ mt.add_cmd("dwat", self.files and self.regression["OLS"]["model"])
+ mt.add_cmd("bgod", self.files and self.regression["OLS"]["model"])
+ mt.add_cmd("bpag", self.files and self.regression["OLS"]["model"])
+ mt.add_info("_time_series_")
+ mt.add_cmd("ols", self.files)
mt.add_cmd("garch", self.files)
mt.add_info("_panel_")
mt.add_cmd("panel", self.files)
mt.add_cmd("compare", self.files)
- mt.add_info("_residuals_")
- mt.add_cmd("dwat", self.files and self.regression["OLS"]["model"])
- mt.add_cmd("bgod", self.files and self.regression["OLS"]["model"])
- mt.add_cmd("bpag", self.files and self.regression["OLS"]["model"])
console.print(text=mt.menu_text, menu="Econometrics")
console.print()
@@ -1884,7 +1889,7 @@ class EconometricsController(BaseController):
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
prog="bgod",
description=(
- "Show Breusch-Godfrey autocorrelation test results."
+ "Show Breusch-Godfrey autocorrelation test results. "
"Needs OLS to be run in advance with independent and dependent variables"
),
)
@@ -2182,3 +2187,78 @@ class EconometricsController(BaseController):
console.print(
"[red]More than one dataset.column must be provided.\n[/red]"
)
+
+ @log_start_end(log=logger)
+ def call_vif(self, other_args: List[str]):
+ """Process vif command"""
+ parser = argparse.ArgumentParser(
+ add_help=False,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ prog="vif",
+ description=r"""Calculates VIF (variance inflation factor), which tests collinearity.
+
+ It quantifies the severity of multicollinearity in an ordinary least squares regression analysis. The square
+ root of the variance inflation factor indicates how much larger the standard error increases compared to if
+ that variable had 0 correlation to other predictor variables in the model.
+
+ It is defined as:
+
+ $ VIF_i = 1 / (1 - R_i^2) $
+ where $ R_i $ is the coefficient of determination of the regression equation with the column i being the
+ result from the i:th series being the exogenous variable.
+
+ A VIF over 5 indicates a high collinearity and correlation. Values over 10 indicates causes problems,
+ while a value of 1 indicates no correlation. Thus VIF values between 1 and 5 are most commonly considered
+ acceptable. In order to improve the results one can often remove a column with high VIF.
+
+ For further information see: https://en.wikipedia.org/wiki/Variance_inflation_factor""",
+ )
+ parser.add_argument(
+ "-d",
+ "--data",
+ help="The datasets and columns we want to add <dataset>,<dataset2.column>,<dataset2.column2>",
+ dest="data",
+ type=check_list_values(self.choices["vif"]),
+ default=None,
+ )
+ if other_args and "-" not in other_args[0][0]:
+ other_args.insert(0, "-d")
+ ns_parser = self.parse_known_args_and_warn(
+ parser, other_args, EXPORT_ONLY_RAW_DATA_ALLOWED
+ )
+
+ data = pd.DataFrame()
+ if ns_parser:
+ if ns_parser.data is None:
+ console.print("[red]Please enter a dataset to calculate vif for.[/red]")
+ return
+ if len(ns_parser.data) == 1 and "." in ns_parser.data[0]:
+ console.print(
+ "[red]Please enter at least a dataset or two columns to calculate vif for."
+ "vif can only be calculated for at least two columns.[/red]"
+ )
+ for option in ns_parser.data:
+ if "." in option:
+ dataset, column = option.split(".")
+ else:
+ dataset = option
+ column = None
+
+ if dataset not in self.datasets:
+ console.print(
+ f"[red]Not able to find the dataset {dataset}. Please choose one of "
+ f"the following: {', '.join(self.datasets)}[/red]"
+ )
+ elif column is not None:
+ if column not in self.datasets[dataset]:
+ console.print(
+ f"[red]Not able to find the column {column}. Please choose one of "
+ f"the following: {', '.join(self.datasets[dataset].data)}[/red]"
+ )
+ else:
+ data[f"{dataset}_{column}"] = self.datasets[dataset][column]
+ else:
+ for column in list(self.datasets[dataset].columns):
+ data[f"{dataset}_{column}"] = self.datasets[dataset][column]
+
+ econometrics_view.display_vif(data)
diff --git a/openbb_terminal/econometrics/econometrics_model.py b/openbb_terminal/econometrics/econometrics_model.py
index fdcabb4c68e..b1c6dde473a 100644
--- a/openbb_terminal/econometrics/econometrics_model.py
+++ b/openbb_terminal/econometrics/econometrics_model.py
@@ -13,6 +13,8 @@ import pandas as pd
import statsmodels.api as sm
from arch import arch_model
from scipy import stats
+from statsmodels.stats.outliers_influence import variance_inflation_factor
+from statsmodels.tools.tools import add_constant
from statsmodels.tsa.stattools import adfuller, grangercausalitytests, kpss
from openbb_terminal.rich_config import console
@@ -482,3 +484,51 @@ def get_engle_granger_two_step_cointegration_test(
adfstat, pvalue, _, _, _ = adfuller(z, maxlag=1, autolag=None)
return c, gamma, alpha, z, adfstat, pvalue
+
+
+def get_vif(dataset: pd.DataFrame, columns: Optional[list] = None) -> pd.DataFrame:
+ r"""Calculates VIF (variance inflation factor), which tests collinearity.
+
+ It quantifies the severity of multicollinearity in an ordinary least squares regression analysis. The square
+ root of the variance inflation factor indicates how much larger the standard error of a coefficient is compared
+ to what it would be if that variable had 0 correlation to the other predictor variables in the model.
+
+ It is defined as:
+
+ $ VIF_i = 1 / (1 - R_i^2) $
+ where $ R_i^2 $ is the coefficient of determination of the regression in which the i:th column is the
+ dependent variable and all the other columns are the explanatory variables.
+
+ A VIF over 5 indicates high collinearity and correlation. Values over 10 indicate that this causes problems, while a
+ value of 1 indicates no correlation. Thus VIF values between 1 and 5 are most commonly considered acceptable.
+ In order to improve the results one can often remove a column with a high VIF.
+
+ For further information see: https://en.wikipedia.org/wiki/Variance_inflation_factor
+
+ Parameters
+ ----------
+ dataset: pd.DataFrame
+ Dataset to calculate VIF on
+ columns: Optional[list]
+ The columns to test for collinearity
+
+ Returns
+ -------
+ pd.DataFrame
+ Dataframe with the resulting VIF values for the selected columns
+ Examples
+ --------
+ >>> from openbb_terminal.sdk import openbb
+ >>> longley = openbb.econometrics.load("longley")
+ >>> openbb.econometrics.vif(longley, ["TOTEMP","UNEMP","ARMED"])
+ """
+ df = add_constant(dataset if columns is None else dataset[columns])
+ vif = pd.DataFrame(
+ {
+ "VIF Values": [
+ variance_inflation_factor(df.values, i) for i in range(df.shape[1])
+ ][1:]
+ },
+ index=df.columns[1:],
+ )
+ return vif
diff --git a/openbb_terminal/econometrics/econometrics_view.py b/openbb_terminal/econometrics/econometrics_view.py
index c13342c8944..6bebc81065a 100644
--- a/openbb_terminal/econometrics/econometrics_view.py
+++ b/openbb_terminal/econometrics/econometrics_view.py
@@ -621,3 +621,47 @@ def display_cointegration_test(
return fig.show(external=external_axes)
return None
+
+
+@log_start_end(log=logger)
+def display_vif(
+ dataset: pd.DataFrame,
+ columns: Optional[list] = None,
+ export: str = "",
+ sheet_name: Optional[str] = None,
+):
+ """Displays the VIF (variance inflation factor) value of each column, which tests for collinearity.
+
+ Parameters
+ ----------
+ dataset: pd.DataFrame
+ Dataset to calculate VIF on
+ columns: Optional[list]
+ The columns to test for collinearity
+ sheet_name: Optional[str]
+ Optionally specify the name of the sheet the data is exported to.
+ export: str
+ Format to export data.
+ """
+ columns = dataset.columns if columns is None else columns
+ if any(dataset[column].dtype not in [int, float] for column in columns):
+ console.print(
+ "All column types must be numeric. Consider using the command 'type' to change this.\n"
+ )
+ else:
+ results = econometrics_model.get_vif(dataset, columns)
+
+ print_rich_table(
+ results,
+ headers=list(results.columns),
+ show_index=True,
+ title="Collinearity Test",
+ )
+
+ export_data(
+ export,
+ os.path.dirname(os.path.abspath(__file__)),
+ f"{dataset}_{','.join(columns)}_vif",
+ results,
+ sheet_name,
+ )
diff --git a/openbb_terminal/miscellaneous/i18n/en.yml b/openbb_terminal/miscellaneous/i18n/en.yml
index f06570a2d4f..71633e8f9ce 100644
--- a/openbb_terminal/miscellaneous/i18n/en.yml
+++ b/openbb_terminal/miscellaneous/i18n/en.yml
@@ -981,6 +981,8 @@ en:
econometrics/coint: co-integration test on a multitude of columns
econometrics/garch: estimate future volatility with GARCH
econometrics/ret: calculate returns for the given time series
+ econometrics/vif: perform collinearity test (VIF)
+ econometrics/_assumption_testing_: Assumption Testing
portfolio/bro: brokers holdings supports robinhood, ally, degiro, coinbase
portfolio/po: portfolio optimization optimize your portfolio weights efficiently
portfolio/load: load transactions into the portfolio (use load --example for an example)
diff --git a/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics.openbb b/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics.openbb
index 2e0ae729b31..6f9bbd6d662 100644
--- a/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics.openbb
+++ b/openbb_terminal/miscellaneous/integration_tests_scripts/econometrics/test_econometrics.openbb
@@ -51,6 +51,7 @@ panel -d wp.lwage -i wp.black,wp.hisp,wp.exper,wp.married,wp.educ,wp.union -r bo
load longley -a ll
ols -d ll.totemp -i ll.gnpdefl,ll.gnp,ll.unemp,ll.armed,ll.pop,ll.year
+vif ll.totemp,ll.unemp,ll.armed
## Regression Tests
diff --git a/openbb_terminal/sdk.py b/openbb_terminal/sdk.py
index d41133a5b38..74bb632fa43 100644
--- a/openbb_terminal/sdk.py
+++ b/openbb_terminal/sdk.py
@@ -140,6 +140,7 @@ class OpenBBSDK:
`re`: The random effects model is virtually identical to the pooled OLS model except that is accounts for the\n
`root`: Calculate test statistics for unit roots\n
`root_chart`: Determine the normality of a timeseries.\n
+ `vif`: Determine the vif, which tests for collinearity.\n
"""
return model.EconometricsRoot()