diff options
author | Danglewood <85772166+deeleeramone@users.noreply.github.com> | 2024-02-21 04:37:09 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-21 12:37:09 +0000 |
commit | 754e14ca71a8d08ac002036b0fc9edfa1459e44a (patch) | |
tree | 2dd770333e6e057738524df0c40931a2a62deabd | |
parent | 5f9d958260aabe2e1f4fe0f6571ea2b53c1ff803 (diff) |
[BugFix] Fixes to_df() where the date series contains multiple TZ-offsets. (#6099)
* allow mixed utc offset in a tz-aware date column
* enforce ascending from all providers for price.historical
* fix tests
* add comments and apply treatment to basemodel_from_df before JSON output
* yfinance consistency
* add unit test
* add unit test
* better test
* ruff
---------
Co-authored-by: Igor Radovanovic <74266147+IgorWounds@users.noreply.github.com>
22 files changed, 174 insertions, 76 deletions
diff --git a/openbb_platform/core/openbb_core/app/model/obbject.py b/openbb_platform/core/openbb_core/app/model/obbject.py index 7251dc9969b..9f2a2da8746 100644 --- a/openbb_platform/core/openbb_core/app/model/obbject.py +++ b/openbb_platform/core/openbb_core/app/model/obbject.py @@ -113,13 +113,13 @@ class OBBject(Tagged, Generic[T]): return f"OBBject[{cls.results_type_repr(params)}]" def to_df( - self, index: Optional[str] = None, sort_by: Optional[str] = None + self, index: Optional[Union[str, None]] = "date", sort_by: Optional[str] = None ) -> pd.DataFrame: """Alias for `to_dataframe`.""" return self.to_dataframe(index=index, sort_by=sort_by) def to_dataframe( - self, index: Optional[str] = None, sort_by: Optional[str] = None + self, index: Optional[Union[str, None]] = "date", sort_by: Optional[str] = None ) -> pd.DataFrame: """Convert results field to pandas dataframe. @@ -173,7 +173,7 @@ class OBBject(Tagged, Generic[T]): for k, v in r.items(): # Dict[str, List[BaseModel]] if is_list_of_basemodel(v): - dict_of_df[k] = basemodel_to_df(v, index or "date") + dict_of_df[k] = basemodel_to_df(v, index) sort_columns = False # Dict[str, Any] else: @@ -184,14 +184,14 @@ class OBBject(Tagged, Generic[T]): # List[BaseModel] elif is_list_of_basemodel(res): dt: Union[List[Data], Data] = res # type: ignore - df = basemodel_to_df(dt, index or "date") + df = basemodel_to_df(dt, index) sort_columns = False # List[List | str | int | float] | Dict[str, Dict | List | BaseModel] else: try: df = pd.DataFrame(res) # Set index, if any - if index and index in df.columns: + if index is not None and index in df.columns: df.set_index(index, inplace=True) except ValueError: @@ -234,11 +234,11 @@ class OBBject(Tagged, Generic[T]): "Please install polars: `pip install polars pyarrow` to use this method." ) from exc - return from_pandas(self.to_dataframe().reset_index()) + return from_pandas(self.to_dataframe(index=None)) def to_numpy(self) -> ndarray: """Convert results field to numpy array.""" - return self.to_dataframe().reset_index().to_numpy() + return self.to_dataframe(index=None).to_numpy() def to_dict( self, @@ -259,7 +259,7 @@ class OBBject(Tagged, Generic[T]): Dict[str, List] Dictionary of lists. """ - df = self.to_dataframe() # type: ignore + df = self.to_dataframe(index=None) # type: ignore transpose = False if orient == "list": transpose = True diff --git a/openbb_platform/core/openbb_core/app/utils.py b/openbb_platform/core/openbb_core/app/utils.py index b1a563131ac..4004e3060a9 100644 --- a/openbb_platform/core/openbb_core/app/utils.py +++ b/openbb_platform/core/openbb_core/app/utils.py @@ -2,6 +2,7 @@ import ast import json +from datetime import time from typing import Dict, Iterable, List, Optional, Union import numpy as np @@ -16,7 +17,7 @@ from openbb_core.provider.abstract.data import Data def basemodel_to_df( data: Union[List[Data], Data], - index: Optional[Union[str, Iterable]] = None, + index: Optional[Union[None, str, Iterable]] = None, ) -> pd.DataFrame: """Convert list of BaseModel to a Pandas DataFrame.""" if isinstance(data, list): @@ -32,12 +33,20 @@ def basemodel_to_df( df = df.set_index(col_names) df = df.drop(["is_multiindex", "multiindex_names"], axis=1) + # If the date column contains dates only, convert them to a date to avoid encoding time data. + if "date" in df.columns: + df["date"] = df["date"].apply(pd.to_datetime) + if all(t.time() == time(0, 0) for t in df["date"]): + df["date"] = df["date"].apply(lambda x: x.date()) + if index and index in df.columns: - df = df.set_index(index) - # TODO: This should probably check if the index can be converted to a datetime instead of just assuming - if df.index.name == "date": - df.index = pd.to_datetime(df.index) + if index == "date": + df.set_index("date", inplace=True) df.sort_index(axis=0, inplace=True) + else: + df = ( + df.set_index(index) if index is not None and index in df.columns else df + ) return df @@ -59,6 +68,12 @@ def df_to_basemodel( df["multiindex_names"] = str(df.index.names) df = df.reset_index() + # Converting to JSON will add T00:00:00.000 to all dates with no time element unless we format it as a string first. + if "date" in df.columns: + df["date"] = df["date"].apply(pd.to_datetime) + if all(t.time() == time(0, 0) for t in df["date"]): + df["date"] = df["date"].apply(lambda x: x.date().strftime("%Y-%m-%d")) + return [ Data(**d) for d in json.loads(df.to_json(orient="records", date_format="iso")) ] diff --git a/openbb_platform/core/openbb_core/provider/standard_models/crypto_historical.py b/openbb_platform/core/openbb_core/provider/standard_models/crypto_historical.py index eeeda10a212..f3385d4932d 100644 --- a/openbb_platform/core/openbb_core/provider/standard_models/crypto_historical.py +++ b/openbb_platform/core/openbb_core/provider/standard_models/crypto_historical.py @@ -45,7 +45,9 @@ class CryptoHistoricalQueryParams(QueryParams): class CryptoHistoricalData(Data): """Crypto Historical Price Data.""" - date: datetime = Field(description=DATA_DESCRIPTIONS.get("date", "")) + date: Union[dateType, datetime] = Field( + description=DATA_DESCRIPTIONS.get("date", "") + ) open: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("open", "")) high: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("high", "")) low: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("low", "")) @@ -59,4 +61,6 @@ class CryptoHistoricalData(Data): @classmethod def date_validate(cls, v): # pylint: disable=E0213 """Return formatted datetime.""" - return parser.isoparse(str(v)) + if ":" in str(v): + return parser.isoparse(str(v)) + return parser.parse(str(v)).date() diff --git a/openbb_platform/core/openbb_core/provider/standard_models/currency_historical.py b/openbb_platform/core/openbb_core/provider/standard_models/currency_historical.py index b7cedc20860..196ce3e1c37 100644 --- a/openbb_platform/core/openbb_core/provider/standard_models/currency_historical.py +++ b/openbb_platform/core/openbb_core/provider/standard_models/currency_historical.py @@ -46,7 +46,9 @@ class CurrencyHistoricalQueryParams(QueryParams): class CurrencyHistoricalData(Data): """Currency Historical Price Data.""" - date: datetime = Field(description=DATA_DESCRIPTIONS.get("date", "")) + date: Union[dateType, datetime] = Field( + description=DATA_DESCRIPTIONS.get("date", "") + ) open: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("open", "")) high: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("high", "")) low: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("low", "")) @@ -61,4 +63,6 @@ class CurrencyHistoricalData(Data): @field_validator("date", mode="before", check_fields=False) def date_validate(cls, v): # pylint: disable=E0213 """Return formatted datetime.""" - return parser.isoparse(str(v)) + if ":" in str(v): + return parser.isoparse(str(v)) + return parser.parse(str(v)).date() diff --git a/openbb_platform/core/openbb_core/provider/standard_models/equity_historical.py b/openbb_platform/core/openbb_core/provider/standard_models/equity_historical.py index 2789e57ba35..e888928e683 100644 --- a/openbb_platform/core/openbb_core/provider/standard_models/equity_historical.py +++ b/openbb_platform/core/openbb_core/provider/standard_models/equity_historical.py @@ -61,7 +61,6 @@ class EquityHistoricalData(Data): @field_validator("date", mode="before", check_fields=False) def date_validate(cls, v): # pylint: disable=E0213 """Return formatted datetime.""" - v = parser.isoparse(str(v)) - if v.hour == 0 and v.minute == 0: - return v.date() - return v + if ":" in str(v): + return parser.isoparse(str(v)) + return parser.parse(str(v)).date() diff --git a/openbb_platform/core/openbb_core/provider/standard_models/etf_historical.py b/openbb_platform/core/openbb_core/provider/standard_models/etf_historical.py index 6a2eb9d5dee..f9a4b5448ed 100644 --- a/openbb_platform/core/openbb_core/provider/standard_models/etf_historical.py +++ b/openbb_platform/core/openbb_core/provider/standard_models/etf_historical.py @@ -1,7 +1,10 @@ """ETF Historical Price Standard Model.""" -from datetime import date as dateType -from typing import Optional +from datetime import ( + date as dateType, + datetime, +) +from typing import Optional, Union from dateutil import parser from pydantic import Field, NonNegativeInt, PositiveFloat, field_validator @@ -37,7 +40,9 @@ class EtfHistoricalQueryParams(QueryParams): class EtfHistoricalData(Data): """ETF Historical Price Data.""" - date: dateType = Field(description=DATA_DESCRIPTIONS.get("date", "")) + date: Union[dateType, datetime] = Field( + description=DATA_DESCRIPTIONS.get("date", "") + ) open: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("open", "")) high: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("high", "")) low: PositiveFloat = Field(description=DATA_DESCRIPTIONS.get("low", "")) @@ -49,4 +54,6 @@ class EtfHistoricalData(Data): @field_validator("date", mode="before", check_fields=False) def date_validate(cls, v): # pylint: disable=E0213 """Return formatted datetime.""" - return parser.isoparse(str(v)) + if ":" in str(v): + return parser.isoparse(str(v)) + return parser.parse(str(v)).date() diff --git a/openbb_platform/core/openbb_core/provider/standard_models/index_historical.py b/openbb_platform/core/openbb_core/provider/standard_models/index_historical.py index e6183bf9921..f9a381698cb 100644 --- a/openbb_platform/core/openbb_core/provider/standard_models/index_historical.py +++ b/openbb_platform/core/openbb_core/provider/standard_models/index_historical.py @@ -73,7 +73,6 @@ class IndexHistoricalData(Data): @classmethod def date_validate(cls, v): """Return formatted datetime.""" - v = parser.isoparse(str(v)) - if v.hour == 0 and v.minute == 0: - return v.date() - return v + if ":" in str(v): + return parser.isoparse(str(v)) + return parser.parse(str(v)).date() diff --git a/openbb_platform/core/tests/app/model/test_obbject.py b/openbb_platform/core/tests/app/model/test_obbject.py index 0e9692b8f70..ffe5b8368b2 100644 --- a/openbb_platform/core/tests/app/model/test_obbject.py +++ b/openbb_platform/core/tests/app/model/test_obbject.py @@ -5,6 +5,7 @@ from unittest.mock import MagicMock import pandas as pd import pytest from openbb_core.app.model.obbject import Chart, OBBject, OpenBBError +from openbb_core.app.utils import basemodel_to_df from openbb_core.provider.abstract.data import Data from pandas.testing import assert_frame_equal @@ -48,6 +49,13 @@ class MockMultiData(Data): value: float +class MockDataFrame(Data): + """Test helper.""" + + date: str + value: float + + @pytest.mark.parametrize( "results, expected_df", [ @@ -171,27 +179,27 @@ class MockMultiData(Data): "df1": pd.DataFrame( { "date": [ - pd.to_datetime("1956-01-01"), - pd.to_datetime("1956-02-01"), - pd.to_datetime("1956-03-01"), + pd.to_datetime("1956-01-01").date(), + pd.to_datetime("1956-02-01").date(), + pd.to_datetime("1956-03-01").date(), ], "another_date": ["2023-09-01", "2023-09-01", "2023-09-01"], "value": [0.0, 0.0, 0.0], }, columns=["date", "another_date", "value"], - ).set_index("date"), + ), "df2": pd.DataFrame( { "date": [ - pd.to_datetime("1955-03-01"), - pd.to_datetime("1955-04-01"), - pd.to_datetime("1955-05-01"), + pd.to_datetime("1955-03-01").date(), + pd.to_datetime("1955-04-01").date(), + pd.to_datetime("1955-05-01").date(), ], "another_date": ["2023-09-01", "2023-09-01", "2023-09-01"], "value": [0.0, 0.0, 0.0], }, columns=["date", "another_date", "value"], - ).set_index("date"), + ), }, axis=1, sort=True, @@ -210,11 +218,11 @@ def test_to_dataframe(results, expected_df): # Act and Assert if isinstance(expected_df, pd.DataFrame): - result = co.to_dataframe() + result = co.to_dataframe(index=None) assert_frame_equal(result, expected_df) else: with pytest.raises(expected_df.__class__) as exc_info: - co.to_dataframe() + co.to_dataframe(index=None) assert str(exc_info.value) == str(expected_df) @@ -257,6 +265,32 @@ def test_to_dataframe_w_args(results, index, sort_by): @pytest.mark.parametrize( + "results", + # Test case 1: List of models with daylight savings crossover. + ( + [ + MockDataFrame(date="2023-11-03 00:00:00-04:00", value=10), + MockDataFrame(date="2023-11-03 08:00:00-04:00", value=9), + MockDataFrame(date="2023-11-03 16:00:00-04:00", value=8), + MockDataFrame(date="2023-11-06 00:00:00-05:00", value=11), + MockDataFrame(date="2023-11-06 08:00:00-05:00", value=7), + MockDataFrame(date="2023-11-06 16:00:00-05:00", value=12), + ], + ), +) +def test_to_df_daylight_savings(results): + """Test helper.""" + # Arrange + co = OBBject(results=results) + + # Act and Assert + expected_df = basemodel_to_df(results, index="date") + result = co.to_dataframe(index="date") + assert isinstance(result, pd.DataFrame) + assert_frame_equal(expected_df, result) + + +@pytest.mark.parametrize( "results, expected_dict", [ # Case 1: Normal results with "date" column ( diff --git a/openbb_platform/providers/alpha_vantage/openbb_alpha_vantage/models/equity_historical.py b/openbb_platform/providers/alpha_vantage/openbb_alpha_vantage/models/equity_historical.py index 972d563c7aa..98a5194b955 100644 --- a/openbb_platform/providers/alpha_vantage/openbb_alpha_vantage/models/equity_historical.py +++ b/openbb_platform/providers/alpha_vantage/openbb_alpha_vantage/models/equity_historical.py @@ -1,12 +1,11 @@ """Alpha Vantage Equity Historical Price Model.""" -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, Dict, List, Literal, Optional from dateutil.relativedelta import relativedelta from openbb_alpha_vantage.utils.helpers import ( extract_key_name, - filter_by_dates, get_interval, ) from openbb_core.provider.abstract.fetcher import Fetcher @@ -19,6 +18,7 @@ from openbb_core.provider.utils.descriptions import ( QUERY_DESCRIPTIONS, ) from openbb_core.provider.utils.helpers import amake_request, get_querystring +from pandas import to_datetime from pydantic import ( Field, NonNegativeFloat, @@ -201,7 +201,17 @@ class AVEquityHistoricalFetcher( } for date, values in content.items() ] - filter_by_dates(d, query.start_date, query.end_date) - transformed_data += d - - return [AVEquityHistoricalData.model_validate(d) for d in transformed_data] + if query.start_date and query.end_date: + transformed_data = list( + filter( + lambda x: to_datetime(query.start_date) + <= to_datetime(x["date"]) + <= to_datetime(query.end_date) + timedelta(days=1), + d, + ) + ) + + return [ + AVEquityHistoricalData.model_validate(d) + for d in sorted(transformed_data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/fmp/openbb_fmp/models/crypto_historical.py b/openbb_platform/providers/fmp/openbb_fmp/models/crypto_historical.py index a9a0c1fceaa..69018d01003 100644 --- a/openbb_platform/providers/fmp/openbb_fmp/models/crypto_historical.py +++ b/openbb_platform/providers/fmp/openbb_fmp/models/crypto_historical.py @@ -107,4 +107,7 @@ class FMPCryptoHistoricalFetcher( query: FMPCryptoHistoricalQueryParams, data: List[Dict], **kwargs: Any ) -> List[FMPCryptoHistoricalData]: """Return the transformed data.""" - return [FMPCryptoHistoricalData.model_validate(d) for d in data] + return [ + FMPCryptoHistoricalData.model_validate(d) + for d in sorted(data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/fmp/openbb_fmp/models/currency_historical.py b/openbb_platform/providers/fmp/openbb_fmp/models/currency_historical.py index e54de3b4fa0..11b4aa780ed 100644 --- a/openbb_platform/providers/fmp/openbb_fmp/models/currency_historical.py +++ b/openbb_platform/providers/fmp/openbb_fmp/models/currency_historical.py @@ -107,4 +107,7 @@ class FMPCurrencyHistoricalFetcher( and d.get("change") != 0 and d.get("changeOverTime") != 0 ] - return [FMPCurrencyHistoricalData.model_validate(d) for d in data] + return [ + FMPCurrencyHistoricalData.model_validate(d) + for d in sorted(data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/fmp/openbb_fmp/models/equity_historical.py b/openbb_platform/providers/fmp/openbb_fmp/models/equity_historical.py index 105ce3c72e1..86cb7635e83 100644 --- a/openbb_platform/providers/fmp/openbb_fmp/models/equity_historical.py +++ b/openbb_platform/providers/fmp/openbb_fmp/models/equity_historical.py @@ -132,4 +132,7 @@ class FMPEquityHistoricalFetcher( query: FMPEquityHistoricalQueryParams, data: List[Dict], **kwargs: Any ) -> List[FMPEquityHistoricalData]: """Return the transformed data.""" - return [FMPEquityHistoricalData.model_validate(d) for d in data] + return [ + FMPEquityHistoricalData.model_validate(d) + for d in sorted(data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/fmp/openbb_fmp/models/index_historical.py b/openbb_platform/providers/fmp/openbb_fmp/models/index_historical.py index 01de38af1a8..8ebf3db2f38 100644 --- a/openbb_platform/providers/fmp/openbb_fmp/models/index_historical.py +++ b/openbb_platform/providers/fmp/openbb_fmp/models/index_historical.py @@ -109,7 +109,7 @@ class FMPIndexHistoricalFetcher( query: FMPIndexHistoricalQueryParams, data: List[Dict], **kwargs: Any ) -> List[FMPIndexHistoricalData]: """Return the transformed data.""" - if query.sort == "asc": - data = sorted(data, key=lambda x: x["date"], reverse=True) - - return [FMPIndexHistoricalData.model_validate(d) for d in data] + return [ + FMPIndexHistoricalData.model_validate(d) + for d in sorted(data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/intrinio/openbb_intrinio/models/equity_historical.py b/openbb_platform/providers/intrinio/openbb_intrinio/models/equity_historical.py index c4db7c181e5..6b98eb5a53a 100644 --- a/openbb_platform/providers/intrinio/openbb_intrinio/models/equity_historical.py +++ b/openbb_platform/providers/intrinio/openbb_intrinio/models/equity_historical.py @@ -229,4 +229,7 @@ class IntrinioEquityHistoricalFetcher( **kwargs: Any, ) -> List[IntrinioEquityHistoricalData]: """Return the transformed data.""" - return [IntrinioEquityHistoricalData.model_validate(d) for d in data] + return [ + IntrinioEquityHistoricalData.model_validate(d) + for d in sorted(data, key=lambda x: x["date"], reverse=False) + ] diff --git a/openbb_platform/providers/polygon/openbb_polygon/models/crypto_historical.py b/openbb_platform/providers/polygon/openbb_polygon/models/crypto_historical.py index 9c19a47579a..4239e7cbab9 100644 --- a/openbb_platform/providers/polygon/openbb_polygon/models/crypto_historical.py +++ b/openbb_platform/providers/polygon/openbb_polygon/models/crypto_historical.py @@ -152,7 +152,7 @@ class PolygonCryptoHistoricalFetcher( |