generate_holidays_file.py (prophet-0.7) | : | generate_holidays_file.py (prophet-1.0) | ||
---|---|---|---|---|
skipping to change at line 19 | skipping to change at line 19 | |||
from __future__ import unicode_literals | from __future__ import unicode_literals | |||
import inspect | import inspect | |||
import unicodedata | import unicodedata | |||
import warnings | import warnings | |||
import pandas as pd | import pandas as pd | |||
import numpy as np | import numpy as np | |||
import holidays as hdays_part1 | import holidays as hdays_part1 | |||
import fbprophet.hdays as hdays_part2 | import prophet.hdays as hdays_part2 | |||
from fbprophet.make_holidays import make_holidays_df | from prophet.make_holidays import make_holidays_df | |||
def utf8_to_ascii(text): | def utf8_to_ascii(text): | |||
"""Holidays often have utf-8 characters. These are not allowed in R | """Holidays often have utf-8 characters. These are not allowed in R | |||
package data (they generate a NOTE). | package data (they generate a NOTE). | |||
TODO: revisit whether we want to do this lossy conversion. | TODO: revisit whether we want to do this lossy conversion. | |||
""" | """ | |||
ascii_text = ( | ascii_text = ( | |||
unicodedata.normalize('NFD', text) | unicodedata.normalize('NFD', text) | |||
.encode('ascii', 'ignore') | .encode('ascii', 'ignore') | |||
.decode('ascii') | .decode('ascii') | |||
skipping to change at line 47 | skipping to change at line 47 | |||
return ascii_text | return ascii_text | |||
def generate_holidays_file(): | def generate_holidays_file(): | |||
"""Generate csv file of all possible holiday names, ds, | """Generate csv file of all possible holiday names, ds, | |||
and countries, year combination | and countries, year combination | |||
""" | """ | |||
year_list = np.arange(1995, 2045, 1).tolist() | year_list = np.arange(1995, 2045, 1).tolist() | |||
all_holidays = [] | all_holidays = [] | |||
# class names in holiday packages which are not countries | # class names in holiday packages which are not countries | |||
# Also cut out countries with utf-8 holidays that don't parse to ascii | # Also cut out countries with utf-8 holidays that don't parse to ascii | |||
class_to_exclude = set(['rd', 'BY', 'BG', 'JP', 'RS', 'UA', 'KR']) | class_to_exclude = {'rd', 'BY', 'BG', 'JP', 'RS', 'UA', 'KR'} | |||
class_list2 = inspect.getmembers(hdays_part2, inspect.isclass) | class_list2 = inspect.getmembers(hdays_part2, inspect.isclass) | |||
country_set = set([name for name in list(zip(*class_list2))[0] if len(name) == 2]) | country_set = {name for name in list(zip(*class_list2))[0] if len(name) == 2 } | |||
class_list1 = inspect.getmembers(hdays_part1, inspect.isclass) | class_list1 = inspect.getmembers(hdays_part1, inspect.isclass) | |||
country_set1 = set([name for name in list(zip(*class_list1))[0] if len(name) == 2]) | country_set1 = {name for name in list(zip(*class_list1))[0] if len(name) == 2} | |||
country_set.update(country_set1) | country_set.update(country_set1) | |||
country_set -= class_to_exclude | country_set -= class_to_exclude | |||
for country in country_set: | for country in country_set: | |||
df = make_holidays_df(year_list=year_list, country=country) | df = make_holidays_df(year_list=year_list, country=country) | |||
df['country'] = country | df['country'] = country | |||
all_holidays.append(df) | all_holidays.append(df) | |||
generated_holidays = pd.concat(all_holidays, axis=0, ignore_index=True) | generated_holidays = pd.concat(all_holidays, axis=0, ignore_index=True) | |||
generated_holidays['year'] = generated_holidays.ds.apply(lambda x: x.year) | generated_holidays['year'] = generated_holidays.ds.apply(lambda x: x.year) | |||
End of changes. 4 change blocks. | ||||
5 lines changed or deleted | 5 lines changed or added |