Period
Period
properties and methods
One second equals one thousand milliseconds. In other words, 1 millisecond = .001 seconds. To count milliseconds, you would therefore need at least three digits to the right of the decimal point.
One second equals one million microseconds. In other words, 1 microsecond = .000001 seconds. To count microseconds, you would therefore need at least six digits to the right of the decimal point.
One second equals one billion nanoseconds. In other words, 1 nanosecond = .000000001 seconds. To count nanoseconds, you would therefore need at least nine digits to the right of the decimal point.
How long is a nanosecond in the real world? The speed of light is 299,792.458 kilometers per second, or 299,792,458 meters per second. Therefore light travels .299792458 meters per nanosecond, or 29.9792458 centimeters per nanosecond. That’s about one foot per nanosecond.
How long would it take to beam a signal to a communications satellite that’s 40,000 kilometers away? (The speed of radio is equal to the speed of light.) Is it easier to measure this interval in seconds, milliseconds, microseconds, or nanoseconds?
A
pd.Timestamp
contains a
np.datetime64.
"""
Create a datetime.date, datetime.datetime, np.datetime64, and a pd.Timestamp.
"""
import sys
import datetime
import numpy as np
import pandas as pd
#The date and time fields shared by the datetime.datetime and the pd.Timestamp below.
fields = {
    "year": 2020,
    "month": 12,
    "day": 31,
    "hour": 12,
    "minute": 59,
    "second": 59,
    "microsecond": 123_456   #integer in the range 0 to 999_999 inclusive
}

#A datetime.date holds only a year, month, and day: no hours, minutes, seconds, etc.
d = datetime.date(2020, 12, 31)
print(f"d = {d}")

#A datetime.datetime stops at microseconds: it can't hold nanoseconds.
dt = datetime.datetime(**fields)
print(f"dt = {dt}")
print()

#A np.datetime64, created from a string with nine digits to the right of the decimal point.
nd = np.datetime64('2020-12-31T12:59:59.123456789')
print(f"nd = {nd}")

#A pd.Timestamp accepts the same fields plus a nanosecond (integer in the range 0 to 999 inclusive).
ts = pd.Timestamp(**fields, nanosecond = 789)
print(f"ts = {ts}")
print(f"m8 = {ts.asm8}")
sys.exit(0)
d = 2020-12-31 dt = 2020-12-31 12:59:59.123456 nd = 2020-12-31T12:59:59.123456789 ts = 2020-12-31 12:59:59.123456789 m8 = 2020-12-31T12:59:59.123456789
import sys
import datetime
import pandas as pd
#A datetime.datetime holds a date and time down to the microsecond.
dt = datetime.datetime(
    year = 2020,
    month = 12,
    day = 31,
    hour = 12,
    minute = 59,
    second = 59,
    microsecond = 123_456
)
print(f"dt = {dt}")

#Convert the datetime.datetime to a pd.Timestamp.
ts = pd.Timestamp(dt)
print(f"ts = {ts}")

#Convert the pd.Timestamp back to a datetime.datetime.
#pd.to_datetime(ts) would hand back another pd.Timestamp, not a datetime.datetime.
dt = ts.to_pydatetime()
print(f"dt = {dt}")
sys.exit(0)
dt = 2020-12-31 12:59:59.123456 ts = 2020-12-31 12:59:59.123456 dt = 2020-12-31 12:59:59.123456
"""
Third Friday of each month. Earliest is 15th, latest is 21st.
"""
import sys
import pandas as pd
#"WOM-3FRI" picks out the third Friday of each month in the date range.
third_fridays = pd.date_range(start = "2019-01-01", end = "2020-01-01", freq = "WOM-3FRI")
for friday in third_fridays:
    print(friday.strftime("%A, %Y-%m-%d"))
sys.exit(0)
Friday, 2019-01-18 Friday, 2019-02-15 Friday, 2019-03-15 Friday, 2019-04-19 Friday, 2019-05-17 Friday, 2019-06-21 Friday, 2019-07-19 Friday, 2019-08-16 Friday, 2019-09-20 Friday, 2019-10-18 Friday, 2019-11-15 Friday, 2019-12-20
Election day: earliest is 2nd (if November starts on a Monday), latest is 8th (if November starts on a Tuesday).
"""
Create a Series whose index is all the election days
(the first Tuesday after the first Monday of each November).
"""
import sys
import numpy as np
import pandas as pd
#Start with the first day of each November.
november_firsts = pd.date_range(start = "2010-11-01", end = "2019-11-01", freq = "AS-NOV")
series = pd.Series(data = np.arange(len(november_firsts)), index = november_firsts)

#Roll each date forward to the first Monday of its November. Earliest is 1st, latest is 7th.
first_monday = pd.tseries.offsets.WeekOfMonth(week = 0, weekday = 0)
series.index = series.index.map(first_monday.rollforward)

#Move the index one day later, to the Tuesday immediately after the first Monday.
one_day = pd.tseries.offsets.Day(n = 1)   #or "D"
series = series.shift(periods = 1, freq = one_day)
print(series)
print()

for election_day in series.index:
    print(election_day.strftime("%A, %Y-%m-%d"))
sys.exit(0)
2010-11-02 0 2011-11-08 1 2012-11-06 2 2013-11-05 3 2014-11-04 4 2015-11-03 5 2016-11-08 6 2017-11-07 7 2018-11-06 8 2019-11-05 9 dtype: int64 Tuesday, 2010-11-02 Tuesday, 2011-11-08 Tuesday, 2012-11-06 Tuesday, 2013-11-05 Tuesday, 2014-11-04 Tuesday, 2015-11-03 Tuesday, 2016-11-08 Tuesday, 2017-11-07 Tuesday, 2018-11-06 Tuesday, 2019-11-05
"C"
for “custom”
import sys
import pandas as pd
#Only Tuesdays and Thursdays are allowed into the index.
weekmask = " ".join(["Tue", "Thu"])   #or "Tue Thu"
first = pd.Timestamp("2019-08-13")
last = pd.Timestamp("2019-11-21")

#freq = "C" is the custom frequency that goes with the weekmask.
tuesdays_and_thursdays = pd.bdate_range(start = first, end = last, freq = "C", weekmask = weekmask)
for day in tuesdays_and_thursdays:
    print(day.strftime("%a %Y-%m-%d"))
sys.exit(0)
Tue 2019-08-13 Thu 2019-08-15 Tue 2019-08-20 Thu 2019-08-22 Tue 2019-08-27 Thu 2019-08-29 Tue 2019-09-03 Thu 2019-09-05 Tue 2019-09-10 Thu 2019-09-12 Tue 2019-09-17 Thu 2019-09-19 Tue 2019-09-24 Thu 2019-09-26 Tue 2019-10-01 Thu 2019-10-03 Tue 2019-10-08 Thu 2019-10-10 Tue 2019-10-15 Thu 2019-10-17 Tue 2019-10-22 Thu 2019-10-24 Tue 2019-10-29 Thu 2019-10-31 Tue 2019-11-05 Thu 2019-11-07 Tue 2019-11-12 Thu 2019-11-14 Tue 2019-11-19 Thu 2019-11-21
Try
n = 12
and
freq = "MS"
(“month start”).
Also try
n = 10
and
freq = "AS-JAN"
(“annual start”).
import sys
import datetime
import numpy as np
import pandas as pd

#n consecutive days, starting on January 1, 2020.
n = 7
first_day = datetime.date(2020, 1, 1)
index = pd.date_range(start = first_day, periods = n, freq = "D", name = "Date")   #index is a DatetimeIndex

#One integer per day plays the part of the temperature.
series = pd.Series(data = np.arange(n), index = index, name = "Temperature")
print(series)
sys.exit(0)
Date 2020-01-01 0 2020-01-02 1 2020-01-03 2 2020-01-04 3 2020-01-05 4 2020-01-06 5 2020-01-07 6 Freq: D, Name: Temperature, dtype: int64
Intervals of equal length (“fixed frequency”):
import sys
import datetime
import numpy as np
import pandas as pd
#n equally spaced timestamps, from the start of January 1, 2020 to the start of January 31, 2020.
n = 8
january_first = datetime.date(2020, 1, 1)
january_last = datetime.date(2020, 1, 31)
index = pd.date_range(start = january_first, end = january_last, periods = n, name = "Date")   #index is a DatetimeIndex
values = np.arange(n)

#The equally spaced timestamps are kept exactly as generated.
series0 = pd.Series(data = values, index = index, name = "Temperature")
print(series0)
print()

#The same data, with each timestamp rounded to a whole day.
series1 = pd.Series(data = values, index = index.round("D"), name = "Temperature")
print(series1)
sys.exit(0)
Date 2020-01-01 00:00:00.000000000 0 2020-01-05 06:51:25.714285714 1 2020-01-09 13:42:51.428571428 2 2020-01-13 20:34:17.142857143 3 2020-01-18 03:25:42.857142857 4 2020-01-22 10:17:08.571428571 5 2020-01-26 17:08:34.285714286 6 2020-01-31 00:00:00.000000000 7 Name: Temperature, dtype: int64 Date 2020-01-01 0 2020-01-05 1 2020-01-10 2 2020-01-14 3 2020-01-18 4 2020-01-22 5 2020-01-27 6 2020-01-31 7 Name: Temperature, dtype: int64
Irregularly spaced dates:
import sys
import datetime
import numpy as np
import pandas as pd
#Four days of January 2020, with gaps of unequal length between them.
dates = [datetime.date(2020, 1, day) for day in (1, 3, 4, 7)]
index = pd.DatetimeIndex(data = dates, name = "Date")

#One integer per date plays the part of the temperature.
values = np.arange(len(dates))
series = pd.Series(data = values, index = index, name = "Temperature")
print(series)
sys.exit(0)
Date 2020-01-01 0 2020-01-03 1 2020-01-04 2 2020-01-07 3 Name: Temperature, dtype: int64
import sys
import datetime
import numpy as np
import pandas as pd

#n consecutive days, starting on January 1, 2020.
n = 7
first_day = datetime.date(2020, 1, 1)
index = pd.date_range(start = first_day, periods = n, name = "Date")
series = pd.Series(data = np.arange(n), index = index, name = "Temperature")
print(series)
print()

#Shift the data down one row, leaving the index alone.
yesterday = series.shift(periods = 1)
print(yesterday)
print()

#Percent change: 100 * ((today / yesterday) - 1)
print(100 * (series / yesterday - 1))
print()

#Shift the index one day later, leaving the data alone.
print(series.shift(periods = 1, freq = "D"))
print()
sys.exit(0)
Date 2020-01-01 0 2020-01-02 1 2020-01-03 2 2020-01-04 3 2020-01-05 4 2020-01-06 5 2020-01-07 6 Freq: D, Name: Temperature, dtype: int64 Date 2020-01-01 NaN 2020-01-02 0.0 2020-01-03 1.0 2020-01-04 2.0 2020-01-05 3.0 2020-01-06 4.0 2020-01-07 5.0 Freq: D, Name: Temperature, dtype: float64 Date 2020-01-01 NaN 2020-01-02 inf 2020-01-03 100.000000 2020-01-04 50.000000 2020-01-05 33.333333 2020-01-06 25.000000 2020-01-07 20.000000 Freq: D, Name: Temperature, dtype: float64 Date 2020-01-02 0 2020-01-03 1 2020-01-04 2 2020-01-05 3 2020-01-06 4 2020-01-07 5 2020-01-08 6 Freq: D, Name: Temperature, dtype: int64
import sys
import pandas as pd
#The Timestamp whose value is zero nanoseconds.
epoch = pd.Timestamp(0, unit = "ns")
print(f"timestamp = {epoch}")
print(f"timestamp.value = {epoch.value:,} nanoseconds")
print()

#The value of the current Timestamp, also expressed in years of 365.25 days.
now = pd.Timestamp.now()
nanoseconds_per_day = 1_000_000_000 * 60 * 60 * 24
years = now.value / (nanoseconds_per_day * 365.25)
print(f"timestamp = {now}")
print(f"timestamp.value = {now.value:,} nanoseconds = {years} years")
sys.exit(0)
timestamp = 1970-01-01 00:00:00 timestamp.value = 0 nanoseconds timestamp = 2019-11-25 12:19:21.763944 timestamp.value = 1,574,684,361,763,944,000 nanoseconds = 49.89873633495399 years
"""
Give a timezone to a Timestamp.
"""
import sys
import pandas as pd
import pytz
def f(s, timestamp):
    """
    Print a heading, a timestamp, and the timestamp's value split into whole days plus hours.

    s: the heading, printed on a line of its own.
    timestamp: an object whose .value attribute is an integer number of nanoseconds.
    """
    nanosecondsPerHour = 1_000_000_000 * 60 * 60
    nanosecondsPerDay = nanosecondsPerHour * 24

    print(s)
    print(f"timestamp = {timestamp}")

    #Leave timestamp.value untouched, so that the number printed under the label
    #"timestamp.value" is the whole value and not just the part left over after the whole days.
    days, remainder = divmod(timestamp.value, nanosecondsPerDay)
    hours = remainder / nanosecondsPerHour
    print(f"timestamp.value = {timestamp.value:,} nanoseconds = {days:,} days {hours} hours")
#First a Timestamp with no time zone.
naive = pd.Timestamp("2020-01-01")
f("Time zone naive", naive)
print()

#Then the same Timestamp, localized to the US/Eastern time zone.
eastern = pytz.timezone("US/Eastern")
aware = naive.tz_localize(eastern)
f(f"Time zone aware, belonging to {eastern}", aware)
sys.exit(0)
Time zone naive timestamp = 2020-01-01 00:00:00 timestamp.value = 0 nanoseconds = 18,262 days 0.0 hours Time zone aware, belonging to US/Eastern timestamp = 2020-01-01 00:00:00-05:00 timestamp.value = 18,000,000,000,000 nanoseconds = 18,262 days 5.0 hours
import sys
import calendar
import pandas as pd
#A Period that covers the year 2020.
period = pd.Period(2020, freq = "A-DEC")   #"A" for "annual", "DEC" for "December"
print(f"period = {period}")
print(f"period.year = {period.year}")
print(f"period.freq.name = {period.freq.name}")

#The month number stored in the frequency, and the name of that month.
m = period.freq.month
print(f"The period ends at the end of month number {m}, which is {calendar.month_name[m]}.")
print(f"The duration of the period is {period.end_time - period.start_time}")
print()

print("The start and end Timestamps of the Period are")
for timestamp in [period.start_time, period.end_time]:
    #Splice timestamp.day into the format to get the day without a leading zero.
    #The "%-d" directive does the same job, but only on platforms whose strftime supports it.
    description = timestamp.strftime(f"%A, %B {timestamp.day}, %Y %I:%M:%S.%f %p")
    print(f"{timestamp} ({description})")
sys.exit(0)
period = 2020 period.year = 2020 period.freq.name = A-DEC The period ends at the end of month number 12, which is December. The duration of the period is 365 days 23:59:59.999999 The start and end Timestamps of the Period are 2020-01-01 00:00:00 (Wednesday, January 1, 2020 12:00:00.000000 AM) 2020-12-31 23:59:59.999999999 (Thursday, December 31, 2020 11:59:59.999999 PM)
import sys
import numpy as np
import pandas as pd

#n consecutive annual Periods, starting with 2020.
n = 10
first_year = pd.Period(2020, freq = "A-DEC")
index = pd.period_range(start = first_year, periods = n, name = "Period")   #index is a PeriodIndex

#One integer per Period plays the part of the temperature.
series = pd.Series(data = np.arange(n), index = index, name = "Temperature")
print(series)
sys.exit(0)
If the index of the
Series
is a
PeriodIndex,
the
freq
of the
PeriodIndex
gets printed out automatically at the bottom.
Period 2020 0 2021 1 2022 2 2023 3 2024 4 2025 5 2026 6 2027 7 2028 8 2029 9 Freq: A-DEC, Name: Temperature, dtype: int64
"""
Copy columns of a DataFrame into the index.
"""
import sys
import numpy as np
import pandas as pd
#One row per quarter of 2020 and 2021. The yearq is the year followed by the quarter digit.
rows = [[year, quarter, 10 * year + quarter] for year in (2020, 2021) for quarter in range(1, 5)]
df = pd.DataFrame(data = rows, columns = ["year", "quarter", "yearq"])
print(df)
print()

#Build a quarterly PeriodIndex out of the year and quarter columns, then remove those two columns.
df.index = pd.PeriodIndex(year = df["year"], quarter = df["quarter"], freq = "Q-DEC")
del df["year"]
del df["quarter"]
print(df)
print()

#A daily PeriodIndex for the first n days of January 2020.
n = 5
index = pd.PeriodIndex(year = [2020] * n, month = [1] * n, day = np.arange(1, n + 1), freq = "D")
print(index.dtype)
for period in index:
    print(period, period.start_time, period.end_time)
sys.exit(0)
year quarter yearq
0 2020 1 20201
1 2020 2 20202
2 2020 3 20203
3 2020 4 20204
4 2021 1 20211
5 2021 2 20212
6 2021 3 20213
7 2021 4 20214
yearq
2020Q1 20201
2020Q2 20202
2020Q3 20203
2020Q4 20204
2021Q1 20211
2021Q2 20212
2021Q3 20213
2021Q4 20214
period[D]
2020-01-01 2020-01-01 00:00:00 2020-01-01 23:59:59.999999999
2020-01-02 2020-01-02 00:00:00 2020-01-02 23:59:59.999999999
2020-01-03 2020-01-03 00:00:00 2020-01-03 23:59:59.999999999
2020-01-04 2020-01-04 00:00:00 2020-01-04 23:59:59.999999999
2020-01-05 2020-01-05 00:00:00 2020-01-05 23:59:59.999999999