Humans would rather number the days starting at 1, not 0.
Call
reset_index
to revert to the default index.
"Provide an explicit index for a pd.Series. Give the index a name."
import sys
import pandas as pd
#The index holds the labels (the day numbers, starting at 1).
#The data holds one value for each label.
index = [1, 2, 3, 4, 5] #or index = range(1, 6) or index = np.arange(1, 6)
data = [10.0, 20.0, 30.0, 40.0, 50.0] #or data = np.arange(10.0, 60.0, 10.0)
series = pd.Series(data = data, index = index, name = "temperature")
#The name of the index is printed as a heading above the column of labels.
series.index.name = "day"
print(series)
print()
#Three ways to look up one value. The first two look it up by label (day 1);
#the third looks it up by position, counting from 0, so it finds day 2.
print(f"{series[1] = }") #In the brackets you write one of the items in the index.
print(f"{series.loc[1] = }") #same as previous statement
print(f"{series.iloc[1] = }") #In the brackets you write an integer.
print()
#Examine the index itself: its contents, type, name, dtype, and length.
print(f"{series.index = }")
print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")
sys.exit(0)
day 1 10.0 2 20.0 3 30.0 4 40.0 5 50.0 Name: temperature, dtype: float64 series[1] = 10.0 series.loc[1] = 10.0 series.iloc[1] = 20.0 series.index = Int64Index([1, 2, 3, 4, 5], dtype='int64', name='day') type(series.index) = <class 'pandas.core.indexes.numeric.Int64Index'> series.index.name = 'day' series.index.dtype.name = 'int64' len(series.index) = 5
In the above program, change
index = [1, 2, 3, 4, 5]
data = [10.0, 20.0, 30.0, 40.0, 50.0]
series = pd.Series(data = data, index = index, name = "temperature")
series.index.name = "day"
to
#Create the pd.Index, complete with a name.
data = [1, 2, 3, 4, 5]
index = pd.Index(data = data, name = "day")
#Create the pd.Series, and put the pd.Index into it.
data = [10.0, 20.0, 30.0, 40.0, 50.0]
series = pd.Series(data = data, index = index, name = "temperature")
or to
#Make sure the data of the pd.Series and the index of the pd.Series are the same length.
data = [10.0, 20.0, 30.0, 40.0, 50.0]
index = pd.RangeIndex(1, len(data) + 1, name = "day")
series = pd.Series(data = data, index = index, name = "temperature")
You might want to create the index as a separate object
if you were planning to put the same index into several
pd.Serieses.
"Put the same pd.Index into two pd.Serieses."
import sys
import pandas as pd

#One index (days 1 through 5), created once and shared by both pd.Serieses.
index = pd.RangeIndex(start = 1, stop = 6, name = "day")

temperatures = [10.0, 20.0, 30.0, 40.0, 50.0]
humidities = [11.0, 21.0, 31.0, 41.0, 51.0]
temperatureSeries = pd.Series(data = temperatures, index = index, name = "temperature")
humiditySeries = pd.Series(data = humidities, index = index, name = "humidity")

#Print each pd.Series, followed by an empty line.
for series in (temperatureSeries, humiditySeries):
    print(series)
    print()

sys.exit(0)
day 1 10.0 2 20.0 3 30.0 4 40.0 5 50.0 Name: temperature, dtype: float64 day 1 11.0 2 21.0 3 31.0 4 41.0 5 51.0 Name: humidity, dtype: float64
Now that we have two
pd.Serieses
sharing the same
pd.Index,
we can put the two
pd.Serieses
side by side into a single
pd.DataFrame.
The
axis = 1
means
“side by side”.
If you change it to
axis = 0,
do you still get a
pd.DataFrame
or do you get a
pd.Series?
#Create a pd.DataFrame: each pd.Series becomes one column,
#and the rows are lined up on the index that the two pd.Serieses share.
columns = [temperatureSeries, humiditySeries]
df = pd.concat(columns, axis = 1)
print(df)
temperature humidity
day
1 10.0 11.0
2 20.0 21.0
3 30.0 31.0
4 40.0 41.0
5 50.0 51.0
"Create an index containing non-consecutive integers."
import sys
import pandas as pd

#The street numbers become the labels in the index.
streets = [242, 238, 231, 225, 215, 207]
index = pd.Index(data = streets, name = "street")

#One landmark per street, in the same order as the street numbers.
landmarks = [
    "Van Cortlandt Park",
    "Chipotle",
    "Loeser's Kosher Deli",
    "Marble Hill",
    "Baker Field",
    "Dyckman Farmhouse"
]
series = pd.Series(data = landmarks, index = index, name = "landmark")

print(series)
print()
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
Combine the two
lists
in the above program into a single
list.
(In the program below, there’s a tricky way we could have used
zip
instead of a pair of list comprehensions.)
"An index containing non-consecutive integers: the #1 subway."
import sys
import pandas as pd

#Each item is a two-item list: [street number, landmark].
stops = [
    [242, "Van Cortlandt Park"],
    [238, "Chipotle"],
    [231, "Loeser's Kosher Deli"],
    [225, "Marble Hill"],
    [215, "Baker Field"],
    [207, "Dyckman Farmhouse"]
]

#Pull the list of pairs apart with a pair of list comprehensions:
#the street numbers go into the index, the landmarks become the data.
streets = [street for street, _ in stops]
landmarks = [landmark for _, landmark in stops]

index = pd.Index(data = streets, name = "street")
series = pd.Series(data = landmarks, index = index, name = "landmark")
print(series)
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
It’s even easier to take the data from a Python
dict.
But you can do this only if the numbers are unique.
"Create a pd.Series from a Python dict: the #1 subway."
import sys
import pandas as pd

#The keys of the dict become the index; the values become the data.
landmarks = {
    242: "Van Cortlandt Park",
    238: "Chipotle",
    231: "Loeser's Kosher Deli",
    225: "Marble Hill",
    215: "Baker Field",
    207: "Dyckman Farmhouse"
}
series = pd.Series(data = landmarks, name = "landmark")
series.index.name = "street"
print(series)
print()

#Left-justify the column of strings.
text = series.to_string(dtype = True, name = True)   #text is one big string
#The first line is the heading and the last line is the name/dtype summary.
#Only the rows in between get reformatted.
header, *rows, footer = text.splitlines()
gap = 3 * " "
#Split each row at its first run of whitespace into street and landmark,
#then put the two pieces back together with a gap of fixed width.
rows = [gap.join(row.split(maxsplit = 1)) for row in rows]
print("\n".join([header, *rows, footer]))
sys.exit(0)
street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object street 242 Van Cortlandt Park 238 Chipotle 231 Loeser's Kosher Deli 225 Marble Hill 215 Baker Field 207 Dyckman Farmhouse Name: landmark, dtype: object
"Provide an explicit index of strings for a pd.Series."
import sys
import pandas as pd

#The names of the weekdays become the labels in the index.
weekdays = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday"
]
index = pd.Index(data = weekdays, name = "weekday")

#One temperature per weekday, in the same order as the weekdays.
temperatures = [10.0, 20.0, 30.0, 35.0, 30.0, 25.0, 20.0]
series = pd.Series(data = temperatures, index = index, name = "temperature")

print(series) #or print(series.to_string(dtype = True, length = True, name = True))
print()

#Examine the index itself: its contents, type, name, dtype, and length.
print(f"{series.index = }")
print()
print(f"{type(series.index) = }")
print(f"{series.index.name = }")
print(f"{series.index.dtype.name = }")
print(f"{len(series.index) = }")
sys.exit(0)
weekday
Sunday 10.0
Monday 20.0
Tuesday 30.0
Wednesday 35.0
Thursday 30.0
Friday 25.0
Saturday 20.0
Name: temperature, dtype: float64
series.index = Index(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
'Saturday'],
dtype='object', name='weekday')
type(series.index) = <class 'pandas.core.indexes.base.Index'>
series.index.name = 'weekday'
series.index.dtype.name = 'object'
len(series.index) = 7
Combine the above two
lists
of data into one
list
"Provide an explicit index of strings for a pd.Series."
import sys
import pandas as pd

#Each item is a two-item list: [name of weekday, temperature].
days = [
    ["Sunday",    10.0],
    ["Monday",    20.0],
    ["Tuesday",   30.0],
    ["Wednesday", 35.0],
    ["Thursday",  30.0],
    ["Friday",    25.0],
    ["Saturday",  20.0]
]

#Pull the list of pairs apart with two list comprehensions:
#the names go into the index, the temperatures become the data.
weekdays = [weekday for weekday, _ in days]
temperatures = [temperature for _, temperature in days]

index = pd.Index(data = weekdays, name = "weekday")
series = pd.Series(data = temperatures, index = index, name = "temperature")
print(series)
sys.exit(0)
#Another way to create the above Series,
#without having to write the two list comprehensions.
#zip(*days) regroups the [weekday, temperature] pairs into
#one tuple of all the weekdays and one tuple of all the temperatures.
weekdays, temperatures = zip(*days)
series = pd.Series(data = temperatures, index = weekdays, name = "temperature")
series.index.name = "weekday"
weekday Sunday 10.0 Monday 20.0 Tuesday 30.0 Wednesday 35.0 Thursday 30.0 Friday 25.0 Saturday 20.0 Name: temperature, dtype: float64
or into a
dict.
"Provide an explicit index of strings for a pd.Series."
import sys
import pandas as pd

#The keys of the dict become the index; the values become the data.
temperatures = {
    "Sunday":    10.0,
    "Monday":    20.0,
    "Tuesday":   30.0,
    "Wednesday": 35.0,
    "Thursday":  30.0,
    "Friday":    25.0,
    "Saturday":  20.0
}

#A dict has no place to hold a name for the index, so name the index afterwards.
series = pd.Series(data = temperatures, name = "temperature").rename_axis("weekday")
print(series)
sys.exit(0)
weekday Sunday 10.0 Monday 20.0 Tuesday 30.0 Wednesday 35.0 Thursday 30.0 Friday 25.0 Saturday 20.0 Name: temperature, dtype: float64
A Python
datetime.datetime
has microsecond precision;
a
pd.Timestamp
has nanosecond precision.
That’s a thousand times more precise.
"Provide an explicit index of pd.Timestamps for a pd.Series."
import sys
import pandas as pd

#One pd.Timestamp for each day from December 25 through December 31, 2020.
#Each day's temperature is the same number as its day of the month.
temperatures = {
    pd.Timestamp(year = 2020, month = 12, day = day): float(day)
    for day in range(25, 32)
}

#The keys of the dict become the index; the values become the data.
series = pd.Series(data = temperatures, name = "temperature")
series.index.name = "timestamps"
print(series)
print()
print(f"{series.index.dtype.name = }")
sys.exit(0)
timestamps 2020-12-25 25.0 2020-12-26 26.0 2020-12-27 27.0 2020-12-28 28.0 2020-12-29 29.0 2020-12-30 30.0 2020-12-31 31.0 Name: temperature, dtype: float64 series.index.dtype.name = 'datetime64[ns]'
"Provide an explicit pd.DatetimeIndex for a pd.Series."
import sys
import pandas as pd

#The first and last dates of the index.
start = pd.Timestamp("2020-12-25") #or start = pd.Timestamp(year = 2020, month = 12, day = 25)
end = pd.Timestamp("2020-12-31")

#One date per day, from start through end inclusive.
index = pd.date_range(start = start, end = end, freq = "1D", name = "date")
#or index = pd.date_range("2020-12-25", "2020-12-31", name = "date")

#Each day's temperature is the same number as its day of the month.
data = [float(day) for day in range(25, 32)] #or data = np.arange(25.0, 32.0)
series = pd.Series(data = data, index = index, name = "temperature")
print(series)
print()

#Examine the index in greater detail.
print(f"{series.index = }")
print()
print(f"{type(series.index) = }")
print(f"{series.index.dtype.name = }")
print(f"{series.index.freqstr = }")
sys.exit(0)
date
2020-12-25 25.0
2020-12-26 26.0
2020-12-27 27.0
2020-12-28 28.0
2020-12-29 29.0
2020-12-30 30.0
2020-12-31 31.0
Freq: D, Name: temperature, dtype: float64
series.index = DatetimeIndex(['2020-12-25', '2020-12-26', '2020-12-27', '2020-12-28',
'2020-12-29', '2020-12-30', '2020-12-31'],
dtype='datetime64[ns]', name='date', freq='D')
type(series.index) = <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
series.index.dtype.name = 'datetime64[ns]'
series.index.freqstr = 'D'
#A deliberate mistake: the stop value of the pd.RangeIndex is missing its + 1,
#so the index comes out one item shorter than the data.
temperatures = [10.0, 20.0, 30.0, 40.0, 50.0]   #temperatures is of length 5
count = len(temperatures)
index = pd.RangeIndex(1, count, name = "day")   #index is of length 4
#The data and the index of a pd.Series must be the same length,
#so expect this statement to fail.
series = pd.Series(data = temperatures, index = index, name = "temperature")
Create a
DatetimeIndex
whose
frequency
is every 10 days.
Also try
freq = "W"
for “weekly”.
"A pd.DatetimeIndex whose frequency is every 10 days."
import sys
import numpy as np
import pandas as pd

#Every 10th day of 2020, starting on January 1.
firstDay = pd.Timestamp(year = 2020, month = 1, day = 1)
lastDay = pd.Timestamp(year = 2020, month = 12, day = 31)
index = pd.date_range(start = firstDay, end = lastDay, freq = "10D", name = "date")

#Number the dates 1, 2, 3, ... so that there is one value for each date.
count = len(index)
data = np.arange(1, count + 1)
series = pd.Series(data = data, index = index, name = "temperature")
print(series)
print()

#Take the frequency apart: the whole string, the multiple, and the base unit.
print(f"{series.index.freqstr = }")
print(f"{series.index.freq.n = }")
print(f"{series.index.freq.base.name = }")
sys.exit(0)
date 2020-01-01 1 2020-01-11 2 2020-01-21 3 2020-01-31 4 2020-02-10 5 2020-02-20 6 2020-03-01 7 2020-03-11 8 2020-03-21 9 2020-03-31 10 2020-04-10 11 2020-04-20 12 2020-04-30 13 2020-05-10 14 2020-05-20 15 2020-05-30 16 2020-06-09 17 2020-06-19 18 2020-06-29 19 2020-07-09 20 2020-07-19 21 2020-07-29 22 2020-08-08 23 2020-08-18 24 2020-08-28 25 2020-09-07 26 2020-09-17 27 2020-09-27 28 2020-10-07 29 2020-10-17 30 2020-10-27 31 2020-11-06 32 2020-11-16 33 2020-11-26 34 2020-12-06 35 2020-12-16 36 2020-12-26 37 Freq: 10D, Name: temperature, dtype: int64 series.index.freqstr = '10D' series.index.freq.n = 10 series.index.freq.base.name = 'D'