感谢Dr.fish的耐心讲解和细致回答。
本次课的随堂作业如下:
有100个房屋面积的样本,均值300.85㎡,并已知总体标准差为86㎡
用t分布求95%置信水平下房屋平均面积的置信区间
导入分析包及数据
import scipy.stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'
# Load the house-size samples; header=None tells pandas the first row is data, not column names
house = pd.read_csv('house_size.csv', header=None)
取全部数据
house_size = house.iloc[:,0] # select every row of column 0 as the sample
利用t分布,计算95%置信水平下房屋平均面积的置信区间
# Compute the 95% confidence interval for the mean house size using the
# t-distribution.
sample_size = len(house_size)
sample_mean = house_size.mean()  # sample mean
# Take the standard deviation of the Series (not the DataFrame) so the result
# is a scalar; pandas uses ddof=1 by default, i.e. the sample standard deviation.
house_std = house_size.std()
# Two-sided 95% interval -> 2.5% in each tail, so the critical value is the
# 0.975 quantile. This must be ppf (inverse CDF); pdf would return the density.
t_score = scipy.stats.t.ppf(0.975, sample_size - 1)
margin_error = t_score * house_std / np.sqrt(sample_size)
lower_limit = sample_mean - margin_error
upper_limit = sample_mean + margin_error
# Parenthesised single-argument form works on both Python 2 and Python 3.
print('95%% Confidence Interval: ( %.1f, %.1f)' % (lower_limit, upper_limit))
# 输出结果(注意:下面的结果是用 t.pdf 计算得到的,区间明显偏窄;正确的临界值应为 t.ppf(0.975, n-1))
95% Confidence Interval: ( 297.3, 304.4)
另一种方法--定义函数计算置信区间
# Helper that computes a t-based confidence interval for the mean
def ci_t(data, house_std, confidence):
    """Return the two-sided t confidence interval for the mean of ``data``.

    Args:
        data: Sequence of numeric observations (anything ``len`` and
            ``np.mean`` accept).
        house_std: Standard deviation used in the standard-error term
            ``house_std / sqrt(n)``.
        confidence: Confidence level strictly between 0 and 1, e.g. ``0.95``.

    Returns:
        Tuple ``(lower_limit, upper_limit)`` centred on the sample mean.

    Raises:
        ValueError: If ``confidence`` is not strictly between 0 and 1, or
            ``data`` has fewer than two observations (zero degrees of freedom).
    """
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")
    sample_size = len(data)
    if sample_size < 2:
        raise ValueError("at least two observations are required")

    sample_mean = np.mean(data)
    # Probability mass left in each tail of the two-sided interval.
    alpha = (1 - confidence) / 2
    # Critical value is the (1 - alpha) quantile, so use ppf (inverse CDF);
    # pdf would return the density at x=alpha, which is not a critical value.
    t_score = scipy.stats.t.ppf(1 - alpha, sample_size - 1)
    margin_error = t_score * house_std / np.sqrt(sample_size)
    return (sample_mean - margin_error, sample_mean + margin_error)
输入数据
# Compute the interval at the 95% confidence level
ci_t(house_size, house_std, 0.95)
# 输出结果(注意:下面的结果是用 t.pdf 计算得到的,区间明显偏窄;正确的临界值应为 t.ppf(1 - alpha, n-1))
(297.311149,304.388851)