Kernel Density Estimation
๊ฐ ๋ฐ์ดํฐ ํฌ์ธํธ๋ค์ ํ๊ท ์ผ๋ก ๊ฐ์ง๋ ์ ๊ท๋ถํฌ๋ค์ ๋ํ ํ์ ๋ฐ์ดํฐ์ ๊ฐฏ์๋งํผ ๋๋ ๊ฒ์ด Kernel Density Estimation ๊ฒฐ๊ณผ์ด๋ค. ์ด๋ F^์ ฯฮปโ์ convolution์ผ๋ก ๋ณผ ์ ์๊ณ ์ด๋ ๊ฒ ๋ณด๋ ๊ฒฝ์ฐ์ F^์ ๊ฐ ๊ด์ธก์น์ ๋ํด 1/N์ mass๋ฅผ ํ ๋นํ๋ค๊ณ ํด์ํ ์ ์๋ค. ์๋์ discrete convolution์ ์ ์๋ฅผ ์ด์ฉํ ๊ฒ์ด๋ค.
LDA์ Logistic์ ๋น๊ต์ ๋ํด ์ดํด๋ณด์.(p127) Logistic๋ชจํ์ P(X) ๊ฐ์ ์ด ๋ค์ด๊ฐ์์ง ์๋ค. ๋์ P(G|X)๋ฅผ ํผํ
์์ผ์ conditional likelihood๋ฅผ ๊ณ ๋ฅด๋ ๊ฒ์ด๋ค.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.datasets import load_boston
## For each query point, search the data lying within a lambda-wide range around it.
def kernel(x, y, x0, lamb, type=('quad', 'tri')):
    """Kernel-weighted local average of ``y`` evaluated at the points ``x0``.

    Each observation ``x[i]`` gets a weight ``K(|x[i] - x0| / lamb)`` and the
    estimate at ``x0`` is the weighted mean of ``y`` under those weights.

    Parameters
    ----------
    x : array-like
        Observed predictor values; flattened to 1-D.
    y : array-like
        Observed responses, one per element of ``x``.
    x0 : array-like
        Query point(s). A scalar, a 1-D array or an ``(m, 1)`` column vector
        are all accepted and treated as ``m`` separate query points.
    lamb : float
        Bandwidth (half-width of the kernel window); must be positive.
    type : {'quad', 'tri'}
        ``'quad'`` selects ``3/4 * (1 - t**2)`` and ``'tri'`` selects
        ``(1 - t**3)**3``, both on ``0 <= t <= 1``. There is no usable
        default: the caller has to pick one of the two names.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``m`` with the fitted value at every query
        point. Entries are NaN where no observation has positive weight.

    Raises
    ------
    ValueError
        If ``lamb`` is not positive or ``type`` is not 'quad' or 'tri'.
    """
    if lamb <= 0:
        raise ValueError('lamb must be positive')

    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    # Column vector so that (x - x0) broadcasts to one row per query point.
    x0 = np.asarray(x0, dtype=float).reshape(-1, 1)

    # Scaled distance t = |x - x0| / lamb, capped at 1. The cap must be taken
    # at lamb (not at 1) so that the window really is lamb wide; both kernels
    # evaluate to 0 at t == 1, which gives out-of-window points zero weight.
    dist = np.minimum(np.abs(x - x0), lamb) / lamb

    if type == 'quad':
        dens = 3 / 4 * (1 - dist ** 2)
    elif type == 'tri':
        dens = (1 - dist ** 3) ** 3
    else:
        raise ValueError('You have to choose type')

    numer = (dens * y).sum(axis=1)
    denom = dens.sum(axis=1)
    # A query point with no data in its window has denom == 0; report NaN
    # there instead of emitting a 0/0 runtime warning.
    hat = np.full(denom.shape, np.nan)
    np.divide(numer, denom, out=hat, where=denom > 0)
    return hat
# Boston housing data: the last feature column is the predictor, the target
# is the response.
# NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm the
# pinned scikit-learn version still provides it.
dataset = load_boston()
x = dataset['data'][:, -1]
y = dataset['target']

# Column vector of 100 evaluation points on [1, 35]; lower end set near min(x).
x0 = np.linspace(1, 35, 100).reshape(-1, 1)
y_hat = kernel(x=x, y=y, x0=x0, lamb=3, type='quad')

# Draw the raw observations and the kernel fit on the same axes.
fig, ax = plt.subplots()
sns.lineplot(x=x, y=y, ax=ax)
sns.lineplot(x=x0.ravel(), y=y_hat, ax=ax)
def kde(x, x0, lamb, type=('quad', 'tri')):
    """Kernel density estimate of the sample ``x`` evaluated at ``x0``.

    Computes ``1 / (N * lamb) * sum_i K(|x[i] - x0| / lamb)`` where ``N`` is
    the number of observations and ``K`` is a kernel that integrates to 1,
    so the returned curve is itself a density.

    Parameters
    ----------
    x : array-like
        Observed sample; flattened to 1-D.
    x0 : array-like
        Query point(s). A scalar, a 1-D array or an ``(m, 1)`` column vector
        are all accepted and treated as ``m`` separate query points.
    lamb : float
        Bandwidth (half-width of the kernel window); must be positive.
    type : {'quad', 'tri'}
        ``'quad'`` selects ``3/4 * (1 - t**2)`` and ``'tri'`` selects
        ``70/81 * (1 - t**3)**3``, both on ``0 <= t <= 1``. There is no
        usable default: the caller has to pick one of the two names.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``m`` with the estimated density at every
        query point.

    Raises
    ------
    ValueError
        If ``x`` is empty, ``lamb`` is not positive, or ``type`` is not
        'quad' or 'tri'.
    """
    if lamb <= 0:
        raise ValueError('lamb must be positive')

    x = np.asarray(x, dtype=float).ravel()
    if x.size == 0:
        raise ValueError('x must contain at least one observation')
    # Column vector so that (x - x0) broadcasts to one row per query point.
    x0 = np.asarray(x0, dtype=float).reshape(-1, 1)

    # Scaled distance t = |x - x0| / lamb, capped at 1. The cap must be taken
    # at lamb (not at 1) so that the window really is lamb wide; both kernels
    # evaluate to 0 at t == 1, which gives out-of-window points zero weight.
    dist = np.minimum(np.abs(x - x0), lamb) / lamb

    if type == 'quad':
        dens = 3 / 4 * (1 - dist ** 2)
    elif type == 'tri':
        # (1 - |t|**3)**3 integrates to 81/70 over [-1, 1]; rescale to 1.
        dens = 70 / 81 * (1 - dist ** 3) ** 3
    else:
        raise ValueError('You have to choose type')

    # Divide by N * lamb as a whole; "/ N * lamb" would multiply by lamb.
    return dens.sum(axis=1) / (x.size * lamb)
# Second figure: raw observations plus the kernel-regression fit y_hat.
# NOTE(review): kde() is defined in this file but never called, and this
# figure re-plots y_hat - presumably it was meant to show the density
# estimate; confirm the intended plot.
fig, ax = plt.subplots()
sns.lineplot(x=x, y=y, ax=ax)
sns.lineplot(x=x0.ravel(), y=y_hat, ax=ax)