存放一些在機器學習中常會用到的Python指令。

tensorflow / Keras

use tensorboard in keras

# Train the autoencoder while logging to a directory TensorBoard can read.
from keras.callbacks import TensorBoard
# x_train is passed twice: once as the input and once as the target.
autoencoder.fit(x_train, x_train,
                epochs=50,
                batch_size=128,
                shuffle=True,
                # x_test is likewise used as both input and target
                validation_data=(x_test, x_test),
                # the TensorBoard callback is configured with this log directory
                callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])

run in cmd

tensorboard --logdir=/tmp/autoencoder

obtain the output of an intermediate layer

from keras import backend as K
# With a Sequential model: build a backend function that maps the input of
# one layer to the output of another, then evaluate it on data.

input_layer = 0  # index of the layer whose input tensor is fed
output_layer = 3  # index of the layer whose output tensor is read
get_layer_output = K.function([model.layers[input_layer].input],
                                  [model.layers[output_layer].output])
# One output tensor was requested, so take the first (only) result.
layer_output = get_layer_output([x_train])[0]
# Bare expression: shows the shape when run in an interactive session.
layer_output.shape

plot_model

from keras.utils import plot_model
plot_model(model, to_file='model.png')

kfold

Split 5 folds for stacking


# Out-of-fold stacking: every fold trains a booster, predicts its own
# held-out rows (collected in ypred) and predicts the whole test set
# (accumulated in out and averaged over the folds at the end).
# Relies on np, copy, lgb (presumably LightGBM), skf (presumably a
# StratifiedKFold-style splitter), params and num_round defined elsewhere.
trainx = np.array(trainx)
testx = np.array(testx)
trainy = np.array(trainy)

# Float copy of the labels. Writing float predictions into an integer array
# would silently truncate them, which matters if trainy holds integer labels.
ypred = trainy.astype(float)

# One accumulator slot per test row instead of a hard-coded row count.
out = np.zeros(len(testx))
n_folds = 0
for train_index, test_index in skf.split(trainx, trainy):
    X_train, X_test = trainx[train_index], trainx[test_index]
    y_train, y_test = trainy[train_index], trainy[test_index]
    train_data = lgb.Dataset(X_train, label=y_train)
    test_data = lgb.Dataset(X_test, label=y_test)
    bst = lgb.train(params, train_data, num_round, valid_sets=test_data, early_stopping_rounds=50)
    # Out-of-fold prediction for the rows this fold did not train on.
    ypred[test_index] = bst.predict(X_test, num_iteration=bst.best_iteration)
    out = out + bst.predict(testx, num_iteration=bst.best_iteration)
    n_folds += 1
record = copy.copy(ypred)
# Average over the folds actually run (previously a fixed factor of 0.2,
# i.e. exactly five folds); max() keeps the zero-fold case at all zeros.
out = out / max(n_folds, 1)
out1 = copy.copy(out)

Pandas

.iloc

選擇元素
Pandas 透過使用中括號 [] 與 .iloc 可以很靈活地從 data frame 中選擇想要的元素。要注意的是 Python 在指定 0:1 時不包含 1，在指定 0:2 時不包含 2，這一點是跟 R 語言有很大的不同之處。

Y = pd.read_csv('ind.csv', header=None)
trainy=Y.iloc[:52518,1]

numpy

argsort 排列

x = np.array([3,2,1,0,4])
a = np.argsort(x) 由小到大
>>>array([3, 2, 1, 0, 4], dtype=int64)
b = np.argsort(-x) 由大到小
>>>array([4, 0, 1, 2, 3], dtype=int64)

csv save & load


# Save an array to CSV and read one back with NumPy (same `np` alias as the
# rest of these notes).
np.savetxt('A600.csv', A600, delimiter=',')
np.genfromtxt('C:/localpath/test.csv', delimiter=',')  # load

# Or go through pandas. to_csv() returns None when it is given a path, so
# keep the DataFrame in `prediction` and write it out as a separate step.
prediction = pd.DataFrame(predictions, columns=['predictions'])
prediction.to_csv('prediction.csv')

print shape

def shape(X):
    """Print the ``.shape`` of every item in *X*, one per line.

    Args:
        X: Any iterable of objects exposing a ``shape`` attribute, e.g. a
            tuple of NumPy arrays. Items are visited in iteration order.

    Returns:
        None. The shapes are written to standard output.
    """
    # Iterate directly instead of indexing by position, so generators and
    # other non-indexable iterables work as well.
    for item in X:
        print(item.shape)

shape((x_train, x_test, y_train, y_test))

np.c_ / np.r_

用 [] not ()

a = np.array([1,2,3])
b = np.array([4,5,6])
np.c_[a,b]

>>>array([[1, 4],
       [2, 5],
       [3, 6]])
np.r_[a,b]

>>>array([1, 2, 3, 4, 5, 6])

random.seed()

np.random.seed()

兩點取最大距離

dMax = max([np.linalg.norm(c1 - c2) for c1 in self.centers for c2 in self.centers])

#最小
X = np.array([[3,4],[6,8],[1,1]])
w = np.array([[0,0],[1,1]])
q = min([np.linalg.norm(xi - wj) for xi in X for wj in w])
>>>q = 0

w = np.array([[0,0]])
q = min([np.linalg.norm(xi - wj) for xi in X for wj in w])
>>>q = 1.4142135623730951

np.array([]) 的shape


a = np.array([0,0,0]) #向量
a.shape
>>>(3,)

a = np.array([[0,0,0]]) #矩陣
a.shape
>>>(1,3)

array 合併

np.hstack
np.vstack

np.concatenate([a,b],axis=0)
"""行(直)"""
axis=0
"""列(橫)"""
axis=1

Arg max/min index

b = np.arange(6)
b[1] = 5
b
>>>array([0, 5, 2, 3, 4, 5])
np.argmax(b)
>>>1

norm

np.linalg.norm(x-c)

解決 nan

A = np.nan_to_num(A)

矩陣乘:

矩陣相乘 matrix multiplication

#A : (3,2) , B : (2,3)

C = np.dot(A,B)  # or A@B
>>>C.shape : (3,3)

元素相乘 element wise

D = A*B.T
>>>D.shape : (3,2)

matplotlib

import matplotlib

import matplotlib.pyplot as plt

畫矩陣

plt.matshow(b)
plt.show()

畫圖 plot / fig size

fig.set_size_inches(50, 500)

#or 
fig = plt.figure(dpi = 100)

subplot

http://blog.topspeedsnail.com/archives/760

others

回上一層路徑

import os
# Resolve ".." to an absolute path to get the parent directory.
parent_path = os.path.abspath("..")
# or
parent_path = os.path.abspath(os.pardir)
# os.pardir is simply a constant that stands for ".."
# This gets the parent directory without any trouble -- thanks to Vic for the tip.


拿到目前工作的目錄，然後將字串拆開，捨棄最後一段
再將其組合～～
import os

# Parent of the current working directory. os.path.dirname() honours the
# platform's own path separator; splitting on "\\" only handled
# Windows-style paths and produced an empty string on POSIX.
FullPath = os.path.dirname(os.getcwd())
# print() call form: the bare `print FullPath` statement is Python 2 only.
print(FullPath)

requirements

生成requirements.txt文件

pip freeze > requirements.txt

安装requirements.txt依赖

pip install -r requirements.txt