Tuesday, May 28, 2019

install chinese and japanese fonts for matplotlib and seaborn plots

import matplotlib
from matplotlib.font_manager import FontProperties

### download japanese and chinese font files
### install japanese font
!apt-get -y install fonts-ipafont-gothic
font_jp = FontProperties(fname=r'/usr/share/fonts/opentype/ipafont-gothic/ipagp.ttf',size=20)
print(font_jp.get_family())
print(font_jp.get_name())

### install chinese font
!apt-get -y install fonts-moe-standard-kai 
font_tw = FontProperties(fname=r'/usr/share/fonts/truetype/moe/MoeStandardKai.ttf',size=20)
print(font_tw.get_family())
print(font_tw.get_name())

### install chinese, japanese, korean font
#!apt-get install fonts-noto-cjk  ## .ttc
#font_cjk = FontProperties(fname=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',size=20)
#print(font_cjk.get_family())
#print(font_cjk.get_name())

!apt-get install ttf-unifont
font_uni = FontProperties(fname=r'/usr/share/fonts/truetype/unifont/unifont.ttf',size=20)
print(font_uni.get_family())
print(font_uni.get_name())

### enable the sans-serif font family for plotting
!grep font.family /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
!sed -i "s/#font.family/font.family/" /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
!grep font.family /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc

### add the japanese and chinese fonts to the sans-serif family list
!grep font.sans-serif /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
!sed -i "s/#font.sans-serif.*DejaVu Sans/font.sans-serif     : IPAPGothic, TW-MOE-Std-Kai, Unifont, DejaVu Sans/" /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
#!sed -i "s/font.sans-serif.*DejaVu Sans/font.sans-serif     : IPAPGothic, TW-MOE-Std-Kai, Unifont, DejaVu Sans/" /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
!grep font.sans-serif /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/matplotlibrc
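### alternative (a sketch, same effect for the current session only, without sed-editing the global matplotlibrc; family names as installed above)
#import matplotlib
#matplotlib.rcParams['font.family'] = 'sans-serif'
#matplotlib.rcParams['font.sans-serif'] = ['IPAPGothic', 'TW-MOE-Std-Kai', 'Unifont', 'DejaVu Sans']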

### link the japanese and chinese font files into matplotlib's font directory
!ln -s /usr/share/fonts/opentype/ipafont-gothic/ipagp.ttf /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/
!ln -s /usr/share/fonts/truetype/moe/MoeStandardKai.ttf /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/
#!ln -s /usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/
!ln -s /usr/share/fonts/truetype/unifont/unifont.ttf /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/
!ls /usr/local/lib/python3.6/dist-packages/matplotlib/mpl-data/fonts/ttf/[Miu]*

### rebuild the matplotlib font cache so it picks up the japanese and chinese fonts
matplotlib.font_manager._rebuild()
flist = matplotlib.font_manager.get_fontconfig_fonts()
names = [matplotlib.font_manager.FontProperties(fname=fname).get_name() for fname in flist]
print(names)
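### note: _rebuild() is a private matplotlib API and is gone in newer releases; on matplotlib >= 3.2 each font file can instead be registered directly (a sketch, not needed on this 2019 setup)
#import matplotlib.font_manager as fm
#fm.fontManager.addfont('/usr/share/fonts/opentype/ipafont-gothic/ipagp.ttf')
#fm.fontManager.addfont('/usr/share/fonts/truetype/moe/MoeStandardKai.ttf')
#fm.fontManager.addfont('/usr/share/fonts/truetype/unifont/unifont.ttf')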

### confirm the sans-serif list contains the japanese and chinese fonts
print(matplotlib.rcParams['font.sans-serif'])
if 'IPAPGothic' not in matplotlib.rcParams['font.sans-serif']:
  matplotlib.rcParams['font.sans-serif'] = ['IPAPGothic', 'TW-MOE-Std-Kai', 'Unifont'] + matplotlib.rcParams['font.sans-serif']
print(matplotlib.rcParams['font.sans-serif'])

### test font rendering in a plot
#!!!!! if the x-axis label shows the wrong font, the engine's font cache is still stale: use [Restart and run all cells]
import matplotlib.pyplot as plt

testString = u"喜欢 海灘 散步 걷기 好き"   ## simplified chinese, traditional chinese, japanese, korean, japanese
plt.title(testString, fontproperties=font_uni)
plt.xlabel(testString)   # rendered with the first font in the sans-serif list
plt.ylabel(testString, fontproperties=font_tw)
plt.show()
import seaborn as sns
emotion_counter = [('愉快', 200), ('高興', 180), ('開心', 160), ('歡喜', 140), ('生氣', 130), ('憤怒', 120), ('悲傷', 110), ('難過', 100), ('哀愁', 90), ('傷感', 80)]
sns.set_color_codes("pastel")
sns.barplot(x=[k for k, _ in emotion_counter], y=[v for _, v in emotion_counter])
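### optional check (a sketch): confirm each CJK family resolves to the expected font file; matplotlib falls back to DejaVu Sans with a warning if a family is missing
from matplotlib.font_manager import findfont, FontProperties
for family in ['IPAPGothic', 'TW-MOE-Std-Kai', 'Unifont']:
    print(family, '->', findfont(FontProperties(family=family)))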
Reference: 解決Python 3 Matplotlib與Seaborn視覺化套件中文顯示問題 (Solving Chinese display problems in the Python 3 Matplotlib and Seaborn visualization packages) link

flask-based web interface deployment for pytorch chatbot

### folder structure and flask setup
> ls 
data/  pytorch_chatbot/  save/  templates/  web.py

> ls templates/
template.html

> conda install Flask

> python web.py
 * Serving Flask app "web" (lazy loading)
 * Environment: production
   WARNING: Do not use the development server in a production environment.
   Use a production WSGI server instead.
 * Debug mode: off
 * Running on http://0.0.0.0:8080/ (Press CTRL+C to quit)

<html>
<title>template.html</title>
<body>
<pre>
Test page for pytorch chatbot on seq2seq dataset
<form action='translate' method='post'>
model: <input type='text' name='model' value='{{param["model"]}}' />
epoch: <input type='text' name='epoch' value='{{param["epoch"]}}' />
topn: <input type='text' name='topn' value='{{param["topn"]}}' />
query: <input type='text' name='query' value='{{param["query"]}}' />
<input type='submit' value='translate' />
</form>
{{param['result']}}
</pre>
</body>
</html>

##########################
# web.py
# > python web.py
#########################
from flask import Flask, request, render_template

import torch
import random
import pytorch_chatbot.main as pcm
import pytorch_chatbot.evaluate as pce
from pytorch_chatbot.train import indexesFromSentence
from pytorch_chatbot.load import loadPrepareData
from pytorch_chatbot.model import nn, EncoderRNN, LuongAttnDecoderRNN

import subprocess
import json

def predictLoad(corpus, modelFile, n_layers=1, hidden_size=512):
    print('corpus={}\nmodelFile={}'.format(corpus, modelFile))

    torch.set_grad_enabled(False)
    voc, pairs = loadPrepareData(corpus)
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.n_words, n_layers)

    checkpoint = torch.load(modelFile)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    # eval mode: affects only dropout and batch-norm layers
    encoder.train(False)
    decoder.train(False)

    encoder = encoder.to(device)
    decoder = decoder.to(device)
    return encoder, decoder, voc

def predict(encoder, decoder, voc, question, top):
    result_list = []

    if top == 1:
        beam_size = 1
        output_words, _ = pce.evaluate(encoder, decoder, voc, question, beam_size)
        answer = ' '.join(output_words)
        answer = answer.replace('<EOS>', '')
        result_list.append(answer)
    else:
        beam_size = top
        output_words_list = pce.evaluate(encoder, decoder, voc, question, beam_size)
        count = 0
        for output_words, score in output_words_list:
            count = count + 1
            if count <= top:
                output_sentence = ' '.join(output_words)
                output_sentence = output_sentence.replace('<EOS>', '')
                result_list.append(output_sentence)
    return result_list

def filter(voc, question):
    # keep only the words that exist in the model vocabulary
    words = question.split()
    result = []
    for w in words:
        if w in voc.word2index:
            result.append(w)
    return ' '.join(result)

# -------------------------------
def sentence_test(voc, en, de, top, sentence):
    source = sentence.rstrip()
    seg_source = source
    fil_source = filter(voc, seg_source)
    target = predict(en, de, voc, fil_source, top)
    result = "\nsource: '%s'\nfilter: '%s'\n" % (seg_source, fil_source)
    for answer in target:
        result = result + "\t'%s'\n" % (answer)
    result = result + '\n'
    return result

def sentence_test_model(seg_corpus_name, iteration, top, sentence):
    n_layers = 1
    hidden_size = 512

    modelFile = home_path + 'save/model/' + seg_corpus_name + '/1-1_512/' + str(iteration) + '_backup_bidir_model.tar'

    en, de, voc = predictLoad(seg_corpus_name, modelFile, n_layers, hidden_size)

    return sentence_test(voc, en, de, top, sentence)

def file_test(voc, en, de, top, test_file_name):
    with open(test_file_name, "r") as f:
        jp_data = f.readlines()

    for i, source in enumerate(jp_data):
        source = source.rstrip()
        seg_source = source
        fil_source = filter(voc, seg_source)
        target = predict(en, de, voc, fil_source, top)
        print("%d:\nsource: '%s'\nfilter: '%s'" % (i+1, seg_source, fil_source))
        for answer in target:
            print("\t'%s'" % (answer))

def file_test_model(seg_corpus_name, iteration, top, test_file_name):
    n_layers = 1
    hidden_size = 512

    modelFile = home_path + 'save/model/' + seg_corpus_name + '/1-1_512/' + str(iteration) + '_backup_bidir_model.tar'

    en, de, voc = predictLoad(seg_corpus_name, modelFile, n_layers, hidden_size)

    file_test(voc, en, de, top, test_file_name)

def print_voc(voc):
    print('tw+jp voc size=%d' % (len(voc.word2index)))
    print(voc.index2word)

def list_models(seg_corpus_name=''):
    if seg_corpus_name == '':
        modelPath = home_path + 'save/model/'
    else:
        modelPath = home_path + 'save/model/' + seg_corpus_name + '/1-1_512'

    out_bytes = subprocess.check_output(['ls', '-l', modelPath],
                                        stderr=subprocess.STDOUT)
    out_text = out_bytes.decode('utf-8')
    return out_text

def load_source(seg_corpus_name):
    path = home_path + 'data/' + seg_corpus_name + '.txt'
    with open(path) as inp:
        data = inp.readlines()

    print(len(data), len(data[0::2]), len(data[1::2]))

    data = { 'source': data[0::2], 'target': data[1::2] }
    return data

# --------------------------
app = Flask(__name__)

param0 = { 'model': 'translation2019_train_83k',
           'epoch': 6000,
           'topn': 10,
           'query': 'what time is it?',
           'result': 'result area'
         }

@app.route('/')
def forms():
    return render_template('template.html', param=param0)

@app.route('/translate/<model>/<int:epoch>/<int:topn>', methods=['GET', 'POST'])
def translate_long(model, epoch, topn):
    if request.method == 'POST':
        query = request.values['query']
    elif request.method == 'GET':
        query = request.args.get('query')

    return translate(model, epoch, topn, query)


@app.route('/translate', methods=['GET', 'POST'])
def translate_short():
    if request.method == 'POST':
        query = request.values['query']
        model = request.values['model']
        epoch = request.values['epoch']
        topn = request.values['topn']
    elif request.method == 'GET':
        query = request.args.get('query')
        model = request.args.get('model')
        epoch = request.args.get('epoch')
        topn = request.args.get('topn')

    return translate(model, epoch, topn, query)

def translate(model, epoch, topn, query):
    epoch = int(epoch)
    topn = int(topn)

    try:
        target = sentence_test_model(model, epoch, topn, query)
    except Exception:
        target = 'internal error, please retry'

    result = 'query="{}"\nresult="{}"\n'.format(query, target)
    param2 = { 'model': model,
               'epoch': epoch,
               'topn': topn,
               'query': query,
               'result': result
             }
    return render_template('template.html', param=param2)

@app.route('/list/<model>')
def list_model(model):
    mlist = list_models(model)
    return '<pre>{}</pre>'.format(mlist)

@app.route('/list/')
def list():
    mlist = list_models()
    return '<pre>{}</pre>'.format(mlist)

#######################################

USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
home_path = './'

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
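With the server up, the /translate route can also be exercised from a script. Below is a minimal client sketch using the requests library (not installed with Flask; the parameter values are the param0 defaults, host/port per app.run above):

# client.py - minimal test client for the /translate route
import requests

resp = requests.get('http://127.0.0.1:8080/translate',
                    params={'model': 'translation2019_train_83k',
                            'epoch': 6000,
                            'topn': 3,
                            'query': 'what time is it?'})
print(resp.status_code)
print(resp.text)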
Note: the code above is syntax-highlighted with the GitHub JavaScript code prettifier. To set it up:
1. Following [Blogger] 如何在 Blogger 顯示程式碼 - Google Code Prettify, install the following script in a Blogger layout HTML/JavaScript gadget:
<script src="https://cdn.jsdelivr.net/gh/google/code-prettify@master/loader/run_prettify.js"></script>
2. In the post editor, switch to HTML mode and wrap each code block in tags such as:
<code class="prettyprint lang-html linenums"> ... </code>
<code class="prettyprint lang-python linenums"> ... </code>

Friday, May 17, 2019

memo for quota setup on Ubuntu/Linux

How to set per-account disk quotas on Ubuntu

############# One-time installation and setup #############
# Install the quota package

$ sudo apt install quota

# Edit the filesystem table fstab: add usrquota,grpquota to each mount point that should enforce quotas

$ sudo vi /etc/fstab
UUID=xxxx /home ext4 defaults,usrquota,grpquota 0 2

$ sudo mount -o remount /home

$ grep /home /etc/mtab
/dev/sdb1 /home ext4 rw,relatime,quota,usrquota,grpquota,data=ordered 0 0


# Create the quota database files (aquota.user / aquota.group appear under /home)
$ sudo quotacheck -cug /home
$ sudo quotacheck -ugvmca
$ ls /home

# Turn quota enforcement on
$ sudo quotaon -a
$ sudo quotaon -ap

# Adjust the grace period for exceeding soft limits; by default both the block and inode counts get a 7-day grace period
$ sudo edquota -t
Grace period before enforcing soft limits for users:
Time units may be: days, hours, minutes, or seconds
  Filesystem    Block grace period   Inode grace period
  /dev/sdxy       7days                7days

############ Day-to-day commands for viewing and setting user quotas ############
# Edit user1's soft/hard limits on data blocks and inodes
#   data blocks hold file contents; the blocks column shows current usage
#   inodes hold file/directory metadata; the inodes column shows current usage
#   a soft limit may be exceeded, but exceeding it starts the grace period
#   a hard limit can never be exceeded
#   the grace period defaults to 7 days; once it expires, no new data can be written until usage drops back below the soft limit
$ sudo edquota -u user1
Disk quotas for user user1 (uid xxx):
  Filesystem   blocks  soft hard inodes soft hard 
  /dev/sdxy    yyyy      0     0  zzzz    0     0
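# edquota opens an interactive editor, which is awkward for scripts; the same limits can be set non-interactively with setquota(8). A minimal Python sketch (user name, limits, and mount point are illustrative):

# set block quotas for one user via setquota(8); requires root
import subprocess

def set_user_quota(user, soft_gib, hard_gib, filesystem='/home'):
    soft = str(int(soft_gib * 1024 * 1024))  # quota tools count 1 KiB blocks
    hard = str(int(hard_gib * 1024 * 1024))
    # argument order: user, block-soft, block-hard, inode-soft, inode-hard, filesystem
    subprocess.run(['setquota', '-u', user, soft, hard, '0', '0', filesystem],
                   check=True)

set_user_quota('user1', soft_gib=1.0, hard_gib=1.2)  # 1 GiB soft, 1.2 GiB hard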

# Apply user1's quota settings to user2 and user3
$ sudo edquota -p user1 user2 user3

# Show the quota settings for user1, user2, ...
$ sudo quota user1 user2 ...

# Report quota usage and limits for all users
$ sudo repquota -avus
*** report for user quotas on device /dev/sdxy
Block grace time: 7days; Inode grace time: 7days

                 Space limits          File limits
User       used  soft  hard grace   used soft hard grace
--------------------------------------------------------
root  --  1088k   0k   0k            188  0  0 
.....

Wednesday, May 8, 2019

environment setup for running pytorch chatbot

The PyTorch framework ships many deep-learning examples, such as the Chatbot demo.
The notes below record how to build an environment suitable for running the PyTorch
Chatbot on Ubuntu, with the anaconda package manager already installed.

=== Show the active conda environment in the prompt; set up once, later logins pick it up automatically
user@gpu:~/jupyter$ /usr/local/anaconda3/bin/conda init bash
user@gpu:~/jupyter$ source ~/.bashrc

=== Later logins automatically show a prompt like this
(base) user@gpu:~/jupyter$
    conda create --name chatbot python=3.6 # create the chatbot environment, needed only once
    conda activate chatbot # enter the chatbot environment

(chatbot) user@gpu:~/jupyter$
    -- the steps below are one-time setup
    conda list # list the packages installed in the current environment
    --
    conda install jupyter pytorch tensorflow-gpu torchvision tqdm [-c pytorch] # install the packages
    --
    jupyter notebook --generate-config # generate the jupyter notebook config file
    vi ~/.jupyter/jupyter_notebook_config.py # edit the config file
     c.NotebookApp.port = xxxx   # choose port xxxx
     c.NotebookApp.ip = '*'      # allow connections from outside
    jupyter notebook password       # set a login password

    -- everything above is one-time setup; afterwards, enter the chatbot environment and start jupyter notebook as follows
    jupyter notebook                # start jupyter notebook
    [Ctrl-C]
    --
    /usr/bin/lsof -i [:xxxx]  # show which process occupies which port (or a specific port xxxx)
    /usr/bin/nvidia-smi   # show which processes occupy the GPU and its memory
    /usr/bin/top   # show which processes occupy CPU and memory
    /usr/bin/kill -9 yyy  # kill the process with pid=yyy
    --
    conda deactivate  # leave chatbot and return to the base environment

(base) user@gpu:~/jupyter$
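
A quick sanity check once the chatbot environment is active (a sketch; run inside python or a notebook):

    # verify that pytorch is installed and sees the GPU
    import torch
    print(torch.__version__)
    print('CUDA available:', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('device:', torch.cuda.get_device_name(0))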

Note 1: usage notes
1. /usr/bin/xfce4-terminal is the command-line terminal, in the menu [Applications/System/Xfce Terminal]
2. /snap/bin/pycharm-community is the PyCharm IDE, in the menu [Applications/Development/PyCharm Community Edition]
3. /home/user/.conda/envs/chatbot/pkgs/ is where each user's conda-installed packages are stored
4. /home/user/.conda/envs/chatbot/bin/ is where each user's conda-installed executables are stored, e.g. the jupyter command
5. C:\Users\user\AppData\Local\conda\conda\envs\chatbot is the location of the chatbot environment on Windows


Note 2: assumes Ubuntu 18.04.1 LTS, kernel 4.15.0-47-generic #50-Ubuntu SMP, with the following already installed:
1. /usr/local/cuda <- cuda_10.0.130_410.48_linux.run
2. /usr/lib/x86_64-linux-gnu/libcudnn.so.7 <- libcudnn7_7.5.0.56-1+cuda10.0_amd64.deb
3. /usr/local/anaconda3/bin/conda <- Anaconda3-2019.03-Linux-x86_64.sh
4. /snap/bin/pycharm-community <- pycharm-community-2019.1.1.tar.gz