/*
Indexer.java
本程式利用 mmseg4j.jar 套件作中文斷詞,可從給定目錄自動對所有以下各層文字檔案編製索引。
索引表以物件串流存到硬碟invert.dat檔案,下回可以自動復原,方便檢索某詞出現在哪些檔案。
> javac -cp mmseg4j.jar;. Indexer.java
> java -cp mmseg4j.jar;. Indexer \data
init: path=\data
chars loaded time=110ms, line=13060, on file=\data\chars.dic
words loaded time=125ms, line=137450, on file=!/data/words.dic
unit loaded time=0ms, line=22, on file=file:\mmseg4j.jar!\data\units.dic
\data\L1\F1.txt: [,:41,的:24,沈船:17,計畫:8,澎湖:8, :7,。:7,發掘:7,為:7,初:6,進行:6,該:6,勘:6,古:6]
\data\L1\F1.txt: [,:41,的:24,沈船:17,計畫:8,澎湖:8, :7,。:7,發掘:7,為:7,初:6,進行:6,該:6,勘:6,古:6]
\data\L1\F2.txt: [,:88,的:74,、:31, :25,。:25,在:16,海底:16,尋:11,寶:11,﹁:10,沈船:10]
\data\L1\F3.txt: [,:29,的:14,沈船:9,打撈:8,澎湖:6, :5,。:5,工作:5,進行:5,館:5,後:5,古:5]
\data\L1\F4.txt: [,:21,。:13,的:11,船:7,、:6,去年:6,工作:6,澎湖:6,探勘:6,初:5,進行:5,沉:5,包括:5,勘:5,博:5,將軍:5,史:5]
\data\L1\L2\E1.txt: [,:51,的:16,與:9,。:7,主:6,老街:6, :5,做:5,拆:5,三峽:4]
\data\L1\L2\E2.txt: [,:49,的:26,三峽:11,老街:10,。:8,與:7,古蹟:7,、:6,文化:6,而:5,祖師廟:5,保留:5]
\data\L1\L2\E3.txt: [,:36,的:14,。:13,三峽:13,「:7,」:7,主:7,老街:7,協調會:5,發展:5]
\data\L1\L2\E4.txt: [,:53,的:19,。:8,三峽:6,主:6, :5,不:5,拆除:5,在:5,而:4,老街:4,財產:4,住戶:4,改建:4,古蹟:4,保留:4,排除:4,派:4,介入:4]
\data\L1\L2\E5.txt: [,:30, :18,。:10,三峽:10,老街:7,文建會:7,立:7,的:5,派:5,面:5,騎樓:5]
\data\L1\L2\E6.txt: [,:52,的:17,。:9,民眾:8,老街:7,三峽:6,拆:6, :5,而:5,文建會:5]
\data\L1\L2\E7.txt: [,:27,老街:12,。:7,屋:6,街:6, :5,的:5,、:4,與:4,三峽:4,是:4,住戶:4,古蹟:4]
\data\L1\L2\L3\D1.txt: [,:47,「:35,」:35,的:29,、:20,布袋戲:15,。:14,宛然:13,祿:10,天:9,李:9]
\data\L1\L2\L3\D2.txt: [,:23,「:17,」:17,的:14,、:12,。:8,壇:8,藝術:7,儀式:5, :4,主:4,-:4,露天劇場:4,開荒:4,啟用:4]
\data\L1\L2\L3\D3.txt: [,:52,畫:20,的:18,作:17,館:14,。:10,「:10,」:10,資料:10,這些:10]
\data\L1\L2\L3\D4.txt: [,:28,。:12,她:11,、:6,「:6,」:6,貝:6,文:6,王:6,音樂:5,的:5]
\data\L1\L2\M3\C1.txt: [,:27,的:22,」:18,「:17,。:11,中:8,柴可夫斯基:7,盛:7,能:7,余:7]
\data\L1\L2\M3\C2.txt: [,:38,的:27,舞:20,。:19,「:18,」:17,德國:11,能:11,團:11,、:10,舞蹈:10]
save and load 'invert.dat'
Input the query word:
三峽
query '三峽' occurs in articles [\data\L1\L2\E1.txt, \data\L1\L2\E2.txt, \data\L1\L2\E3.txt,\data\L1\L2\E4.txt, \data\L1\L2\E5.txt, \data\L1\L2\E6.txt, \data\L1\L2\E7.txt]
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Scanner;
import com.chenlb.mmseg4j.example.Simple;
/**
 * Builds, persists and queries an inverted index (word -> list of file paths)
 * over all files found under a root directory and its sub-directories.
 *
 * <p>Each file is read as Big5 text and cut into words by the mmseg4j
 * {@code Simple} segmenter. The index is serialized to {@code invert.dat} in
 * the working directory; when that file already exists it is loaded as-is and
 * the directory tree is NOT re-scanned, so delete the file to force a rebuild.
 */
public class Indexer
{
    /** File the serialized inverted index is written to and read from. */
    private static final String INDEX_FILE = "invert.dat";

    /** How many of the most frequent words are reported for each article. */
    private static final int TOP_WORDS = 10;

    /** Word segmenter shared by all calls to {@link #statArticle}; set in {@link #main}. */
    private static Simple segmenter;

    /** The inverted index built by {@link #main} when no saved index exists. */
    private static HashMap<String, ArrayList<String>> invertList;

    /**
     * Counts the words of one article and prints its most frequent words.
     *
     * <p>Every line is stripped of ASCII blanks, segmented, and each non-empty
     * word increments its entry in {@code frequency}. Afterwards the words whose
     * count is at least that of the {@value #TOP_WORDS}-th most frequent word are
     * printed on one line as {@code path: [word:count,...]}.
     *
     * @param file      the article to read (decoded as Big5)
     * @param frequency map that receives the word counts; existing entries are
     *                  kept and incremented
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws UnsupportedEncodingException if the Big5 charset is unavailable
     * @throws IOException if reading or segmenting fails
     */
    public static void statArticle(File file, HashMap<String, Integer> frequency)
        throws FileNotFoundException, UnsupportedEncodingException, IOException
    {
        // try-with-resources closes every stream even when segmentation throws.
        try (FileInputStream fis = new FileInputStream(file);
             InputStreamReader isr = new InputStreamReader(fis, "big5");
             BufferedReader br = new BufferedReader(isr))
        {
            String text;
            // readLine() returning null is the reliable end-of-file signal;
            // ready() only says whether a read would block.
            while ((text = br.readLine()) != null)
            {
                text = text.replaceAll(" ", ""); // remove interference from blanks
                String segText = segmenter.segWords(text, " ");
                for (String w : segText.split(" "))
                {
                    if (w.isEmpty())
                    {
                        continue;
                    }
                    Integer count = frequency.get(w);
                    frequency.put(w, (count == null) ? 1 : count + 1);
                }
            }
        }
        printTopWords(file, frequency);
    }

    /**
     * Prints the most frequent words of {@code frequency}, highest count first.
     * Words tied with the {@value #TOP_WORDS}-th entry are printed too; an
     * article with fewer distinct words prints all of them, and an empty one
     * prints {@code []}.
     */
    private static void printTopWords(File file, Map<String, Integer> frequency)
        throws IOException
    {
        List<Map.Entry<String, Integer>> entryList =
            new ArrayList<Map.Entry<String, Integer>>(frequency.entrySet());
        Collections.sort(entryList, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2)
            {
                // descending order; Integer.compare cannot overflow like subtraction
                return Integer.compare(e2.getValue(), e1.getValue());
            }
        });

        System.out.printf("%s: [", file.getCanonicalPath());
        if (!entryList.isEmpty())
        {
            // Clamp the cut-off so short articles do not index past the end.
            int cutoffIndex = Math.min(TOP_WORDS, entryList.size()) - 1;
            int countThreshold = entryList.get(cutoffIndex).getValue();
            boolean first = true;
            for (Map.Entry<String, Integer> e : entryList)
            {
                if (e.getValue() < countThreshold)
                {
                    break; // list is sorted, nothing further can qualify
                }
                System.out.printf("%s%s:%d", first ? "" : ",", e.getKey(), e.getValue());
                first = false;
            }
        }
        System.out.printf("]\n\n");
    }

    /**
     * Recursively indexes every regular file under {@code dirFile}.
     *
     * <p>For each file, every distinct word found by {@link #statArticle} gets
     * the file's canonical path appended to its list in {@code invertList}.
     *
     * @param dirFile    directory to scan
     * @param invertList index to add to (word -> canonical paths of files containing it)
     * @throws IOException if {@code dirFile} cannot be listed, or a file cannot
     *                     be read or resolved to a canonical path
     */
    public static void indexArticleWords(File dirFile,
                                         HashMap<String, ArrayList<String>> invertList)
        throws FileNotFoundException, UnsupportedEncodingException, IOException
    {
        File[] children = dirFile.listFiles();
        if (children == null)
        {
            // listFiles() returns null for a missing path, a non-directory or an
            // I/O error. Fail loudly: returning quietly would let main() save an
            // empty index that every later run would then load.
            throw new IOException("cannot list directory: " + dirFile);
        }
        for (File f : children)
        {
            if (f.isFile())
            {
                String fullName = f.getCanonicalPath();
                HashMap<String, Integer> frequency = new HashMap<String, Integer>();
                statArticle(f, frequency);
                for (String word : frequency.keySet())
                {
                    ArrayList<String> paths = invertList.get(word);
                    if (paths == null)
                    {
                        paths = new ArrayList<>();
                        invertList.put(word, paths);
                    }
                    paths.add(fullName);
                }
            }
            else if (f.isDirectory())
            {
                indexArticleWords(f, invertList);
            }
        }
    }

    /** Serializes {@code index} to {@link #INDEX_FILE}. */
    private static void saveIndex(HashMap<String, ArrayList<String>> index)
        throws IOException
    {
        try (FileOutputStream fos = new FileOutputStream(INDEX_FILE);
             ObjectOutputStream oos = new ObjectOutputStream(fos))
        {
            oos.writeObject(index);
        }
    }

    /**
     * Reads the index back from {@link #INDEX_FILE}.
     * NOTE(review): this is plain Java deserialization; only load an
     * {@code invert.dat} that this program wrote itself.
     */
    @SuppressWarnings("unchecked") // the file is written by saveIndex with exactly this type
    private static HashMap<String, ArrayList<String>> loadIndex()
        throws IOException, ClassNotFoundException
    {
        try (FileInputStream fis = new FileInputStream(INDEX_FILE);
             ObjectInputStream ois = new ObjectInputStream(fis))
        {
            return (HashMap<String, ArrayList<String>>) ois.readObject();
        }
    }

    /**
     * Entry point: builds the index if {@code invert.dat} is absent, reloads it
     * from disk, then answers one query word read from standard input.
     *
     * @param args optional single argument: the root directory to index
     *             (default {@code data/})
     */
    public static void main(String args[])
        throws FileNotFoundException, UnsupportedEncodingException,
               IOException, ClassNotFoundException
    {
        // set up root folder
        String rootName = "data/";
        String testName = "data/L1/F1.txt";
        if (args.length == 1)
        {
            rootName = args[0];
        }
        File root = new File(rootName);

        // set up segmenter
        segmenter = new Simple();

        // Segmentation smoke test on the sample article. It is skipped when the
        // sample is absent so a different root does not abort the whole run.
        File testFile = new File(testName);
        if (testFile.isFile())
        {
            statArticle(testFile, new HashMap<String, Integer>());
        }

        if (!new File(INDEX_FILE).exists())
        {
            // build the index and save it to 'invert.dat'
            invertList = new HashMap<String, ArrayList<String>>();
            indexArticleWords(root, invertList);
            saveIndex(invertList);
        }

        // Always query the copy read back from disk, which exercises save + load.
        HashMap<String, ArrayList<String>> invertList2 = loadIndex();

        // do the query test on the loaded invertList
        System.out.println("save and load 'invert.dat'\n");
        System.out.print("Input the query word: ");
        Scanner sc = new Scanner(System.in); // not closed: that would close System.in
        if (!sc.hasNext())
        {
            return; // no query supplied (end of input)
        }
        String query = sc.next();

        // print the file paths which have the query word; unknown word -> []
        ArrayList<String> list = invertList2.get(query);
        if (list == null)
        {
            list = new ArrayList<>();
        }
        System.out.printf("query '%s' occurs in articles %s\n", query, list);
    }
}
2015年11月17日 星期二
auto recursive indexing of chinese articles for later query use
2015年11月12日 星期四
an example to use the modbus rtu protocol to query/read/write the motor drivers
Modbus應用層協定常作為工業設備之間的溝通語言, RS485實體層協定則常作為電腦及設備之間較長距離、耐雜訊的溝通工具。 因此,若想由電腦對馬達設備進行控制,常見利用 Modbus及RS485建立電腦和馬達驅動器之間的連線。 電腦和馬達驅動器之間的溝通關係如下: 電腦 馬達驅動器 ------ -------------- 客戶應用程式 MODBUS應用層<--------------------->MODBUS應用層 COM1序列埠 RS485實體層<---------------------->RS485實體層 電腦端應用層對實體層的介面就是COM1序列埠。 電腦及馬達兩者間的應用層溝通就靠MODBUS RTU協定。 MODBUS RTU (remote terminal unit)協定可針對特定id的馬達驅動器, 利用序列埠發出指令,讀寫不同暫存器,達成驅動馬達運轉的目的。 以下將以東方馬達的 AZ 系列驅動器,其支援的modbus指令為例, 介紹其利用modbus rtu協定讀、寫、查詢驅動器的溝通過程, 可作為撰寫序列埠相關程式之參考。 範例出自手冊 pp224-228, HM-60260C(AZ).pdf。 --------- 讀取一段連續暫存器 ----------- A.欲讀取馬達驅動器一段連續暫存器的資料: A1.先循序對COM1寫出8 bytes如下: id: 01h 表 馬達驅動器id code: 03h 表 讀取一段連續暫存器 指令 start_addr: 18h,40h 表 1840h 暫存器起始住址 addr_count: 00h,06h 表 0006h 連續暫存器個數 crc: c2h,bch 表驗證碼 bcc2h 表示對馬達驅動器id=01h,下03h號指令讀取一段連續暫存器, 回傳起始位址1840h,連續0006h個16bit暫存器的內容 A2.再從COM1循序接收結果如下: 01h 表馬達驅動器id,應為剛才id 03h 表讀取指令之回應結果,應為剛才code 0ch 表後續的byte個數,應為addr_count兩倍 00h, 00h 表 1840h 內容 0000h 00h, 02h 表 1841h 內容 0002h ffh, ffh 表 1842h 內容 ffffh d8h, f0h 表 1843h 內容 d8f0h 00h, 00h,表 1844h 內容 0000h 27h, 10h,表 1845h 內容 2710h 82h, eah 表 驗證碼ea82h --------- 寫入一段連續暫存器 ----------- B.欲寫入資料到馬達驅動器的一段連續暫存器: B1.先循序對COM1寫出21 bytes如下: id: 04h 表 馬達驅動器id code: 10h 表 寫入一段連續暫存器 指令 start_addr: 18h,c6h 表 18c6h 暫存器起始住址 addr_count: 00h,06h 表 0006h 連續暫存器個數 byte_count: 0ch 表後續的byte個數,應為addr_count兩倍 data: 00h,00h 表 18c6h 內容 0000h 27h,10h 表 18c7h 內容 2710h 00h,00h 表 18c8h 內容 0000h 4eh,20h 表 18c9h 內容 4e20h 00h,00h, 表 18cah 內容 0000h 01h,f4h, 表 18cbh 內容 01f4h crc: 6ch,a0h 表 驗證碼a06ch 表示對馬達驅動器id=04h,下10h號指令寫入一段連續暫存器, 將後續0ch個byte,依序寫入起始位址18c6h,連續0006h個16bit暫存器 B2.再從COM1接收回應結果如下: 04h 表馬達驅動器id,應為剛才id 10h 表寫入指令之回應結果,應為剛才code 18h,c6h 表 18c6h 起始住址,應為剛才start_addr 00h,06h 表 0006h 連續住址個數,應為剛才addr_count a6h,c3h 表 驗證碼c3a6h --------- 寫入單一暫存器 ----------- C.欲寫出資料到馬達驅動器的單一暫存器: C1.先循序對COM1寫出8 bytes如下: id: 02h 表 馬達驅動器id code: 06h 表 寫入單一暫存器 指令 addr: 02h,55h 表寫入住址0255h data: 00h,50h 表寫入內容0050h crc: 98h,6dh 表驗證碼6d98h 表示對馬達驅動器id=02h,下06h號指令寫入單一暫存器, 將data=0050h,寫入addr=0255h的16bit暫存器 C2.再從COM1接收回應結果如下: 02h 
表馬達驅動器id,應為剛才id 06h 表寫入指令之回應結果,應為剛才code 02h,55h 表 0255h 暫存器住址,應為剛才addr 00h,50h 表 0050h 寫入內容,應為剛才data 98h, 6dh 表 驗證碼6d98h --------- 診斷驅動器 ----------- D.欲診斷馬達驅動器: D1.先循序對COM1寫出8 bytes如下: id: 03h 表 馬達驅動器id code: 08h 表 診斷 指令 subcode: 00h,00h 表子功能0000h data: 12h,34h 表任意測試資料1234h crc: ech,9eh 表驗證碼9eech 表示對馬達驅動器id=03h,下08h號指令進行0000h號子功能診斷, 將隨意資料data=1234h送出,看會不會回傳該資料回來 D2.再從COM1接收回應結果如下: 應和剛才送出內容完全一樣 id: 03h 表 馬達驅動器id code: 08h 表 診斷 指令 subcode: 00h,00h 表子功能0000h data: 12h,34h 表任意測試資料1234h crc: ech,9eh 表驗證碼9eech - 幾種環境的 Serial Port 寫法: Linux/Cygwin C寫法: https://www.cmrr.umn.edu/~strupp/serial.html http://www.teuniz.net/RS-232/ Windows Win32 C寫法: http://cboard.cprogramming.com/windows-programming/141173-windows-serial-programming.html Windows C#/C++/VB .NET寫法: https://msdn.microsoft.com/zh-tw/library/system.io.ports.serialport(v=vs.110).aspx SerialPort存取原理 http://www.dotblogs.com.tw/billchung/category/5702.aspx PS: https://en.wikipedia.org/wiki/Modbus http://www.modbus.org/tech.php 標準文件及多種平台之程式碼
2015年11月10日 星期二
how to extract hand drawn figures from photos
當你用手機拍照手畫圖案時,常常會因為採光關係,出現深淺不一色調,如左圖所示。這時若用photoshop之類的影像處理軟體,其提供的單一臨界值調整(threshold adjustment)工具,很難單獨萃取手畫圖案出來。
若會python程式,建議改用opencv套件提供的adaptiveThreshold方法,可自動調整臨界值,萃取手畫圖案。經適當調整參數,發現效果不錯,如右圖所示。其程式寫法如下:
去處圖destin = adaptiveThreshold(src來源圖, maxValue像素最大值, adaptiveMethod自動調整法, thresholdType臨界值套用法, blockSize參考方塊邊長_像素為單位, C方塊內像素加權和扣掉常數值當成臨界值) -- import cv2 import matplotlib.pyplot as plt %matplotlib inline input = 'c:/path/source.jpg' output = 'c:/path/destin.jpg' img = cv2.imread(input,0) img = cv2.medianBlur(img,5) newimg = cv2.adaptiveThreshold(img, 255,\ cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, \ 11, 10) cv2.imwrite( output, newimg )
2015年11月3日 星期二
weka.classifiers.functions.Winnow
weka.classifiers.functions.Winnow 屬錯誤驅動型學習器, 只處理文字屬性,將之轉成二元屬性,用來預測二元類別值。可線上累進學習。 適用在案例集屬性眾多,卻多數和預測不相關情況,可以快速鎖定相關屬性作預測。 給定案例屬性(a0, a1, ..., ak),門檻 theta,權重升級係數alpha,權重降級係數beta, 權重向量(w0, w1, ..., wk)或(w0+ - w0-, w1+ - w1-, ..., wk+ - wk-) 其中,所有符號皆為正數,擴充屬性 a0 恆為 1。則預測式有二: 不平衡版: 權重向量各維度只能正數 w0 * a0 + w1 * a1 + ... + wk * ak > theta 表類別1; 否則類別2 平衡版: 權重向量各維度允許負數 (w0+ - w0-) * a0 + (w1+ - w1-) * a1 + ... + (wk+ - wk-) * ak > theta 表類別1; 否則類別2 學習過程若遇預測錯誤,則權重向量調整法如下: 類別2誤為類別1: w *= beta 或 w+ *= beta and w- *= alpha 讓權重變小 類別1誤為類別2: w *= alpha 或 w+ *= alpha and w- *= beta 讓權重變大 參數說明: -L使用平衡版。預設值false -I 套用訓練集學習權重的輪數。預設值1 -A 權重升級係數alpha,需>1。預設值2.0 -B 權重降級係數beta,需<1。預設值0.5 -H 預測門檻theta。預設值-1,表示屬性個數 -W 權重初始值,需>0。預設值2.0 -S 亂數種子,影響訓練集的案例訓練順序。預設值1 > java weka.classifiers.functions.Winnow -t data\weather.nominal.arff Winnow Attribute weights w0 8.0 w1 1.0 w2 2.0 w3 4.0 w4 2.0 w5 2.0 w6 1.0 w7 1.0 Cumulated mistake count: 7 Time taken to build model: 0 seconds Time taken to test model on training data: 0 seconds === Error on training data === Correctly Classified Instances 10 71.4286 % Incorrectly Classified Instances 4 28.5714 % Kappa statistic 0.3778 Mean absolute error 0.2857 Root mean squared error 0.5345 Relative absolute error 61.5385 % Root relative squared error 111.4773 % Total Number of Instances 14 === Confusion Matrix === a b <-- classified as 7 2 | a = yes 2 3 | b = no === Stratified cross-validation === Correctly Classified Instances 7 50 % Incorrectly Classified Instances 7 50 % Kappa statistic -0.2564 Mean absolute error 0.5 Root mean squared error 0.7071 Relative absolute error 105 % Root relative squared error 143.3236 % Total Number of Instances 14 === Confusion Matrix === a b <-- classified as 7 2 | a = yes 5 0 | b = no 如下 weather.nominal.arff 案例集的14個案例利用4個文字屬性,預測文字屬性。 參考: 1.weka.classifiers.functions.Winnow code | doc
outlook temperature humidity windy play sunny hot high FALSE no sunny hot high TRUE no overcast hot high FALSE yes rainy mild high FALSE yes rainy cool normal FALSE yes rainy cool normal TRUE no overcast cool normal TRUE yes sunny mild high FALSE no sunny cool normal FALSE yes rainy mild normal FALSE yes sunny mild normal TRUE yes overcast mild high TRUE yes overcast hot normal FALSE yes rainy mild high TRUE no
weka.classifiers.functions.VotedPerceptron
weka.classifiers.functions.VotedPerceptron 為投票型感知器,屬錯誤驅動型學習器。 先全域性取代缺值,再轉換文字屬性為二元屬性,適用於預測二元類別值,可線上累進學習。 給定案例屬性 a=(a0, a1, ..., ak),權重向量 w=(w0, w1, ..., wk) 其中,a 屬性值為二元值 0 或 1,擴充屬性 a0 恆為 1。 預測式為 w0 * a0 + w1 * a1 + ... + wk * ak > 0 表類別1; 否則類別2 學習過程若遇預測錯誤,則權重向量調整法如下: 類別2誤為類別1: w -= a 讓權重變小 類別1誤為類別2: w += a 讓權重變大 參數說明: -I套用訓練集學習權重的輪數。預設值1 -E 多項式核函數(polynomial kernel)之次方。預設值1 -S 亂數種子,影響訓練集的案例訓練順序。預設值1 -M 最大允許權重修正次數。預設值10000 > java weka.classifiers.functions.VotedPerceptron -t data\weather.numeric.arff VotedPerceptron: Number of perceptrons=5 Time taken to build model: 0 seconds Time taken to test model on training data: 0 seconds === Error on training data === Correctly Classified Instances 9 64.2857 % Incorrectly Classified Instances 5 35.7143 % Kappa statistic 0 Mean absolute error 0.3623 Root mean squared error 0.587 Relative absolute error 78.0299 % Root relative squared error 122.4306 % Total Number of Instances 14 === Confusion Matrix === a b <-- classified as 9 0 | a = yes 5 0 | b = no === Stratified cross-validation === Correctly Classified Instances 9 64.2857 % Incorrectly Classified Instances 5 35.7143 % Kappa statistic 0 Mean absolute error 0.3736 Root mean squared error 0.589 Relative absolute error 78.4565 % Root relative squared error 119.3809 % Total Number of Instances 14 === Confusion Matrix === a b <-- classified as 9 0 | a = yes 5 0 | b = no 如下 weather.numeric.arff 案例集的14個案例利用2個文字屬性及2個數字屬性,預測文字屬性。 參考: 1.weka.classifiers.functions.VotedPerceptron code | doc
outlook temperature humidity windy play sunny 85 85 FALSE no sunny 80 90 TRUE no rainy 65 70 TRUE no sunny 72 95 FALSE no rainy 71 91 TRUE no overcast 83 86 FALSE yes rainy 70 96 FALSE yes rainy 68 80 FALSE yes overcast 64 65 TRUE yes sunny 69 70 FALSE yes rainy 75 80 FALSE yes sunny 75 70 TRUE yes overcast 72 90 TRUE yes overcast 81 75 FALSE yes
weka.classifiers.functions.Logistic
weka.classifiers.functions.Logistic 為羅吉斯迴歸學習器,
建立多類別羅吉斯迴歸模型,含嶺迴歸估計量(ridge estimator)參數,可用來預測類別值。
缺值由ReplaceMissingValuesFilter過濾器補值,文字屬性由NominalToBinaryFilter過濾器轉為數字。
參數說明:
-R <ridge> 設定log相似度的嶺迴歸估計量。預設值1e-8
-M <number> 設定最大迭代次數。預設值 -1 表示直到收斂為止
> java weka.classifiers.functions.Logistic -t data\weather.numeric.arff
Logistic Regression with ridge parameter of 1.0E-8
Coefficients...
Class
Variable yes
===============================
outlook=sunny -6.4257
outlook=overcast 13.5922
outlook=rainy -5.6562
temperature -0.0776
humidity -0.1556
windy 3.7317
Intercept 22.234
Odds Ratios...
Class
Variable yes
===============================
outlook=sunny 0.0016
outlook=overcast 799848.4279
outlook=rainy 0.0035
temperature 0.9254
humidity 0.8559
windy 41.7508
Time taken to build model: 0 seconds
Time taken to test model on training data: 0 seconds
=== Error on training data ===
Correctly Classified Instances 11 78.5714 %
Incorrectly Classified Instances 3 21.4286 %
Kappa statistic 0.5532
Mean absolute error 0.2066
Root mean squared error 0.3273
Relative absolute error 44.4963 %
Root relative squared error 68.2597 %
Total Number of Instances 14
=== Confusion Matrix ===
a b <-- classified as
7 2 | a = yes
1 4 | b = no
=== Stratified cross-validation ===
Correctly Classified Instances 8 57.1429 %
Incorrectly Classified Instances 6 42.8571 %
Kappa statistic 0.0667
Mean absolute error 0.4548
Root mean squared error 0.6576
Relative absolute error 95.5132 %
Root relative squared error 133.2951 %
Total Number of Instances 14
=== Confusion Matrix ===
a b <-- classified as
6 3 | a = yes
3 2 | b = no
如下 weather.numeric.arff 案例集的14個案例利用2個文字屬性及2個數字屬性,預測文字屬性。
outlook | temperature | humidity | windy | play |
sunny | 85 | 85 | FALSE | no |
sunny | 80 | 90 | TRUE | no |
rainy | 65 | 70 | TRUE | no |
sunny | 72 | 95 | FALSE | no |
rainy | 71 | 91 | TRUE | no |
overcast | 83 | 86 | FALSE | yes |
rainy | 70 | 96 | FALSE | yes |
rainy | 68 | 80 | FALSE | yes |
overcast | 64 | 65 | TRUE | yes |
sunny | 69 | 70 | FALSE | yes |
rainy | 75 | 80 | FALSE | yes |
sunny | 75 | 70 | TRUE | yes |
overcast | 72 | 90 | TRUE | yes |
overcast | 81 | 75 | FALSE | yes |
weka.classifiers.functions.LinearRegression
weka.classifiers.functions.LinearRegression 為標準線性迴歸學習器,
學習各數值屬性的權重,建立線性方程式模型,預測數值類別。
參數說明:
-S select_attribute_code 屬性挑選法代碼,0 表M5',1 表無,2 表Greedy。預設值 0。
> java weka.classifiers.functions.LinearRegression -t data\cpu.arff
Linear Regression Model
class =
0.0491 * MYCT +
0.0152 * MMIN +
0.0056 * MMAX +
0.6298 * CACH +
1.4599 * CHMAX +
-56.075
Time taken to build model: 0.02 seconds
Time taken to test model on training data: 0.02 seconds
=== Error on training data ===
Correlation coefficient 0.93
Mean absolute error 37.9748
Root mean squared error 58.9899
Relative absolute error 39.592 %
Root relative squared error 36.7663 %
Total Number of Instances 209
=== Cross-validation ===
Correlation coefficient 0.9012
Mean absolute error 41.0886
Root mean squared error 69.556
Relative absolute error 42.6943 %
Root relative squared error 43.2421 %
Total Number of Instances 209
cpu.arff 資料集有209案例,每個案例由6個數值屬性預測1個數值屬性。
MYCT | MMIN | MMAX | CACH | CHMIN | CHMAX | class |
125 | 256 | 6000 | 256 | 16 | 128 | 198 |
29 | 8000 | 32000 | 32 | 8 | 32 | 269 |
29 | 8000 | 32000 | 32 | 8 | 32 | 220 |
29 | 8000 | 32000 | 32 | 8 | 32 | 172 |
29 | 8000 | 16000 | 32 | 8 | 16 | 132 |
26 | 8000 | 32000 | 64 | 8 | 32 | 318 |
23 | 16000 | 32000 | 64 | 16 | 32 | 367 |
23 | 16000 | 32000 | 64 | 16 | 32 | 489 |
23 | 16000 | 64000 | 64 | 16 | 32 | 636 |
..... |
weka.classifiers.functions.SimpleLinearRegression
weka.classifiers.functions.SimpleLinearRegression 為簡單線性迴歸學習器,
簡單指的是只挑一個平方誤差最小的屬性作線性預測。
只適用數值對數值的預測,不接受缺值案例。
> java weka.classifiers.functions.SimpleLinearRegression -t data\cpu.arff
Linear regression on MMAX
0.01 * MMAX - 34
Time taken to build model: 0 seconds
Time taken to test model on training data: 0.02 seconds
=== Error on training data ===
Correlation coefficient 0.863
Mean absolute error 50.8658
Root mean squared error 81.0566
Relative absolute error 53.0319 %
Root relative squared error 50.5197 %
Total Number of Instances 209
=== Cross-validation ===
Correlation coefficient 0.7844
Mean absolute error 53.8054
Root mean squared error 99.5674
Relative absolute error 55.908 %
Root relative squared error 61.8997 %
Total Number of Instances 209
cpu.arff 資料集有209案例,每個案例由6個數值屬性預測1個數值屬性。
MYCT | MMIN | MMAX | CACH | CHMIN | CHMAX | class |
125 | 256 | 6000 | 256 | 16 | 128 | 198 |
29 | 8000 | 32000 | 32 | 8 | 32 | 269 |
29 | 8000 | 32000 | 32 | 8 | 32 | 220 |
29 | 8000 | 32000 | 32 | 8 | 32 | 172 |
29 | 8000 | 16000 | 32 | 8 | 16 | 132 |
26 | 8000 | 32000 | 64 | 8 | 32 | 318 |
23 | 16000 | 32000 | 64 | 16 | 32 | 367 |
23 | 16000 | 32000 | 64 | 16 | 32 | 489 |
23 | 16000 | 64000 | 64 | 16 | 32 | 636 |
..... |
訂閱:
文章 (Atom)