main
hy.kim 1 year ago
parent db40f89c94
commit 4cdf00d425

1
.gitignore vendored

@ -3,3 +3,4 @@
################################################################################ ################################################################################
/06_Decision_Tree/x64/Debug /06_Decision_Tree/x64/Debug
/.vs/06_Decision_Tree/v16

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.33423.256
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "06_Decision_Tree", "06_Decision_Tree\06_Decision_Tree.vcxproj", "{076B5AB8-29AA-4315-8A04-83615B0648AB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Debug|x64.ActiveCfg = Debug|x64
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Debug|x64.Build.0 = Debug|x64
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Debug|x86.ActiveCfg = Debug|Win32
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Debug|x86.Build.0 = Debug|Win32
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Release|x64.ActiveCfg = Release|x64
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Release|x64.Build.0 = Release|x64
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Release|x86.ActiveCfg = Release|Win32
{076B5AB8-29AA-4315-8A04-83615B0648AB}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {7F800D7A-0319-402B-9B4D-F299D10520F3}
EndGlobalSection
EndGlobal

@ -0,0 +1,150 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{076b5ab8-29aa-4315-8a04-83615b0648ab}</ProjectGuid>
<RootNamespace>My06DecisionTree</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>C:\opencv-4.6.0_X64\build\include</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>C:\opencv-4.6.0_X64\build\x64\vc15\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>opencv_world460d.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="dtree-mushroom.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="소스 파일">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="헤더 파일">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="리소스 파일">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="dtree-mushroom.cpp">
<Filter>소스 파일</Filter>
</ClCompile>
</ItemGroup>
</Project>

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup />
</Project>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,148 @@
1. Title: Mushroom Database
2. Sources:
(a) Mushroom records drawn from The Audubon Society Field Guide to North
American Mushrooms (1981). G. H. Lincoff (Pres.), New York: Alfred
A. Knopf
(b) Donor: Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
(c) Date: 27 April 1987
3. Past Usage:
1. Schlimmer,J.S. (1987). Concept Acquisition Through Representational
Adjustment (Technical Report 87-19). Doctoral disseration, Department
of Information and Computer Science, University of California, Irvine.
--- STAGGER: asymptoted to 95% classification accuracy after reviewing
1000 instances.
2. Iba,W., Wogulis,J., & Langley,P. (1988). Trading off Simplicity
and Coverage in Incremental Concept Learning. In Proceedings of
the 5th International Conference on Machine Learning, 73-79.
Ann Arbor, Michigan: Morgan Kaufmann.
-- approximately the same results with their HILLARY algorithm
3. In the following references a set of rules (given below) were
learned for this data set which may serve as a point of
comparison for other researchers.
Duch W, Adamczak R, Grabczewski K (1996) Extraction of logical rules
from training data using backpropagation networks, in: Proc. of the
The 1st Online Workshop on Soft Computing, 19-30.Aug.1996, pp. 25-30,
available on-line at: http://www.bioele.nuee.nagoya-u.ac.jp/wsc1/
Duch W, Adamczak R, Grabczewski K, Ishikawa M, Ueda H, Extraction of
crisp logical rules using constrained backpropagation networks -
comparison of two new approaches, in: Proc. of the European Symposium
on Artificial Neural Networks (ESANN'97), Bruge, Belgium 16-18.4.1997,
pp. xx-xx
Wlodzislaw Duch, Department of Computer Methods, Nicholas Copernicus
University, 87-100 Torun, Grudziadzka 5, Poland
e-mail: duch@phys.uni.torun.pl
WWW http://www.phys.uni.torun.pl/kmk/
Date: Mon, 17 Feb 1997 13:47:40 +0100
From: Wlodzislaw Duch <duch@phys.uni.torun.pl>
Organization: Dept. of Computer Methods, UMK
I have attached a file containing logical rules for mushrooms.
It should be helpful for other people since only in the last year I
have seen about 10 papers analyzing this dataset and obtaining quite
complex rules. We will try to contribute other results later.
With best regards, Wlodek Duch
________________________________________________________________
Logical rules for the mushroom data sets.
Logical rules given below seem to be the simplest possible for the
mushroom dataset and therefore should be treated as benchmark results.
Disjunctive rules for poisonous mushrooms, from most general
to most specific:
P_1) odor=NOT(almond.OR.anise.OR.none)
120 poisonous cases missed, 98.52% accuracy
P_2) spore-print-color=green
48 cases missed, 99.41% accuracy
P_3) odor=none.AND.stalk-surface-below-ring=scaly.AND.
(stalk-color-above-ring=NOT.brown)
8 cases missed, 99.90% accuracy
P_4) habitat=leaves.AND.cap-color=white
100% accuracy
Rule P_4) may also be
P_4') population=clustered.AND.cap_color=white
These rule involve 6 attributes (out of 22). Rules for edible
mushrooms are obtained as negation of the rules given above, for
example the rule:
odor=(almond.OR.anise.OR.none).AND.spore-print-color=NOT.green
gives 48 errors, or 99.41% accuracy on the whole dataset.
Several slightly more complex variations on these rules exist,
involving other attributes, such as gill_size, gill_spacing,
stalk_surface_above_ring, but the rules given above are the simplest
we have found.
4. Relevant Information:
This data set includes descriptions of hypothetical samples
corresponding to 23 species of gilled mushrooms in the Agaricus and
Lepiota Family (pp. 500-525). Each species is identified as
definitely edible, definitely poisonous, or of unknown edibility and
not recommended. This latter class was combined with the poisonous
one. The Guide clearly states that there is no simple rule for
determining the edibility of a mushroom; no rule like ``leaflets
three, let it be'' for Poisonous Oak and Ivy.
5. Number of Instances: 8124
6. Number of Attributes: 22 (all nominally valued)
7. Attribute Information: (classes: edible=e, poisonous=p)
1. cap-shape: bell=b,conical=c,convex=x,flat=f,
knobbed=k,sunken=s
2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,
pink=p,purple=u,red=e,white=w,yellow=y
4. bruises?: bruises=t,no=f
5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,
musty=m,none=n,pungent=p,spicy=s
6. gill-attachment: attached=a,descending=d,free=f,notched=n
7. gill-spacing: close=c,crowded=w,distant=d
8. gill-size: broad=b,narrow=n
9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,
green=r,orange=o,pink=p,purple=u,red=e,
white=w,yellow=y
10. stalk-shape: enlarging=e,tapering=t
11. stalk-root: bulbous=b,club=c,cup=u,equal=e,
rhizomorphs=z,rooted=r,missing=?
12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
pink=p,red=e,white=w,yellow=y
15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
pink=p,red=e,white=w,yellow=y
16. veil-type: partial=p,universal=u
17. veil-color: brown=n,orange=o,white=w,yellow=y
18. ring-number: none=n,one=o,two=t
19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,
none=n,pendant=p,sheathing=s,zone=z
20. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,
orange=o,purple=u,white=w,yellow=y
21. population: abundant=a,clustered=c,numerous=n,
scattered=s,several=v,solitary=y
22. habitat: grasses=g,leaves=l,meadows=m,paths=p,
urban=u,waste=w,woods=d
8. Missing Attribute Values: 2480 of them (denoted by "?"), all for
attribute #11.
9. Class Distribution:
-- edible: 4208 (51.8%)
-- poisonous: 3916 (48.2%)
-- total: 8124 instances

@ -0,0 +1,125 @@
//Example 21-1. Creating and training a decision tree
#include <opencv2/opencv.hpp>
#include <stdio.h>
#include <iostream>
using namespace std;
using namespace cv;
void help(char **argv) {
cout << "\n\n"
<< "Using binary decision trees to learn to recognize poisonous\n"
<< " from edible mushrooms based on visible attributes.\n"
<< " This program demonstrates how to create and a train a \n"
<< " decision tree using ml library in OpenCV.\n"
<< "Call:\n" << argv[0] << " <csv-file-path>\n\n"
<< "\nIf you don't enter a file, it defaults to agaricus-lepiota.data\n"
<< endl;
}
int main(int argc, char *argv[]) {
// If the caller gave a filename, great. Otherwise, use a default.
//
//const char *csv_file_name = argc >= 2 ? argv[1] : "../mushroom/agaricus-lepiota.data";
const char *csv_file_name = "agaricus-lepiota.data";
cout << "OpenCV Version: " << CV_VERSION << endl;
help(argv);
// Read in the CSV file that we were given.
//
cv::Ptr<cv::ml::TrainData> data_set =
cv::ml::TrainData::loadFromCSV(csv_file_name, // Input file name
0, // Header lines (ignore this many)
0, // Responses are (start) at thie column
1, // Inputs start at this column
"cat[0-22]" // All 23 columns are categorical
);
// Use defaults for delimeter (',') and missch ('?')
// Verify that we read in what we think.
//
int n_samples = data_set->getNSamples();
if (n_samples == 0) {
cerr << "Could not read file: " << csv_file_name << endl;
exit(-1);
}
else {
cout << "Read " << n_samples << " samples from " << csv_file_name << endl;
}
// Split the data, so that 90% is train data
//
data_set->setTrainTestSplitRatio(0.90, false);
int n_train_samples = data_set->getNTrainSamples();
int n_test_samples = data_set->getNTestSamples();
cout << "Found " << n_train_samples << " Train Samples, and "
<< n_test_samples << " Test Samples" << endl;
// Create a DTrees classifier.
//
cv::Ptr<cv::ml::RTrees> dtree = cv::ml::RTrees::create();
// set parameters
//
// These are the parameters from the old mushrooms.cpp code
// Set up priors to penalize "poisonous" 10x as much as "edible"
//
float _priors[] = { 1.0, 10.0 };
cv::Mat priors(1, 2, CV_32F, _priors);
dtree->setMaxDepth(8);
dtree->setMinSampleCount(10);
dtree->setRegressionAccuracy(0.01f);
dtree->setUseSurrogates(false /* true */);
dtree->setMaxCategories(15);
dtree->setCVFolds(0 /*10*/); // nonzero causes core dump
dtree->setUse1SERule(true);
dtree->setTruncatePrunedTree(true);
dtree->setPriors( priors );
//dtree->setPriors(cv::Mat()); // ignore priors for now...
// Now train the model
// NB: we are only using the "train" part of the data set
//
dtree->train(data_set);
// Having successfully trained the data, we should be able
// to calculate the error on both the training data, as well
// as the test data that we held out.
//
cv::Mat results;
float train_performance = dtree->calcError(data_set,
false, // use train data
results // cv::noArray()
);
std::vector<cv::String> names;
data_set->getNames(names);
Mat flags = data_set->getVarSymbolFlags();
// Compute some statistics on our own:
//
{
cv::Mat expected_responses = data_set->getResponses();
int good = 0, bad = 0, total = 0;
for (int i = 0; i < data_set->getNTrainSamples(); ++i) {
float received = results.at<float>(i, 0);
float expected = expected_responses.at<float>(i, 0);
cv::String r_str = names[(int)received];
cv::String e_str = names[(int)expected];
cout << "Expected: " << e_str << ", got: " << r_str << endl;
if (received == expected)
good++;
else
bad++;
total++;
}
cout << "Correct answers: " << (float(good) / total) << " % " << endl;
cout << "Incorrect answers: " << (float(bad) / total) << "%"
<< endl;
}
float test_performance = dtree->calcError(data_set,
true, // use test data
results // cv::noArray()
);
cout << "Performance on training data: " << train_performance << "%" << endl;
cout << "Performance on test data: " << test_performance << " % " << endl;
return 0;
}

Binary file not shown.

Binary file not shown.

Binary file not shown.
Loading…
Cancel
Save