Skip to content

Commit c2e3d37

Browse files
Everything is running :-)
1 parent 3164f33 commit c2e3d37

File tree

1 file changed

+75
-48
lines changed

1 file changed

+75
-48
lines changed

notebooks/Alhazen.ipynb

Lines changed: 75 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@
214214
"outputs": [],
215215
"source": [
216216
"# Load initial input files\n",
217-
"sample_list = ['sqrt(-16)', 'sqrt(4)']"
217+
"initial_sample_list = ['sqrt(-16)', 'sqrt(4)']"
218218
]
219219
},
220220
{
@@ -604,7 +604,11 @@
604604
" @abstractmethod\n",
605605
" def get_feature_value(self, derivation_tree) -> float:\n",
606606
" '''Returns the feature value for a given derivation tree of an input.'''\n",
607-
" pass"
607+
" pass\n",
608+
"\n",
609+
" def replace(self, new_key: str) -> 'Feature':\n",
610+
" '''Returns a new feature with the same name but a different key.'''\n",
611+
" return self.__class__(self.name, self.rule, new_key)"
608612
]
609613
},
610614
{
@@ -1113,7 +1117,6 @@
11131117
"source": [
11141118
"from sklearn.tree import DecisionTreeClassifier\n",
11151119
"from sklearn.feature_extraction import DictVectorizer\n",
1116-
"from sklearn import tree\n",
11171120
"\n",
11181121
"import graphviz"
11191122
]
@@ -1220,11 +1223,15 @@
12201223
"metadata": {},
12211224
"outputs": [],
12221225
"source": [
1223-
"dot_data = tree.export_graphviz(clf, out_file=None,\n",
1224-
" feature_names=vec.get_feature_names_out(),\n",
1225-
" class_names=[\"BUG\", \"NO BUG\"],\n",
1226-
" filled=True, rounded=True)\n",
1227-
"graph = graphviz.Source(dot_data)"
1226+
"import graphviz\n",
1227+
"import sklearn\n",
1228+
"\n",
1229+
"def show_decision_tree(clf, feature_names):\n",
1230+
" dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n",
1231+
" feature_names=feature_names,\n",
1232+
" class_names=[\"BUG\", \"NO_BUG\"], \n",
1233+
" filled=True, rounded=True) \n",
1234+
" return graphviz.Source(dot_data)"
12281235
]
12291236
},
12301237
{
@@ -1233,7 +1240,7 @@
12331240
"metadata": {},
12341241
"outputs": [],
12351242
"source": [
1236-
"display(graph)"
1243+
"show_decision_tree(clf, vec.get_feature_names_out())"
12371244
]
12381245
},
12391246
{
@@ -1309,6 +1316,7 @@
13091316
"source": [
13101317
"def train_tree(data):\n",
13111318
" sample_bug_count = len(data[(data[\"oracle\"].astype(str) == \"BUG\")])\n",
1319+
" assert sample_bug_count > 0, \"No bug samples found\"\n",
13121320
" sample_count = len(data)\n",
13131321
"\n",
13141322
" clf = DecisionTreeClassifier(min_samples_leaf=1,\n",
@@ -1392,7 +1400,7 @@
13921400
"clf = clf.fit(X_data, oracle)\n",
13931401
"\n",
13941402
"import graphviz\n",
1395-
"dot_data = tree.export_graphviz(clf, out_file=None, \n",
1403+
"dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n",
13961404
" feature_names=feature_names,\n",
13971405
" class_names=[\"BUG\", \"NO BUG\"], \n",
13981406
" filled=True, rounded=True) \n",
@@ -1697,13 +1705,19 @@
16971705
" mini = row['min']\n",
16981706
" maxi = row['max']\n",
16991707
" if (not np.isinf(mini)) or (not np.isinf(maxi)):\n",
1700-
" requirements.append(Requirement(feature, mini, maxi))\n",
1708+
" requirements.append(TreeRequirement(feature, mini, maxi))\n",
17011709
" paths.append(TreePath(None, is_bug, requirements))\n",
17021710
"\n",
1703-
" return paths\n",
1704-
"\n",
1705-
"\n",
1706-
"class Requirement:\n",
1711+
" return paths\n"
1712+
]
1713+
},
1714+
{
1715+
"cell_type": "code",
1716+
"execution_count": null,
1717+
"metadata": {},
1718+
"outputs": [],
1719+
"source": [
1720+
"class TreeRequirement:\n",
17071721
"\n",
17081722
" def __init__(self, feature: Feature, mini, maxi):\n",
17091723
" self.__feature: Feature = feature\n",
@@ -1788,10 +1802,10 @@
17881802
"\n",
17891803
"class TreePath:\n",
17901804
"\n",
1791-
" def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[Requirement]):\n",
1805+
" def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[TreeRequirement]):\n",
17921806
" self.__sample = samplefile\n",
17931807
" self.__is_bug = is_bug\n",
1794-
" self.__requirements: List[Requirement] = requirements\n",
1808+
" self.__requirements: List[TreeRequirement] = requirements\n",
17951809
"\n",
17961810
" def is_bug(self) -> bool:\n",
17971811
" return self.__is_bug\n",
@@ -2152,7 +2166,7 @@
21522166
"from typing import List\n",
21532167
"from fuzzingbook.GrammarFuzzer import DerivationTree\n",
21542168
"\n",
2155-
"class Requirement:\n",
2169+
"class SpecRequirement:\n",
21562170
" '''\n",
21572171
" This class represents a requirement for a new input sample that should be generated.\n",
21582172
" This class contains the feature that should be fullfiled (Feature), a quantifier\n",
@@ -2171,18 +2185,24 @@
21712185
" self.value = value\n",
21722186
"\n",
21732187
" def __str__(self):\n",
2174-
" return f\"Requirement({self.feature.name} {self.quant} {self.value})\"\n",
2175-
"\n",
2176-
"\n",
2188+
" return f\"Requirement({self.feature.name} {self.quant} {self.value})\""
2189+
]
2190+
},
2191+
{
2192+
"cell_type": "code",
2193+
"execution_count": null,
2194+
"metadata": {},
2195+
"outputs": [],
2196+
"source": [
21772197
"class InputSpecification:\n",
21782198
" '''\n",
21792199
" This class represents a complet input specification of a new input. A input specification\n",
21802200
" consists of one or more requirements.\n",
21812201
" requirements : Is a list of all requirements that must be used.\n",
21822202
" '''\n",
21832203
"\n",
2184-
" def __init__(self, requirements: List[Requirement]):\n",
2185-
" self.requirements: List[Reqirement] = requirements\n",
2204+
" def __init__(self, requirements: List[SpecRequirement]):\n",
2205+
" self.requirements: List[SpecRequirement] = requirements\n",
21862206
"\n",
21872207
" def __str__(self):\n",
21882208
" # Handle first element\n",
@@ -2227,7 +2247,7 @@
22272247
" if f.name == feature_name:\n",
22282248
" feature_class = f\n",
22292249
"\n",
2230-
" requirement_list.append(Requirement(feature_class, quant, value))\n",
2250+
" requirement_list.append(SpecRequirement(feature_class, quant, value))\n",
22312251
"\n",
22322252
" return InputSpecification(requirement_list)\n",
22332253
"\n",
@@ -2489,6 +2509,15 @@
24892509
" return final_samples"
24902510
]
24912511
},
2512+
{
2513+
"cell_type": "code",
2514+
"execution_count": null,
2515+
"metadata": {},
2516+
"outputs": [],
2517+
"source": [
2518+
"generate_samples = generate_samples_advanced"
2519+
]
2520+
},
24922521
{
24932522
"cell_type": "markdown",
24942523
"metadata": {},
@@ -2524,16 +2553,16 @@
25242553
"exsqrt = ExistenceFeature('exists(<function>@0)', '<function>', 'sqrt')\n",
25252554
"exdigit = ExistenceFeature('exists(<digit>)', '<digit>', '<digit>')\n",
25262555
"\n",
2527-
"reqDigit = Requirement(exdigit, '>', '0.5')\n",
2528-
"fbdDigit = Requirement(exdigit, '<=', '0.5')\n",
2556+
"reqDigit = SpecRequirement(exdigit, '>', '0.5')\n",
2557+
"fbdDigit = SpecRequirement(exdigit, '<=', '0.5')\n",
25292558
"\n",
2530-
"req0 = Requirement(exsqrt, '>', '-6.0')\n",
2559+
"req0 = SpecRequirement(exsqrt, '>', '-6.0')\n",
25312560
"testspec0 = InputSpecification([req0, reqDigit])\n",
2532-
"req1 = Requirement(exsqrt, '<=', '-6.0')\n",
2561+
"req1 = SpecRequirement(exsqrt, '<=', '-6.0')\n",
25332562
"testspec1 = InputSpecification([req1, fbdDigit])\n",
25342563
"\n",
25352564
"numterm = NumericInterpretation('num(<term>)', '<term>')\n",
2536-
"req2 = Requirement(numterm, '<', '-31.0')\n",
2565+
"req2 = SpecRequirement(numterm, '<', '-31.0')\n",
25372566
"testspec2 = InputSpecification([req2, req0, reqDigit])\n",
25382567
"\n",
25392568
"print('--generating samples--')\n",
@@ -2663,7 +2692,8 @@
26632692
"\n",
26642693
"# let's initialize Alhazen\n",
26652694
"# let's use the previously used sample_list (['sqrt(-16)', 'sqrt(4)'])\n",
2666-
"alhazen = Alhazen(sample_list, CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n",
2695+
"alhazen = Alhazen(initial_sample_list,\n",
2696+
" CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n",
26672697
"\n",
26682698
"# and run it\n",
26692699
"# Alhazen returns a list of all the iteratively learned decision trees\n",
@@ -2674,10 +2704,7 @@
26742704
"cell_type": "markdown",
26752705
"metadata": {},
26762706
"source": [
2677-
"\n",
2678-
"</hr>\n",
2679-
"\n",
2680-
"Let's display the final decision tree learned by Alhazen. You can use the function `show_tree(decison_tree, features)` to display the final tree."
2707+
"Let's display the final decision tree learned by Alhazen. You can use the function `show_tree(decision_tree, features)` to display the final tree."
26812708
]
26822709
},
26832710
{
@@ -2686,12 +2713,8 @@
26862713
"metadata": {},
26872714
"outputs": [],
26882715
"source": [
2689-
"def show_tree(clf, feature_names):\n",
2690-
" dot_data = tree.export_graphviz(clf, out_file=None, \n",
2691-
" feature_names= feature_names,\n",
2692-
" class_names=[\"BUG\", \"NO_BUG\"], \n",
2693-
" filled=True, rounded=True) \n",
2694-
" return graphviz.Source(dot_data)"
2716+
"final_tree = trees[MAX_ITERATION-1]\n",
2717+
"final_tree"
26952718
]
26962719
},
26972720
{
@@ -2701,14 +2724,23 @@
27012724
"outputs": [],
27022725
"source": [
27032726
"all_features = extract_existence(CALC_GRAMMAR) + extract_numeric(CALC_GRAMMAR)\n",
2704-
"# show_tree(trees[MAX_ITERATION-1], all_features)"
2727+
"all_feature_names = [f.name for f in all_features]"
2728+
]
2729+
},
2730+
{
2731+
"cell_type": "code",
2732+
"execution_count": null,
2733+
"metadata": {},
2734+
"outputs": [],
2735+
"source": [
2736+
"show_decision_tree(final_tree, all_feature_names)"
27052737
]
27062738
},
27072739
{
27082740
"cell_type": "markdown",
27092741
"metadata": {},
27102742
"source": [
2711-
"**Info:** The decision tree may contain unnecessary long paths, where the bug-class does not change. You can use the function 'remove_unequal_decisions(decision_tree)' to remove those nodes."
2743+
"**Info:** The decision tree may contain unnecessary long paths, where the bug-class does not change. You can use the function `remove_unequal_decisions(decision_tree)` to remove those nodes."
27122744
]
27132745
},
27142746
{
@@ -2717,14 +2749,9 @@
27172749
"metadata": {},
27182750
"outputs": [],
27192751
"source": [
2720-
"show_tree(remove_unequal_decisions(trees[MAX_ITERATION-1]), all_features)"
2752+
"show_decision_tree(remove_unequal_decisions(final_tree), all_feature_names)"
27212753
]
27222754
},
2723-
{
2724-
"cell_type": "markdown",
2725-
"metadata": {},
2726-
"source": []
2727-
},
27282755
{
27292756
"cell_type": "markdown",
27302757
"metadata": {},

0 commit comments

Comments
 (0)