|
214 | 214 | "outputs": [], |
215 | 215 | "source": [ |
216 | 216 | "# Load initial input files\n", |
217 | | - "sample_list = ['sqrt(-16)', 'sqrt(4)']" |
| 217 | + "initial_sample_list = ['sqrt(-16)', 'sqrt(4)']" |
218 | 218 | ] |
219 | 219 | }, |
220 | 220 | { |
|
604 | 604 | " @abstractmethod\n", |
605 | 605 | " def get_feature_value(self, derivation_tree) -> float:\n", |
606 | 606 | " '''Returns the feature value for a given derivation tree of an input.'''\n", |
607 | | - " pass" |
| 607 | + " pass\n", |
| 608 | + "\n", |
| 609 | + " def replace(self, new_key: str) -> 'Feature':\n", |
| 610 | + " '''Returns a new feature with the same name but a different key.'''\n", |
| 611 | + " return self.__class__(self.name, self.rule, new_key)" |
608 | 612 | ] |
609 | 613 | }, |
610 | 614 | { |
|
1113 | 1117 | "source": [ |
1114 | 1118 | "from sklearn.tree import DecisionTreeClassifier\n", |
1115 | 1119 | "from sklearn.feature_extraction import DictVectorizer\n", |
1116 | | - "from sklearn import tree\n", |
1117 | 1120 | "\n", |
1118 | 1121 | "import graphviz" |
1119 | 1122 | ] |
|
1220 | 1223 | "metadata": {}, |
1221 | 1224 | "outputs": [], |
1222 | 1225 | "source": [ |
1223 | | - "dot_data = tree.export_graphviz(clf, out_file=None,\n", |
1224 | | - " feature_names=vec.get_feature_names_out(),\n", |
1225 | | - " class_names=[\"BUG\", \"NO BUG\"],\n", |
1226 | | - " filled=True, rounded=True)\n", |
1227 | | - "graph = graphviz.Source(dot_data)" |
| 1226 | + "import graphviz\n", |
| 1227 | + "import sklearn\n", |
| 1228 | + "\n", |
| 1229 | + "def show_decision_tree(clf, feature_names):\n", |
| 1230 | + " dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n", |
| 1231 | + " feature_names=feature_names,\n", |
| 1232 | + " class_names=[\"BUG\", \"NO_BUG\"], \n", |
| 1233 | + " filled=True, rounded=True) \n", |
| 1234 | + " return graphviz.Source(dot_data)" |
1228 | 1235 | ] |
1229 | 1236 | }, |
1230 | 1237 | { |
|
1233 | 1240 | "metadata": {}, |
1234 | 1241 | "outputs": [], |
1235 | 1242 | "source": [ |
1236 | | - "display(graph)" |
| 1243 | + "show_decision_tree(clf, vec.get_feature_names_out())" |
1237 | 1244 | ] |
1238 | 1245 | }, |
1239 | 1246 | { |
|
1309 | 1316 | "source": [ |
1310 | 1317 | "def train_tree(data):\n", |
1311 | 1318 | " sample_bug_count = len(data[(data[\"oracle\"].astype(str) == \"BUG\")])\n", |
| 1319 | + " assert sample_bug_count > 0, \"No bug samples found\"\n", |
1312 | 1320 | " sample_count = len(data)\n", |
1313 | 1321 | "\n", |
1314 | 1322 | " clf = DecisionTreeClassifier(min_samples_leaf=1,\n", |
|
1392 | 1400 | "clf = clf.fit(X_data, oracle)\n", |
1393 | 1401 | "\n", |
1394 | 1402 | "import graphviz\n", |
1395 | | - "dot_data = tree.export_graphviz(clf, out_file=None, \n", |
| 1403 | + "dot_data = sklearn.tree.export_graphviz(clf, out_file=None, \n", |
1396 | 1404 | " feature_names=feature_names,\n", |
1397 | 1405 | " class_names=[\"BUG\", \"NO BUG\"], \n", |
1398 | 1406 | " filled=True, rounded=True) \n", |
|
1697 | 1705 | " mini = row['min']\n", |
1698 | 1706 | " maxi = row['max']\n", |
1699 | 1707 | " if (not np.isinf(mini)) or (not np.isinf(maxi)):\n", |
1700 | | - " requirements.append(Requirement(feature, mini, maxi))\n", |
| 1708 | + " requirements.append(TreeRequirement(feature, mini, maxi))\n", |
1701 | 1709 | " paths.append(TreePath(None, is_bug, requirements))\n", |
1702 | 1710 | "\n", |
1703 | | - " return paths\n", |
1704 | | - "\n", |
1705 | | - "\n", |
1706 | | - "class Requirement:\n", |
| 1711 | + " return paths\n" |
| 1712 | + ] |
| 1713 | + }, |
| 1714 | + { |
| 1715 | + "cell_type": "code", |
| 1716 | + "execution_count": null, |
| 1717 | + "metadata": {}, |
| 1718 | + "outputs": [], |
| 1719 | + "source": [ |
| 1720 | + "class TreeRequirement:\n", |
1707 | 1721 | "\n", |
1708 | 1722 | " def __init__(self, feature: Feature, mini, maxi):\n", |
1709 | 1723 | " self.__feature: Feature = feature\n", |
|
1788 | 1802 | "\n", |
1789 | 1803 | "class TreePath:\n", |
1790 | 1804 | "\n", |
1791 | | - " def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[Requirement]):\n", |
| 1805 | + " def __init__(self, samplefile: Optional[Path], is_bug: bool, requirements: List[TreeRequirement]):\n", |
1792 | 1806 | " self.__sample = samplefile\n", |
1793 | 1807 | " self.__is_bug = is_bug\n", |
1794 | | - " self.__requirements: List[Requirement] = requirements\n", |
| 1808 | + " self.__requirements: List[TreeRequirement] = requirements\n", |
1795 | 1809 | "\n", |
1796 | 1810 | " def is_bug(self) -> bool:\n", |
1797 | 1811 | " return self.__is_bug\n", |
|
2152 | 2166 | "from typing import List\n", |
2153 | 2167 | "from fuzzingbook.GrammarFuzzer import DerivationTree\n", |
2154 | 2168 | "\n", |
2155 | | - "class Requirement:\n", |
| 2169 | + "class SpecRequirement:\n", |
2156 | 2170 | " '''\n", |
2157 | 2171 | " This class represents a requirement for a new input sample that should be generated.\n", |
2158 | 2172 | " This class contains the feature that should be fulfilled (Feature), a quantifier\n", |
|
2171 | 2185 | " self.value = value\n", |
2172 | 2186 | "\n", |
2173 | 2187 | " def __str__(self):\n", |
2174 | | - " return f\"Requirement({self.feature.name} {self.quant} {self.value})\"\n", |
2175 | | - "\n", |
2176 | | - "\n", |
| 2188 | + " return f\"Requirement({self.feature.name} {self.quant} {self.value})\"" |
| 2189 | + ] |
| 2190 | + }, |
| 2191 | + { |
| 2192 | + "cell_type": "code", |
| 2193 | + "execution_count": null, |
| 2194 | + "metadata": {}, |
| 2195 | + "outputs": [], |
| 2196 | + "source": [ |
2177 | 2197 | "class InputSpecification:\n", |
2178 | 2198 | " '''\n", |
2179 | 2199 | " This class represents a complete input specification of a new input. An input specification\n", |
2180 | 2200 | " consists of one or more requirements.\n", |
2181 | 2201 | " requirements : Is a list of all requirements that must be used.\n", |
2182 | 2202 | " '''\n", |
2183 | 2203 | "\n", |
2184 | | - " def __init__(self, requirements: List[Requirement]):\n", |
2185 | | - " self.requirements: List[Reqirement] = requirements\n", |
| 2204 | + " def __init__(self, requirements: List[SpecRequirement]):\n", |
| 2205 | + " self.requirements: List[SpecRequirement] = requirements\n", |
2186 | 2206 | "\n", |
2187 | 2207 | " def __str__(self):\n", |
2188 | 2208 | " # Handle first element\n", |
|
2227 | 2247 | " if f.name == feature_name:\n", |
2228 | 2248 | " feature_class = f\n", |
2229 | 2249 | "\n", |
2230 | | - " requirement_list.append(Requirement(feature_class, quant, value))\n", |
| 2250 | + " requirement_list.append(SpecRequirement(feature_class, quant, value))\n", |
2231 | 2251 | "\n", |
2232 | 2252 | " return InputSpecification(requirement_list)\n", |
2233 | 2253 | "\n", |
|
2489 | 2509 | " return final_samples" |
2490 | 2510 | ] |
2491 | 2511 | }, |
| 2512 | + { |
| 2513 | + "cell_type": "code", |
| 2514 | + "execution_count": null, |
| 2515 | + "metadata": {}, |
| 2516 | + "outputs": [], |
| 2517 | + "source": [ |
| 2518 | + "generate_samples = generate_samples_advanced" |
| 2519 | + ] |
| 2520 | + }, |
2492 | 2521 | { |
2493 | 2522 | "cell_type": "markdown", |
2494 | 2523 | "metadata": {}, |
|
2524 | 2553 | "exsqrt = ExistenceFeature('exists(<function>@0)', '<function>', 'sqrt')\n", |
2525 | 2554 | "exdigit = ExistenceFeature('exists(<digit>)', '<digit>', '<digit>')\n", |
2526 | 2555 | "\n", |
2527 | | - "reqDigit = Requirement(exdigit, '>', '0.5')\n", |
2528 | | - "fbdDigit = Requirement(exdigit, '<=', '0.5')\n", |
| 2556 | + "reqDigit = SpecRequirement(exdigit, '>', '0.5')\n", |
| 2557 | + "fbdDigit = SpecRequirement(exdigit, '<=', '0.5')\n", |
2529 | 2558 | "\n", |
2530 | | - "req0 = Requirement(exsqrt, '>', '-6.0')\n", |
| 2559 | + "req0 = SpecRequirement(exsqrt, '>', '-6.0')\n", |
2531 | 2560 | "testspec0 = InputSpecification([req0, reqDigit])\n", |
2532 | | - "req1 = Requirement(exsqrt, '<=', '-6.0')\n", |
| 2561 | + "req1 = SpecRequirement(exsqrt, '<=', '-6.0')\n", |
2533 | 2562 | "testspec1 = InputSpecification([req1, fbdDigit])\n", |
2534 | 2563 | "\n", |
2535 | 2564 | "numterm = NumericInterpretation('num(<term>)', '<term>')\n", |
2536 | | - "req2 = Requirement(numterm, '<', '-31.0')\n", |
| 2565 | + "req2 = SpecRequirement(numterm, '<', '-31.0')\n", |
2537 | 2566 | "testspec2 = InputSpecification([req2, req0, reqDigit])\n", |
2538 | 2567 | "\n", |
2539 | 2568 | "print('--generating samples--')\n", |
|
2663 | 2692 | "\n", |
2664 | 2693 | "# let's initialize Alhazen\n", |
2665 | 2694 | "# let's use the previously defined initial_sample_list (['sqrt(-16)', 'sqrt(4)'])\n", |
2666 | | - "alhazen = Alhazen(sample_list, CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n", |
| 2695 | + "alhazen = Alhazen(initial_sample_list,\n", |
| 2696 | + " CALC_GRAMMAR, MAX_ITERATION, GENERATOR_TIMEOUT)\n", |
2667 | 2697 | "\n", |
2668 | 2698 | "# and run it\n", |
2669 | 2699 | "# Alhazen returns a list of all the iteratively learned decision trees\n", |
|
2674 | 2704 | "cell_type": "markdown", |
2675 | 2705 | "metadata": {}, |
2676 | 2706 | "source": [ |
2677 | | - "\n", |
2678 | | - "</hr>\n", |
2679 | | - "\n", |
2680 | | - "Let's display the final decision tree learned by Alhazen. You can use the function `show_tree(decison_tree, features)` to display the final tree." |
| 2707 | + "Let's display the final decision tree learned by Alhazen. You can use the function `show_decision_tree(decision_tree, feature_names)` to display the final tree." |
2681 | 2708 | ] |
2682 | 2709 | }, |
2683 | 2710 | { |
|
2686 | 2713 | "metadata": {}, |
2687 | 2714 | "outputs": [], |
2688 | 2715 | "source": [ |
2689 | | - "def show_tree(clf, feature_names):\n", |
2690 | | - " dot_data = tree.export_graphviz(clf, out_file=None, \n", |
2691 | | - " feature_names= feature_names,\n", |
2692 | | - " class_names=[\"BUG\", \"NO_BUG\"], \n", |
2693 | | - " filled=True, rounded=True) \n", |
2694 | | - " return graphviz.Source(dot_data)" |
| 2716 | + "final_tree = trees[MAX_ITERATION-1]\n", |
| 2717 | + "final_tree" |
2695 | 2718 | ] |
2696 | 2719 | }, |
2697 | 2720 | { |
|
2701 | 2724 | "outputs": [], |
2702 | 2725 | "source": [ |
2703 | 2726 | "all_features = extract_existence(CALC_GRAMMAR) + extract_numeric(CALC_GRAMMAR)\n", |
2704 | | - "# show_tree(trees[MAX_ITERATION-1], all_features)" |
| 2727 | + "all_feature_names = [f.name for f in all_features]" |
| 2728 | + ] |
| 2729 | + }, |
| 2730 | + { |
| 2731 | + "cell_type": "code", |
| 2732 | + "execution_count": null, |
| 2733 | + "metadata": {}, |
| 2734 | + "outputs": [], |
| 2735 | + "source": [ |
| 2736 | + "show_decision_tree(final_tree, all_feature_names)" |
2705 | 2737 | ] |
2706 | 2738 | }, |
2707 | 2739 | { |
2708 | 2740 | "cell_type": "markdown", |
2709 | 2741 | "metadata": {}, |
2710 | 2742 | "source": [ |
2711 | | - "**Info:** The decision tree may contain unnecessary long paths, where the bug-class does not change. You can use the function 'remove_unequal_decisions(decision_tree)' to remove those nodes." |
| 2743 | + "**Info:** The decision tree may contain unnecessarily long paths, where the bug-class does not change. You can use the function `remove_unequal_decisions(decision_tree)` to remove those nodes." |
2712 | 2744 | ] |
2713 | 2745 | }, |
2714 | 2746 | { |
|
2717 | 2749 | "metadata": {}, |
2718 | 2750 | "outputs": [], |
2719 | 2751 | "source": [ |
2720 | | - "show_tree(remove_unequal_decisions(trees[MAX_ITERATION-1]), all_features)" |
| 2752 | + "show_decision_tree(remove_unequal_decisions(final_tree), all_feature_names)" |
2721 | 2753 | ] |
2722 | 2754 | }, |
2723 | | - { |
2724 | | - "cell_type": "markdown", |
2725 | | - "metadata": {}, |
2726 | | - "source": [] |
2727 | | - }, |
2728 | 2755 | { |
2729 | 2756 | "cell_type": "markdown", |
2730 | 2757 | "metadata": {}, |
|
0 commit comments