CCSDS_study project

This commit is contained in:
2026-05-05 21:54:35 +08:00
commit 9be41f9270
585 changed files with 91275 additions and 0 deletions

View File

View File

@@ -0,0 +1,248 @@
# -*- coding: utf-8 -*-
#
# Netzob documentation build configuration file, created by
# sphinx-quickstart on Mon Aug 15 01:14:04 2011.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# Put the project's source and library directories at the front of the
# module search path. Resulting lookup order: src/, lib/libNeedleman/, lib/.
sys.path[:0] = [
    os.path.abspath(relative_dir)
    for relative_dir in (
        '../../../src/',
        '../../../lib/libNeedleman/',
        '../../../lib/',
    )
]
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.doctest',
]
# Document the members of every autodoc directive by default.
# ``autodoc_default_flags`` was deprecated in Sphinx 1.8 in favour of
# ``autodoc_default_options``; recent Sphinx releases no longer honour the
# old name, so the option is expressed with the supported setting.
autodoc_default_options = {'members': True}
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'Netzob'
copyright = u'2011-2022, Frédéric Guihéry, Georges Bossert'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '2.0'
# The full version, including alpha/beta/rc tags.
release = '2.0.0.dev0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["all.py"]
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
add_module_names = False
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_book_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {'show_navbar_depth': 1, "home_page_in_toc": True}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
html_title = "Netzob Documentation"
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = "zoby.png"
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
html_favicon = "favicon.ico"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'Netzobdoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'Netzob.tex', u'Netzob Documentation',
u'Frédéric Guihéry, Georges Bossert', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# man_pages = [
# ('index', 'netzob', u'Netzob Documentation',
# [u'Frédéric Guihéry, Georges Bossert'], 1)
# ]
# -- Options for Netzob documentation generation--------------------------------
def setup(app):
app.add_config_value('scope', 'netzob', 'env')
# -- Options for apidoc generation in rtfd.org----------------------------------
# from unittest.mock import MagicMock
# class Mock(MagicMock):
# @classmethod
# def __getattr__(cls, name):
# return MagicMock()
# MOCK_MODULES = ['pcapy', 'numpy']
# sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
# on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
# if on_rtd:
# os.system("sphinx-apidoc -T -f -o ./developer_guide/API/ ../../../src/netzob")
# # In order to render a nice toctree, add a maxdepth in each file
# #os.system("find ./developer_guide/API/ -type f -exec sed -i ':a;N;$!ba;s/Subpackages\n-----------\n\n.. toctree::\n/Subpackages\n-----------\n\n.. toctree::\n :maxdepth: 1\n /g' {} +")

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

View File

@@ -0,0 +1,265 @@
.. currentmodule:: netzob
====================
Netzob documentation
====================
**Netzob** is an open source tool for reverse engineering,
modelization, traffic generation and fuzzing of communication
protocols.
Netzob is suitable for reversing network protocols, structured files
and system and process flows (IPC and communication with drivers and
devices). Netzob handles different types of protocols: text protocols
(like HTTP and IRC), delimiter-based protocols, fixed fields protocols
(like IP and TCP) and variable-length fields protocols (like TLV-based
protocols).
Netzob can be used to infer the message format and the state machine
of a protocol through passive and active processes. Its objective is
to bring state-of-the-art academic research to the operational field, by
leveraging bio-informatic and grammatical inference algorithms in a
semi-automatic manner.
Once modeled or inferred, a protocol model can be used in our traffic
generation engine, to allow simulation of realistic and controllable
communication endpoints and flows.
The main features of Netzob are:
**Protocol Modelization**
Netzob includes a complete model to represent the message format (aka its vocabulary)
and the state machine of a protocol (aka its grammar).
**Protocol Inference**
The vocabulary and grammar inference
component provides both passive and
active reverse engineering of communication flows through automated
and manual mechanisms.
**Traffic Generation**
Given vocabulary and grammar models previously
inferred or modelized, Netzob can understand and generate communication traffic
with remote peers. It can thus act as either a client, a server or
both.
**Protocol Fuzzing**
Netzob helps security evaluators by simplifying the creation of
fuzzers for proprietary or undocumented protocols. Netzob considers the format message and state machine of the
protocol to generate optimized and specific test cases. Both mutation and generation are available for fuzzing.
**Import Communication Traces**
Data import is available in two ways: either by
leveraging the channel-specific captors (currently network and IPC --
Inter-Process Communication), or by using specific importers (such as
PCAP files, structured files and OSpy files).
**Export Protocol Models**
This module makes it possible to export a model of
a protocol in formats that are understandable by third party software
or by a human. Current work focuses on export format compatible with
main traffic dissectors (Wireshark and Scapy) and fuzzers (Peach and
Sulley).
A :ref:`dedicated tutorial<discover_features>` gives you an overview of the main features in practice.
Netzob has been initiated by security auditors of AMOSSYS and the
CIDre research team of Supélec to address the reverse engineering of
communication protocols.
Follow us on Twitter: `@Netzob <https://twitter.com/netzob>`_.
Example of Ethernet IEEE 802.3 Modelization
===========================================
This quick example illustrates format message modelization, with fixed-size
fields and several relationship fields (CRC32, Size and Padding).
.. code-block:: python
>>> from netzob.all import *
>>>
>>> eth_length = Field(bitarray('0000000000000000'), "eth.length")
>>> eth_llc = Field(Raw(nbBytes=3), "eth.llc") # IEEE 802.2 header
>>> eth_payload = Field(Raw(), name="eth.payload")
>>> eth_padding = Field(Padding([eth_length,
... eth_llc,
... eth_payload],
... data=Raw(nbBytes=1),
... modulo=8*60),
... "eth.padding")
>>>
>>> eth_crc_802_3 = Field(bitarray('00000000000000000000000000000000'), "eth.crc")
>>> eth_crc_802_3.domain = CRC32([eth_length,
... eth_llc,
... eth_payload,
... eth_padding],
... dataType=Raw(nbBytes=4,
... unitSize=UnitSize.SIZE_32))
>>>
>>> eth_length.domain = Size([eth_llc, eth_payload],
... dataType=uint16(), factor=1./8)
>>>
>>> symbol = Symbol(name="ethernet_802_3",
... fields=[eth_length,
... eth_llc,
... eth_payload,
... eth_padding,
... eth_crc_802_3])
>>> print(symbol.str_structure())
ethernet_802_3
|-- eth.length
|-- Size(['eth.llc', 'eth.payload']) - Type:Integer(0,65535)
|-- eth.llc
|-- Data (Raw(nbBytes=3))
|-- eth.payload
|-- Data (Raw(nbBytes=(0,8192)))
|-- eth.padding
|-- Padding(['eth.length', 'eth.llc', 'eth.payload']) - Type:Raw(nbBytes=1)
|-- eth.crc
|-- Relation(['eth.length', 'eth.llc', 'eth.payload', 'eth.padding']) - Type:Raw(nbBytes=4)
Installation of Netzob
======================
.. toctree::
:maxdepth: 2
:caption: Installation
installation/python
..
installation/debian
installation/gentoo
installation/windows
Protocol Modelization with Netzob
=================================
.. toctree::
:maxdepth: 2
:caption: Protocol Modelization
language_specification/dataspec
language_specification/statemachinespec
language_specification/protospec
Protocol Inference with Netzob
==============================
*Note: this section should be completed*
..
.. toctree::
:maxdepth: 2
user_guide/inference/index
Traffic Generation with Netzob
==============================
.. toctree::
:maxdepth: 2
:caption: Traffic Generation
language_specification/trafficgeneration
language_specification/actor
.. note::
Several examples of actor usages are provided below:
* :ref:`Common automaton for a client and a server<ActorExample1>`
* :ref:`Dedicated automaton for a client and a server<ActorExample2>`
* :ref:`Modification of the emitted symbol by a client through a callback<ActorExample3>`
* :ref:`Modification of the emitted symbol by a server through a callback<ActorExample4>`
* :ref:`Modification of the current transition by a client through a callback<ActorExample5>`
* :ref:`Modification of the current transition of a server through a callback<ActorExample6>`
* :ref:`Transition with no input symbol<ActorExample7>`
* :ref:`How to catch all read symbol timeout<ActorExample8>`
* :ref:`How to catch all receptions of unexpected symbols<ActorExample9>`
* :ref:`How to catch all receptions of unknown messages<ActorExample10>`
* :ref:`Several actors on the same communication channel<ActorExample13>`
Protocol Fuzzing with Netzob
============================
Fuzzing can be applied on format message, state machine or both at the
same time. Fuzzing strategies may leverage either mutation or
generation approaches.
.. toctree::
:maxdepth: 2
:caption: Fuzzing
language_specification/fuzzing
language_specification/fuzzing_automata
.. note::
Thanks to the :class:`Actor <netzob.Simulator.Actor.Actor>`
component, it is possible to fuzz specific messages at specific
states in an automaton. This makes it possible to define fine-tuned fuzzing
strategies. Several examples of actor usages in a fuzzing context
are provided below:
* :ref:`Message format fuzzing from an actor<ActorExample11>`
* :ref:`Message format fuzzing from an actor, at a specific state<ActorExample12>`
Import Communication Traces with Netzob
=======================================
Netzob supports import of communication traces from the following resources:
* Raw messages
* Raw files
* PCAP files
Export Protocol Models with Netzob
==================================
Netzob supports export of protocols in the following formats:
* XML meta representation
* Scapy Dissector
* :ref:`Wireshark Dissector<tutorial_wireshark>`
..
Tutorials
=========
.. :ref:`Get started with Netzob<tutorial_get_started>`
The goal of this tutorial is to present the usage of each main
component of Netzob (inference of message format, construction of
the state machine and generation of traffic) through an undocumented
protocol.
..
:ref:`Discover features of Netzob<discover_features>`
The goal of this tutorial is to present the usage of each main
component of Netzob (inference of message format, construction of
the state machine, generation of traffic and fuzzing) through an undocumented
protocol.
..
:ref:`Modeling your Protocol with Netzob<tutorial_modeling_protocol>`
This tutorial details the main features of Netzob's protocol modeling
aspects. It shows how your protocol fields can be described with Netzob's
language.
.. :ref:`Auto-generation of Peach pit files/fuzzers<tutorial_peach>`
This tutorial shows how to take advantage of the Peach exporter
plugin provided in Netzob to automatically generate Peach pit
configuration files, thus allowing to do smart fuzzing on
undocumented protocols.
.. :ref:`Auto-generation of Wireshark dissectors<tutorial_wireshark>`
This tutorial shows how to leverage Netzob's format message inference
in order to automatically generate Wireshark dissectors for
proprietary or undocumented protocols.
Licences
========
Netzob code is provided under the GPLv3 licence.
The documentation is under the CC-BY-SA licence.

View File

@@ -0,0 +1,99 @@
.. currentmodule:: netzob
.. _installation_debian:
Installation documentation on Debian
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Using Netzob's APT Repository
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A dedicated APT repository (apt.netzob.org) is available for downloading
and installing Netzob.
Steps:
#. edit your ``/etc/apt/sources.list`` to add Netzob's repository
URL,
#. import the GPG key used to sign the repository,
#. install netzob through ``apt-get``.
Edit ``/etc/apt/sources.list``
You need to register the repository in your APT client by adding the
following entry (stable or unstable) in your ``/etc/apt/sources.list``
or through a dedicated file in ``/etc/apt/sources.list.d/``. Then you
need to import the gpg public key used to sign the repository.
**Unstable & testing ("Wheezy")**
::
deb http://apt.netzob.org/debian/ unstable main
deb-src http://apt.netzob.org/debian/ unstable main
**Stable ("Squeeze")**
::
deb http://apt.netzob.org/debian/ squeeze-backports main
deb-src http://apt.netzob.org/debian/ squeeze-backports main
Import GPG key
The repository is signed, so APT may complain until you register the
archive key ``0xE57AEA26`` to your APT keyring. The fingerprint of the
key is ``D865 DCF0 9B9A 195C 49F0 E3F3 F750 1A13 E57A EA26`` and has
been signed by the following keys:
- 0xA255A6A3 : Georges Bossert
<`georges.bossert@supelec.fr <mailto:georges.bossert@supelec.fr>`_\ >
- 0x561F7A47 : Frederic Guihery
<`frederic.guihery@amossys.fr <mailto:frederic.guihery@amossys.fr>`_\ >
- 0x04B1A89C : Olivier Tetard
<`olivier.tetard@amossys.fr <mailto:olivier.tetard@amossys.fr>`_\ >
To import the key of the APT repository you can execute the following
commands :
::
# wget https://dev.netzob.org/misc/debian_archive.asc -O -| gpg --import
# gpg --export -a 0xF7501A13E57AEA26 | sudo apt-key add -
Install netzob
You can install it with the following commands :
::
# apt-get update
# apt-get install netzob
Using the provided Debian package
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Installing Netzob directly from the debian package (deb file) implies
you manually install the necessary packages in order to handle the
required dependencies. Therefore, the following commands can be executed
to install them :
::
# apt-get install python python-ptrace python-hachoir-subfile python-matplotlib python-dpkt strace lsof python-pcapy python-bitarray python-dev libjs-sphinxdoc python-sphinx
Once the requirements are fulfilled you can download the debian file
(i386 or amd64) and install it using the following command for an i386
architecture (32 bits) :
::
# dpkg -i netzob_0.3.0-1_i386.deb
or for an AMD64 (64 bits) :
::
# dpkg -i netzob_0.3.0-1_amd64.deb

View File

@@ -0,0 +1,80 @@
.. currentmodule:: netzob
.. _installation_gentoo:
Installation documentation on Gentoo
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
From official portage (not yet available)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Some build scripts have been published for future integration in
Portage.
While the scripts have not yet been accepted please refer to the
alternative procedure.
::
# emerge -av netzob
From Gentoo overlay (recommended, automatic)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Alternative non official repositories are available on Gentoo which are
called "overlays".
The tool used to synchronize with these repositories is called "layman"
#. Installing layman
::
# emerge app-portage/layman
#. Adding "lootr" repository containing Netzob ebuild scripts
::
# layman -a lootr
#. Installing Netzob from this repository
::
# emerge -av dev-python/netzob
From netzob repository (expert users only, manual installation)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
First step is to clone the netzob repository:
::
# (~) git clone https://dev.netzob.org/git/netzob-gentoo.git
Then, declare this repository in the portage configuration file
*/etc/make.conf* by adding this line:
::
PORTDIR_OVERLAY="/home/USER/netzob-gentoo/"
Synchronize portage
::
# emerge --sync
Finally emerge Netzob package:
- *tildarched (testing-like) systems:*
::
# emerge -av netzob
- *stable systems:*
::
# ACCEPT_KEYWORDS="~x86" emerge -av netzob

View File

@@ -0,0 +1,66 @@
.. currentmodule:: netzob
.. _installation_python:
Installation of Netzob
======================
This page presents how to install Netzob as a Python package.
Installing Netzob system dependencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The first thing to do is to check the version of your python3 interpreter.
Netzob requires at least Python 3.8::
$ python3 --version
Python 3.8.10
You have to install the following system dependencies::
$ apt-get install -y python3 python3-dev python3-setuptools virtualenv build-essential libpcap-dev libgraph-easy-perl libffi-dev
Then, create a virtualenv::
$ mkdir venv
$ virtualenv venv
$ source venv/bin/activate
Installing Netzob from Pypi
^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can install Netzob from Pypi (recommended choice)::
(venv) $ pip3 install netzob
Installing Netzob from sources
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you have retrieved Netzob sources, the installation procedure is::
(venv) $ pip3 install Cython==0.29.32 # Should be manually installed because of setup.py direct dependency
(venv) $ pip3 install -e .
API usage
---------
Once installed, we recommend using the Netzob API inside scripts, with the following statement to import Netzob::
from netzob.all import *
Start Netzob CLI
----------------
Netzob also provides its own CLI, in order to play interactively with it::
(venv) $ netzob
Building the documentation
^^^^^^^^^^^^^^^^^^^^^^^^^^
The folder *doc/documentation* contains all the documentation of Netzob.
The user manual can be generated based on RST sources located in folder
*doc/documentation/source* with the following command::
$ sphinx-build -b html doc/documentation/source/ doc/documentation/build/

View File

@@ -0,0 +1,57 @@
.. currentmodule:: netzob
.. _installation_windows:
Installation documentation on Windows
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This documentation only applies for Netzob 0.3.3.
Installation of dependencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Steps:
#. Install Python 2.7 (download the installer from
`python.org <http://www.python.org/ftp/python/2.7.3/python-2.7.3.msi>`_)
#. Install SetupTools (download the installer from
`pypi.python.org <http://pypi.python.org/packages/2.7/s/setuptools/setuptools-0.6c11.win32-py2.7.exe#md5=57e1e64f6b7c7f1d2eddfc9746bbaf20>`_)
#. Install PyGTK (download the installer from
`gnome.org <http://ftp.gnome.org/pub/GNOME/binaries/win32/pygtk/2.24/pygtk-all-in-one-2.24.2.win32-py2.7.msi>`_)
#. Install WinPCap 4.1.2 (download the installer from
`winpcap.org <http://www.winpcap.org/install/bin/WinPcap_4_1_2.exe>`_)
#. Install Pcapy 0.10.5 (provided on `Netzob's
website <http://www.netzob.org/repository/0.3.3/windows-dep/pcapy-0.10.5.win32-py2.7.exe>`_
; original source:
`oss.coresecurity.com <http://oss.coresecurity.com/repo/pcapy-0.10.5.tar.gz>`_)
#. Install following dependencies with SetupTools (be sure to have
C:\\Python27\\Scripts\\easy\_install.exe in your PATH):
#. ::
easy_install numpy
#. ::
easy_install impacket
#. ::
easy_install -f "http://downloads.sourceforge.net/project/matplotlib/matplotlib/matplotlib-1.1.0/matplotlib-1.1.0.win32-py2.7.exe?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fmatplotlib%2Ffiles%2Fmatplotlib%2Fmatplotlib-1.1.0%2F&ts=1339591175&use_mirror=netcologne" matplotlib
#. ::
easy_install bitarray==0.3.5
Installation of Netzob
^^^^^^^^^^^^^^^^^^^^^^
#. Install
`Netzob <http://www.netzob.org/repository/0.3.3/Netzob-0.3.3.win32-py2.7.exe>`_
!
**Remark:** If you have disabled Windows UAC, an error can be raised by
Windows when executing Netzob's installer: Failed to start elevated
process (ShellExecute returned 3). So you have to run the installer with
administrator privilege : right-click on the executable and choose "run
as administrator".

View File

@@ -0,0 +1,23 @@
.. _actor:
Visiting a State Machine with an Actor
======================================
An **actor** (see :class:`~netzob.Simulator.Actor.Actor`) is a high-level representation of an entity that participates in a communication. An actor communicates with remote peers, with respect to an automaton (an actor is in fact a visitor of an automaton), and exchanges abstract representations of messages called Symbols.
In the API, a visitor of a state machine is modeled using the Actor
class.
.. autoclass:: netzob.Simulator.Actor.Actor
.. Note: we comment this section, as the figure is not referenced in the language specification.
.. figure:: ./img/transition_without_input_symbol_sequence.png
:align: center
Sequence diagram showing state transitions according to messages exchanged
between Alice and Bob.
.. raw:: latex
\newpage

View File

@@ -0,0 +1,506 @@
.. _dataspec:
.. _format_message_modeling:
Format Message Modeling
=======================
The Netzob Description Language (ZDL) is the API exposed by the Netzob library
to model data structures employed in communication protocols.
This textual language has been designed in order to be easily understandable
by a human. It enables the user to describe a protocol through dedicated
*\*.zdl* files, which are independent of the API and core of the library.
The ZDL language has been designed with attention to its expressiveness.
In this chapter, firstly, the main concepts of the ZDL language are presented,
then its expressiveness in terms of data types,
constraints and relationships is explained.
Format Message Modeling Concepts
--------------------------------
Definitions: Symbol, Field, Variable
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In the Netzob library, the set of valid messages and their formats are
represented through **symbols**. A symbol represents all the messages
that share a similar objective from a protocol perspective. For
example, the HTTP_GET symbol would describe any HTTP request with the
GET method being set. A symbol can be specialized into a context-valid
message and a message can be abstracted into a symbol.
A **field** describes a chunk of the symbol and is defined by a
**definition domain**, representing the set of values the field handles.
To support complex domains, a definition domain is represented by a tree where
each vertex is a **Variable**. There are three kinds of variables:
* **Data variables**, which describe data whose value is of a given **type**. Various types are provided with the library, such as String, Integer, Raw and BitArray.
* **Relationship variables**, which make it possible to model a relationship between a variable and a list of variables or fields. Besides, relationships can be done between fields of different symbols, thus making it possible to model both **intra-symbol relationships** and **inter-symbol relationships**.
* **Node variables**, which accept one or more children variables.
Node variables can be used to construct complex definition domains,
such as:
* **Aggregate node variable**, which can be used to model a concatenation of
variables.
* **Alternate node variable**, which can be used to model an alternative of
multiple variables.
* **Repeat node variable**, which can be used to model a repetition of a
variable.
* **Optional node variable**, which can be used to model a variable
that may or may not be present.
As an illustration of these concepts, the following figure presents the
definition of a Symbol structured with three Fields.
The first field contains an alternative between String Data with a constant
string and Integer Data with a constant value. The second field is String
Data with a variable length string.
The third field depicts an Integer whose value is the size of the second string.
.. figure:: img/netzob_vocabulary_model.*
:align: center
Example of Symbol definition and relationships with Field and Variable objects.
Abstraction and Specialization of Symbols
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The use of a symbolic model is required to represent the message formats of a protocol in a compact way. However, as the objective of this platform is to analyze the robustness of a target implementation, this implies that the testing tool should be able to exchange messages with this target. We therefore need to abstract received messages into symbols that can be used by the protocol model. Conversely, we also need to specialize symbols produced by the protocol model into valid messages. To achieve this, we use an **abstraction** method (*ABS*) and a **specialization** (*SPE*) method. As illustrated in the following figure, these methods play the role of an interface between the symbolic protocol model and a communication channel on which concrete messages transit.
.. figure:: img/abstractionAndSpecialization.*
:align: center
Abstraction (ABS) and Specialization (SPE) methods are interfaces between the protocol symbols and the wire messages.
To compute or verify the constraints and relationships that
participate in the definition of the fields, the library relies on a
:class:`~netzob.Model.Vocabulary.Domain.Variables.Memory.Memory`. This memory stores the value of previously captured or emitted
fields. More precisely, the memory contains all the variables that are
needed according to the field definition during the abstraction and
specialization processes.
.. raw:: latex
\newpage
Modeling Data Types
-------------------
The library enables the modeling of the following data types:
* **Integer**: The Integer type is a wrapper for the Python integer object with the capability to express more constraints regarding the sign, endianness and unit size.
* **HexaString**: The HexaString type makes it possible to describe a sequence of bytes of arbitrary size, with a hexastring notation (e.g. ``aabbcc``).
* **BLOB / Raw**: The Raw type makes it possible to describe a sequence of bytes of arbitrary size, with a raw notation (e.g. ``\xaa\xbb\xcc``).
* **String**: The String type makes it possible to describe a field that contains a sequence of String characters.
* **BitArray**: The BitArray type makes it possible to describe a field that contains a sequence of bits of arbitrary size.
* **IPv4**: The IPv4 type makes it possible to encode a raw Python object in an IPv4 representation, and conversely to decode an IPv4 representation into a raw object.
* **Timestamp**: The Timestamp type makes it possible to define dates in a specific format (such as Windows, Unix or MacOS X formats).
Data Types API
^^^^^^^^^^^^^^
Each data type provides the following API:
.. autoclass:: netzob.Model.Vocabulary.Types.AbstractType.AbstractType()
.. automethod:: netzob.Model.Vocabulary.Types.AbstractType.AbstractType.convert(typeClass)
.. automethod:: netzob.Model.Vocabulary.Types.AbstractType.AbstractType.generate
Some data types can have specific attributes regarding their endianness, sign and unit size. Values supported for those attributes are available through Python enumerations:
.. autoclass:: netzob.Model.Vocabulary.Types.AbstractType.Endianness
:members:
.. autoclass:: netzob.Model.Vocabulary.Types.AbstractType.Sign
:members:
.. autoclass:: netzob.Model.Vocabulary.Types.AbstractType.UnitSize
:members:
Data Types
^^^^^^^^^^
Supported data types are described in detail in this chapter.
.. _integer_type:
Integer Type
++++++++++++
In the API, the definition of an integer is done through the Integer class.
.. autoclass:: netzob.Model.Vocabulary.Types.Integer.Integer
BLOB / Raw Type
+++++++++++++++
In the API, the definition of a BLOB type is made through the Raw class.
.. autoclass:: netzob.Model.Vocabulary.Types.Raw.Raw(value=None, nbBytes=None, alphabet=None, default=None)
HexaString Type
+++++++++++++++
In the API, the definition of a hexastring type is made through the HexaString class.
.. autoclass:: netzob.Model.Vocabulary.Types.HexaString.HexaString(value=None, nbBytes=None, default=None)
String Type
+++++++++++
In the API, the definition of an ASCII or Unicode type is made through the String class.
.. autoclass:: netzob.Model.Vocabulary.Types.String.String(value=None, nbChars=None, encoding='utf-8', eos=[], default=None)
BitArray Type
+++++++++++++
In the API, the definition of a bitfield type is made through the BitArray class.
.. autoclass:: netzob.Model.Vocabulary.Types.BitArray.BitArray(value=None, nbBits=None, default=None)
IPv4 Type
+++++++++
In the API, the definition of an IPv4 type is made through the IPv4 class.
.. autoclass:: netzob.Model.Vocabulary.Types.IPv4.IPv4(value=None, network=None, endianness=Endianness.BIG, default=None)
Timestamp Type
++++++++++++++
In the API, the definition of a timestamp type is done through the Timestamp class.
.. autoclass:: netzob.Model.Vocabulary.Types.Timestamp.Timestamp(value=None, epoch=Epoch.UNIX, unity=Unity.SECOND, unitSize=UnitSize.SIZE_32, endianness=Endianness.BIG, sign=Sign.UNSIGNED, default=None)
.. raw:: latex
\newpage
Modeling Fields
---------------
In the API, field modeling is done through the Field class.
.. autoclass:: netzob.Model.Vocabulary.Field.Field
:members: specialize, abstract, getField, getSymbol, count
.. automethod:: netzob.Model.Vocabulary.Field.Field.copy()
.. automethod:: netzob.Model.Vocabulary.Field.Field.str_structure(preset=None)
.. raw:: latex
\newpage
Modeling Variables
------------------
The definition domain of a field is represented by a tree of variables, containing leaf and node variables. Each variable follows a common API, which is described in the abstract class AbstractVariable:
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.AbstractVariable.AbstractVariable()
:members: count, isnode
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.AbstractVariable.AbstractVariable.copy()
.. raw:: latex
\newpage
Modeling Data Variables
-----------------------
In the API, data variable modeling is made through the class Data.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Data.Data
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Data.Data.copy()
.. raw:: latex
\newpage
Modeling Node Variables
-----------------------
Multiple variables can be combined to form a complex and precise
specification of the values that are accepted by a field. Four complex
variable types are provided:
* **Aggregate node variables**, which can be used to model a concatenation of variables.
* **Alternate node variables**, which can be used to model an alternative of multiple variables.
* **Repeat node variables**, which can be used to model a repetition of a variable.
* **Optional node variables**, which can be used to model a variable
that may or may not be present.
Those node variables are described in detail in this chapter.
Aggregate Domain
^^^^^^^^^^^^^^^^
In the API, the definition of a concatenation of variables is made through the Agg class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Agg.Agg
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Agg.Agg.copy()
Alternate Domain
^^^^^^^^^^^^^^^^
In the API, the definition of an alternate of variables is made through the Alt class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Alt.Alt
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Alt.Alt.copy()
Repeat Domain
^^^^^^^^^^^^^
In the API, the definition of a repetition of variables, or sequence, is made through the Repeat class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Repeat.Repeat
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Repeat.Repeat.copy()
Optional Domain
^^^^^^^^^^^^^^^
In the API, the definition of a conditional variable is made through the Opt class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Opt.Opt
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Nodes.Opt.Opt.copy()
.. raw:: latex
\newpage
Modeling Fields with Relationship Variables
-------------------------------------------
The ZDL language defines constraints on variables, in order to handle relationships. Those constraints are leveraged during abstraction and specialization of messages. The API supports the following relationships.
Value Relationships
^^^^^^^^^^^^^^^^^^^
In the API, the definition of a relationship with the value of another field is made through the Value class. This class enables the computation of the relationship result by a basic copy of the targeted field or by calling a callback function.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Value.Value
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Value.Value.copy()
Size Relationships
^^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Size.Size
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Size.Size.copy()
Padding Relationships
^^^^^^^^^^^^^^^^^^^^^
In the API, it is possible to model a structure with a padding through the Padding class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Padding.Padding
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Padding.Padding.copy()
Checksum Relationships
^^^^^^^^^^^^^^^^^^^^^^
The ZDL language enables the definition of checksum relationships between fields.
**Checksum API**
As an example, the API for the CRC16 checksum is as follows:
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16.CRC16(targets)
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16.CRC16.copy()
**Available checksums**
The following list shows the available checksums. The API for those checksums is similar to the CRC16 API.
* :class:`CRC16(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16.CRC16>`
* :class:`CRC16DNP(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16DNP.CRC16DNP>`
* :class:`CRC16Kermit(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16Kermit.CRC16Kermit>`
* :class:`CRC16SICK(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC16SICK.CRC16SICK>`
* :class:`CRC32(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRC32.CRC32>`
* :class:`CRCCCITT(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.CRCCCITT.CRCCCITT>`
* :class:`InternetChecksum(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Checksums.InternetChecksum.InternetChecksum>` (used in ICMP, UDP, IP, TCP protocols, as specified in :rfc:`1071`).
Hash Relationships
^^^^^^^^^^^^^^^^^^
The ZDL language enables the definition of hash relationships between fields.
**Hash API**
As an example, the API for the MD5 hash is as follows:
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.MD5.MD5(targets)
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.MD5.MD5.copy()
**Available hashes**
The following list shows the available hashes. The API for those hashes is similar to the MD5 API.
* :class:`MD5(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.MD5.MD5>`
* :class:`SHA1(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA1.SHA1>`
* :class:`SHA1_96(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA1_96.SHA1_96>`
* :class:`SHA2_224(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA2_224.SHA2_224>`
* :class:`SHA2_256(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA2_256.SHA2_256>`
* :class:`SHA2_384(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA2_384.SHA2_384>`
* :class:`SHA2_512(targets) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hashes.SHA2_512.SHA2_512>`
HMAC Relationships
^^^^^^^^^^^^^^^^^^
The ZDL language enables the definition of HMAC relationships between fields.
**HMAC API**
As an example, the API for the HMAC_MD5 is as follows:
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_MD5.HMAC_MD5(targets, key)
.. automethod:: netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_MD5.HMAC_MD5.copy()
**Available HMACs**
The following list shows the available HMACs. The API for those HMACs is similar to the HMAC_MD5 API.
* :class:`HMAC_MD5(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_MD5.HMAC_MD5>`
* :class:`HMAC_SHA1(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA1.HMAC_SHA1>`
* :class:`HMAC_SHA1_96(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA1_96.HMAC_SHA1_96>`
* :class:`HMAC_SHA2_224(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA2_224.HMAC_SHA2_224>`
* :class:`HMAC_SHA2_256(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA2_256.HMAC_SHA2_256>`
* :class:`HMAC_SHA2_384(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA2_384.HMAC_SHA2_384>`
* :class:`HMAC_SHA2_512(targets, key) <netzob.Model.Vocabulary.Domain.Variables.Leafs.Hmacs.HMAC_SHA2_512.HMAC_SHA2_512>`
.. _modeling_symbols:
Modeling Symbols
----------------
In the API, symbol modeling is done through the Symbol class.
.. autoclass:: netzob.Model.Vocabulary.Symbol.Symbol
:members: specialize, abstract, getField, count
.. automethod:: netzob.Model.Vocabulary.Symbol.Symbol.copy()
.. automethod:: netzob.Model.Vocabulary.Symbol.Symbol.str_structure(preset=None)
.. raw:: latex
\newpage
Configuring Symbol Content
--------------------------
Setting Field Values
^^^^^^^^^^^^^^^^^^^^
In the API, it is possible to control values that will be used in
fields during symbol specialization. Such configuration can be done
through the Preset class.
.. autoclass:: netzob.Model.Vocabulary.Preset.Preset
:members: copy, update, bulk_set, clear
Symbol with no Content
^^^^^^^^^^^^^^^^^^^^^^
A specific symbol may be used in the state machine to represent the
absence of a received symbol (EmptySymbol), when listening for an incoming
message, or the fact that nothing is going to be sent, when attempting
to send something to the remote peer.
.. autoclass:: netzob.Model.Vocabulary.EmptySymbol.EmptySymbol
Relationships between Symbols and the Environment
-------------------------------------------------
In the API, a memory capability is provided in order to support
relationships between variables, as well as variable persistence
during the specialization and abstraction processes. This capability
is described in the Memory class.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Memory.Memory
:members: memorize, hasValue, getValue, forget, copy
In the API, the ability to specify relationships between successive
messages or between messages and the environment is provided by the
:class:`~netzob.Model.Vocabulary.Domain.Variables.Memory.Memory` class.
**Relationships between fields of successive messages**
The following example shows how to define a relationship between a
received message and the next message to send. A memory is used to store the value of each variable. During the first call to :meth:`specialize` on the ``s1`` symbol, the value associated with the field ``f3`` is notably stored in memory, so that it can be retrieved when calling :meth:`specialize` on the ``s2`` symbol::
>>> from netzob.all import *
>>> f1 = Field(domain=String("hello"), name="F1")
>>> f2 = Field(domain=String(";"), name="F2")
>>> f3 = Field(domain=String(nbChars=(5,10)), name="F3")
>>> s1 = Symbol(fields=[f1, f2, f3], name="S1")
>>>
>>> f4 = Field(domain=String("master"), name="F4")
>>> f5 = Field(domain=String(">"), name="F5")
>>> f6 = Field(domain=Value(f3), name="F6")
>>> s2 = Symbol(fields=[f4, f5, f6])
>>>
>>> memory = Memory()
>>> m1 = next(s1.specialize(memory=memory))
>>> m2 = next(s2.specialize(memory=memory))
>>>
>>> m1[len("hello;"):] == m2[len("master>"):]
True
**Relationships between a message field and the environment**
The following example shows how to define a relationship between a
message to send and an environment variable. The symbol is first
defined, and then an environment variable is created. The first step
consists in overloading the definition domain of the ``f9`` field to
link the environment variable::
>>> from netzob.all import *
>>>
>>> # Symbol definition
>>> f7 = Field(domain=String("master"), name="F7")
>>> f8 = Field(domain=String(">"), name="F8")
>>> f9 = Field(domain=String(), name="F9")
>>> s3 = Symbol(fields=[f7, f8, f9])
>>>
>>> # Environment variables definition
>>> memory = Memory()
>>> env1 = Data(String(), name="env1")
>>> memory.memorize(env1, String("John").value)
>>>
>>> # Overloading f9 field definition to link the environment variable
>>> f9.domain = Value(env1)
>>>
>>> # Symbol specialization
>>> next(s3.specialize(memory=memory))
b'master>John'
Persistence during Specialization and Abstraction of Symbols
------------------------------------------------------------
The values of variables defined in fields can have different assignment strategies, depending on their persistence and lifecycle.
The Scope class provides a description of those strategies, along with some examples.
.. autoclass:: netzob.Model.Vocabulary.Domain.Variables.Scope.Scope
.. raw:: latex
\newpage

View File

@@ -0,0 +1,20 @@
.. _fuzzing_symbols:
Fuzzing Message Format
----------------------
The Preset class can be used to apply format message fuzzing. Fuzzing configuration is provided by the :meth:`fuzz` method.
.. automethod:: netzob.Model.Vocabulary.Preset.Preset.fuzz(key, mode=None, generator=None, seed=None, counterMax=None, kwargs=None)
.. automethod:: netzob.Model.Vocabulary.Preset.Preset.setFuzzingCounterMax
.. automethod:: netzob.Model.Vocabulary.Preset.Preset.getFuzzingCounterMax
.. automethod:: netzob.Model.Vocabulary.Preset.Preset.unset
.. raw:: latex
\newpage

View File

@@ -0,0 +1,12 @@
.. _fuzzing_automata:
Fuzzing Automata
----------------
Mutation of a protocol state machine is provided by the :meth:`mutate` method of the :class:`Automata <netzob.Model.Grammar.Automata.Automata>` class.
.. automethod:: netzob.Model.Grammar.Automata.Automata.mutate
.. raw:: latex
\newpage

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -0,0 +1,17 @@
.. _protospec:
Protocol Modeling
=================
In the API, the Protocol class is the entry point for defining
a complete protocol made of a state machine and different format
messages.
.. autoclass:: netzob.Model.Protocol.Protocol
:members: load_format
.. raw:: latex
\newpage

View File

@@ -0,0 +1,149 @@
.. _statemachinespec:
State Machine Modeling
======================
State Machine Modeling Concepts
-------------------------------
The ZDL language can be used to specify a **state machine**, or automaton, for a protocol. A state machine is based on two components: **States** and **Transitions**. A state represents the status of a service, and expects conditions to trigger the execution of a transition. A transition is a list of actions that will be executed when a condition is met at a specific state (such as the receipt of a network message). The list of actions may contain sending a network message, changing the value of session or global variables, moving to another state in the automaton, etc.
The language defines three kinds of transition in an automaton:
* **Standard transitions**: this represents a transition between two
states (an initial state and an end state) in an automaton. The
initial state and the end state can be the same.
* **Opening channel transitions**: this represents a transition which, when
executed, requests to open the current underlying communication channel.
* **Closing channel transitions**: this represents a transition which, when
executed, requests to close the current underlying communication channel.
In the Netzob library, a state machine relies on symbols to trigger transitions between states. In order to represent the state machine structure, the library relies on a mathematical model based on a **Mealy machine** (cf. https://en.wikipedia.org/wiki/Mealy_machine). The library leverages this model by associating, for each transition, an input symbol and a list of output symbols, as shown on the figure below.
.. figure:: img/state_machine.*
:align: center
Example of State Machine modeling with states, transitions, input and output symbols.
Depending on the peer point of view, either an initiator (e.g. a client that starts a communication with a remote service) or a non initiator (e.g. a service that waits for input messages), the interpretation of the state machine is different. This interpretation is done with a state machine visitor that is called an :class:`~netzob.Simulator.Actor.Actor` in the API.
From an **initiator point of view**, when the actor is at a specific state in the automaton, a random transition is taken amongst the available transitions. In the above example, two transitions, ``T1`` and ``T2``, are available at the state ``S1``. Then, the input symbol of the picked transition is specialized into a message and this message is emitted to the target. If the target replies, the actor abstracts the received message into a symbol, and checks if this symbol corresponds to one of the expected output symbols. If it matches, the transition succeeds and thus leads to the end state of the transition. In the above example, the transition ``T2`` would lead to the state ``S3``. If no response comes from the target, or if a wrong message is received, we leave the automaton.
From a **non initiator point of view**, when at a specific state in the automaton, the actor waits for a network message. When one network message is received, it is abstracted into a symbol. Then, we retrieve the transition that has this symbol as input symbol. When a transition is retrieved, we randomly pick one symbol amongst the output symbols, and send this symbol to the remote peer. Finally, the transition leads to the end state of the transition.
When the actor has to select a transition, or when the actor has to identify the current transition according to the received message, it is possible to influence this choice through the help of callback functions.
Likewise, when the actor sends the input symbol, or when the actor sends an output symbol, it is possible to influence the selection of the symbol, through the help of callback functions or selection probability weight.
In order to model hybrid state machines where a peer is able to send or receive symbols depending on the context, it is possible to change the initiator behavior at specific transitions. This is done through the :attr:`~netzob.Model.Grammar.Transitions.Transition.Transition.inverseInitiator` attribute on :class:`~netzob.Model.Grammar.Transitions.Transition.Transition` objects. When setting this attribute to ``True`` on a transition, an actor will inverse the way symbols are exchanged (e.g. an initiator actor will first wait for an input symbol and then send one of the output symbols).
A **Memory** (see :class:`~netzob.Model.Vocabulary.Domain.Variables.Memory.Memory`) is used to keep track of a context for a specific communication. This memory can leverage variables from the protocol or even the environment. The memory is initialized at the beginning of the communication, and its internal state evolves throughout the exchanged messages.
.. Besides, two extensions allow refining the state machine model:
.. * The capability to define a reaction time on a transition. This reaction time between receiving a specific symbol and sending the output symbol will be enforced by the library.
.. * The capability to provide indeterminism on output symbols. The library enables the user to model a transition which, for a sequence of input symbols, associates many sequences of output symbols. The chosen sequence of output symbols is selected randomly.
.. raw:: latex
\newpage
Modeling States
---------------
In the API, automaton states are modeled through the State class.
.. autoclass:: netzob.Model.Grammar.States.State.State
:members: copy
.. raw:: latex
\newpage
Modeling Transitions
--------------------
The available transitions are detailed in this chapter.
.. autoclass:: netzob.Model.Grammar.Transitions.Transition.Transition(startState, endState, inputSymbol=None, outputSymbols=None, name=None)
:members: copy
.. autoclass:: netzob.Model.Grammar.Transitions.OpenChannelTransition.OpenChannelTransition(startState, endState, name=None)
:members: copy
.. autoclass:: netzob.Model.Grammar.Transitions.CloseChannelTransition.CloseChannelTransition(startState, endState, name=None)
:members: copy
.. raw:: latex
\newpage
Taking Control over Emitted Symbol and Selected Transition
----------------------------------------------------------
A state may have different available transitions to other states. It
is possible to filter those available transitions in order to limit
them or to force a specific transition to be taken. The filtering
capability works by adding callbacks through the
:meth:`add_cbk_filter_transitions` method on a
:class:`~netzob.Model.Grammar.States.State.State` instance.
.. automethod:: netzob.Model.Grammar.States.State.State.add_cbk_filter_transitions
When a transition is selected, it is possible to modify it by adding
callbacks through the :meth:`add_cbk_modify_transition` method on a
:class:`~netzob.Model.Grammar.States.State.State` instance.
.. automethod:: netzob.Model.Grammar.States.State.State.add_cbk_modify_transition
Besides, during execution of a transition, it is possible to change
the symbol that will be sent to the remote peer, by adding callbacks
through the :meth:`add_cbk_modify_symbol` method on a
:class:`~netzob.Model.Grammar.Transitions.Transition.Transition` instance.
.. automethod:: netzob.Model.Grammar.Transitions.Transition.Transition.add_cbk_modify_symbol
Executing Actions during Transitions
------------------------------------
It is possible to execute specific actions during transitions, after sending or receiving a symbol, by adding
callbacks through the :meth:`add_cbk_action` method on a
:class:`~netzob.Model.Grammar.Transitions.Transition.Transition` instance. The typical usage of this callback is that it is possible to manipulate the memory context of the automaton after sending or receiving a symbol.
When specifying such a callback on a transition, this callback is then called twice for a transition: in an initiator context, the callback is first called after sending the input symbol, and then called after receiving one of the output symbols; while in a non initiator context, the callback is called after receiving the input symbol, and then called after sending one of the output symbols.
.. automethod:: netzob.Model.Grammar.Transitions.Transition.Transition.add_cbk_action
Summary of States and Transitions Processing
--------------------------------------------
The following figure gives a summary of the sequence of operations during states and transitions processing.
.. figure:: img/Grammar_procedure.*
:align: center
:scale: 70 %
Sequence of operations during states and transitions processing
Modeling Automata
-----------------
In the API, an automaton is made of a list of permitted symbols
and an initial state. An automaton is modeled using the Automata class.
.. autoclass:: netzob.Model.Grammar.Automata.Automata
:members: getStates, getState, getTransitions, getTransition, set_cbk_read_symbol_timeout,
set_cbk_read_unexpected_symbol, set_cbk_read_unknown_symbol,
generateDotCode, generateASCII, copy
.. raw:: latex
\newpage

View File

@@ -0,0 +1,150 @@
.. _trafficgeneration:
Sending and Receiving Messages
==============================
Underlying Concepts
-------------------
In the Netzob library, a **communication channel** is an element allowing a connection to a remote device. Generally, if the device is connected with an Ethernet network, the channel includes a socket object and all the properties used to configure it. The channel also provides the connection status and send/receive APIs.
Some specific channels make it possible to access and manipulate the underlying protocol header. These channels are prefixed with the term ``Custom``. The underlying protocol header takes the form of a :class:`Symbol <netzob.Model.Vocabulary.Symbol.Symbol>` for which we can specify a :class:`Preset <netzob.Model.Vocabulary.Preset.Preset>` configuration.
These elements are described in this chapter.
.. _trafficgeneration_channel_list:
Communication Channel API
-------------------------
Each communication channel provides the following API:
.. autoclass:: netzob.Simulator.AbstractChannel.AbstractChannel()
:members: open, close, __enter__, __exit__, read, write, write_map, flush, sendReceive, setSendLimit, clearSendLimit, set_rate, unset_rate
.. note::
There are two ways to open and close a channel.
**Both methods provide the same behavior**.
1. either by using the related methods:
:meth:`~netzob.Simulator.AbstractChannel.AbstractChannel.open` and
:meth:`~netzob.Simulator.AbstractChannel.AbstractChannel.close`.
Example:
.. code-block:: python
channel.open()
try:
channel.write(b'abcd')
finally:
channel.close()
2. or by using the Python context manager capability provided by the ``with`` statement
and the following methods:
:meth:`~netzob.Simulator.AbstractChannel.AbstractChannel.__enter__` and
:meth:`~netzob.Simulator.AbstractChannel.AbstractChannel.__exit__`.
Example:
.. code-block:: python
with channel:
channel.write(b'abcd')
Builder classes (see `Builder pattern <https://en.wikipedia.org/wiki/Builder_pattern>`_)
are also available for each communication channel. They can be used to create
an instance of the channel class using generic keys.
This API is available through the following class:
.. autoclass:: netzob.Simulator.ChannelBuilder.ChannelBuilder
:members: set, set_map, build
.. _channels:
Available Communication Channels
--------------------------------
The available communication channels are as follows:
* :class:`~netzob.Simulator.Channels.RawEthernetChannel.RawEthernetChannel`: this channel sends/receives Raw Ethernet frames.
* :class:`~netzob.Simulator.Channels.CustomEthernetChannel.CustomEthernetChannel`: this channel sends/receives Ethernet frames (with Ethernet header computed by this channel).
* :class:`~netzob.Simulator.Channels.CustomIPChannel.CustomIPChannel`: this channel sends/receives IP payloads (with IP header computed by this channel).
* :class:`~netzob.Simulator.Channels.IPChannel.IPChannel`: this channel sends/receives IP payloads (with IP header computed by the OS kernel).
* :class:`~netzob.Simulator.Channels.UDPClient.UDPClient`: this channel provides the connection of a client to a specific IP:Port server over a UDP socket.
* :class:`~netzob.Simulator.Channels.TCPClient.TCPClient`: this channel provides the connection of a client to a specific IP:Port server over a TCP socket.
* :class:`~netzob.Simulator.Channels.UDPServer.UDPServer`: this channel provides a server listening to a specific IP:Port over a UDP socket.
* :class:`~netzob.Simulator.Channels.TCPServer.TCPServer`: this channel provides a server listening to a specific IP:Port over a TCP socket.
* :class:`~netzob.Simulator.Channels.SSLClient.SSLClient`: this channel provides the connection of a client to a specific IP:Port server over a TCP/SSL socket.
* :class:`~netzob.Simulator.Channels.DebugChannel.DebugChannel`: this channel provides a way to log I/Os into a specific stream.
.. _trafficgeneration_channels:
Each communication channel, with its associated builder class, is described
in the following sub-chapters.
RawEthernetChannel channel
^^^^^^^^^^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.RawEthernetChannel.RawEthernetChannel
.. autoclass:: netzob.Simulator.Channels.RawEthernetChannel.RawEthernetChannelBuilder
CustomEthernetChannel channel
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.CustomEthernetChannel.CustomEthernetChannel
:members: setProtocol
.. autoclass:: netzob.Simulator.Channels.CustomEthernetChannel.CustomEthernetChannelBuilder
CustomIPChannel channel
^^^^^^^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.CustomIPChannel.CustomIPChannel
.. autoclass:: netzob.Simulator.Channels.CustomIPChannel.CustomIPChannelBuilder
IPChannel channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.IPChannel.IPChannel
.. autoclass:: netzob.Simulator.Channels.IPChannel.IPChannelBuilder
UDPClient channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.UDPClient.UDPClient
.. autoclass:: netzob.Simulator.Channels.UDPClient.UDPClientBuilder
TCPClient channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.TCPClient.TCPClient
.. autoclass:: netzob.Simulator.Channels.TCPClient.TCPClientBuilder
UDPServer channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.UDPServer.UDPServer
.. autoclass:: netzob.Simulator.Channels.UDPServer.UDPServerBuilder
TCPServer channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.TCPServer.TCPServer
.. autoclass:: netzob.Simulator.Channels.TCPServer.TCPServerBuilder
SSLClient channel
^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.SSLClient.SSLClient
.. autoclass:: netzob.Simulator.Channels.SSLClient.SSLClientBuilder
DebugChannel channel
^^^^^^^^^^^^^^^^^^^^
.. autoclass:: netzob.Simulator.Channels.DebugChannel.DebugChannel
.. autoclass:: netzob.Simulator.Channels.DebugChannel.DebugChannelBuilder
.. raw:: latex
\newpage

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@@ -0,0 +1,300 @@
.. currentmodule:: netzob
.. _overview:
Overview of Netzob
==================
Netzob has been initiated by security auditors of
`AMOSSYS <http://www.amossys.fr>`_ and the `CIDre research team of
Supélec <http://www.rennes.supelec.fr/ren/rd/cidre/>`_ to address the
reverse engineering of communication protocols.
Originally, the development of Netzob was initiated to support
security auditors and evaluators in their activities of modeling and
simulating undocumented protocols. The tool has then been extended to
allow smart fuzzing of unknown protocols.
The following picture depicts the main modules of Netzob:
.. figure:: http://www.netzob.org/img/overview_archi.png
:align: center
:alt: Architecture of Netzob
Architecture of Netzob
- **Import module:** Data import is available in two ways: either by
leveraging the channel-specific captors (currently network and IPC --
Inter-Process Communication), or by using specific importers (such as
PCAP files, structured files and OSpy files).
- **Protocol inference modules:** The vocabulary and grammar inference
methods constitute the core of Netzob. It provides both passive and
active reverse engineering of communication flows through automated
and manual mechanisms.
- **Simulation module:** Given vocabulary and grammar models previously
inferred, Netzob can understand and generate communication traffic
between multiple actors. It can act as either a client, a server or
both.
- **Export module:** This module makes it possible to export an inferred model of
a protocol in formats that are understandable by third party software
or by a human. Current work focuses on export format compatible with
main traffic dissectors (Wireshark and Scapy) and fuzzers (Peach and
Sulley).
And here is a screenshot of the main graphical interface:
.. figure:: https://dev.netzob.org/attachments/96/netzob_UI.png
:align: center
:alt:
The following sections will describe in more detail the available
mechanisms.
Import and capture data
~~~~~~~~~~~~~~~~~~~~~~~
The first step in the inferring process of a protocol in Netzob is to
capture and to import messages as samples. There are different methods
to retrieve messages depending on the communication channel used (files,
network, IPC, USB, etc.) and the format (PCAP, hex, raw binary flows,
etc.).
The figure below describes the multiple communication channels and
therefore the possible sniffing points Netzob aims at addressing.
.. figure:: http://www.netzob.org/img/overview_multipleFlows.png
:align: center
:alt: Multiple communication flows around an application
Multiple communication flows around an application
The current version (version 0.4) of Netzob deals with the following
data sources :
- **Live network communications**
- **Captured network communications** (PCAPs)
- **Inter-Process Communications** (IPCs)
- **Text and binary files**
- **API flows** through `oSpy <http://code.google.com/p/ospy/>`_ file
format support
Otherwise, if you plan to reverse a protocol implemented over an
unsupported communication channel, Netzob can manipulate any
communication flow through an XML representation. Therefore, this
situation only requires a specific development to capture the targeted
flow and to save it using a compatible XML.
.. figure:: http://www.netzob.org/img/overview_extraImport.png
:align: center
:width: 800 px
:alt: Importing data from an unknown communication channel using the XML definition
Importing data from an unknown communication channel using the XML
definition
Inferring message format and state machine with Netzob
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The vocabulary of a communication protocol defines all the words which
are integrated in it. For example, the vocabulary of a malware's
communication protocol looks like a set of possible commands : {"attack
`www.google.fr <http://www.google.fr>`_", "dnspoison
this.dns.server.com", "execute 'uname -a'", ...}. Another example of a
vocabulary is the set of valid words in the HTTP protocol : { "GET
/images/logo.png HTTP/1.1 ...", "HTTP/1.1 200 OK ...", ...}.
Netzob's vocabulary inferring process has been designed in order to
retrieve the set of all possible words used in a targeted protocol and
to identify their structures. Indeed words are made of different fields
which are defined by their value and types. Hence a word can be
described using the structure of its fields.
We describe the learning process implemented in Netzob to
semi-automatically infer the vocabulary and the grammar of a protocol.
This process, illustrated in the following picture, is performed in
three main steps:
#. **Clustering messages and partitioning these messages in fields.**
#. **Characterizing message fields and abstracting similar messages in
symbols.**
#. **Inferring the transition graph of the protocol.**
.. figure:: http://www.netzob.org/img/overview_inferenceSteps.png
:align: center
:width: 800 px
:alt: The main functionalities
The main functionalities
Step 1: clustering Messages and Partitioning in Fields
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To discover the format of a symbol, Netzob supports different
partitioning approaches. In this article we describe the most accurate
one, that leverages sequence alignment processes. This technique makes it
possible to align invariants in a set of messages. The `Needleman-Wunsch
algorithm <http://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm>`_
performs this task optimally. Needleman-Wunsch is particularly effective
on protocols where dynamic fields have variable lengths (as shown on the
following picture).
.. figure:: http://www.netzob.org/img/overview_needleman.png
:align: center
:alt: Sequence alignment with Needleman-Wunsch algorithm
Sequence alignment with Needleman-Wunsch algorithm
When partitioning and clustering processes are done, we obtain a
relevant first approximation of the overall message formats. The next
step consists in determining the characteristics of the fields.
If the size of those fields is fixed, as in TCP and IP headers, it is
preferable to apply a basic partitioning, also provided by Netzob. Such
partitioning works by aligning each message by the left, then
separating successive fixed columns from successive dynamic columns.
To regroup aligned messages by similarity, the Needleman-Wunsch algorithm
is used in conjunction with a clustering algorithm. The applied
algorithm is `UPGMA <http://en.wikipedia.org/wiki/UPGMA>`_.
Step 2 : characterization of Fields
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The field type identification partially derives from the partitioning
inference step. For fields containing only invariants, the type merely
corresponds to the invariant value. For other fields, the type is
automatically materialized, in first approximation, with a regular
expression, as shown on next figure. This form enables easy validation of
the data compliance with a specific type. Moreover, Netzob offers the
possibility to visualize the definition domain of a field. This helps to
manually refine the type associated with a field.
.. figure:: http://www.netzob.org/img/overview_fieldType.png
:align: center
:alt: Characterization of field type
Characterization of field type
Some intra-symbol dependencies are automatically identified. The size
field, present in many protocol formats, is an example of intra-symbol
dependency. A search algorithm has been designed to look for potential
size fields and their associated payloads. By extension, this technique
makes it possible to discover encapsulated protocol payloads.
Environmental dependencies are also identified by looking for specific
values retrieved during message capture. Such specific values consist of
characteristics of the underlying hardware, operating system and network
configuration. During the dependency analysis, these characteristics are
searched for in various encodings.
Step 3: inferring the Transition Graph of the Protocol
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The third step of the learning process discovers and extracts the
transition graph from a targeted protocol (also called the grammar).
More formally, the grammar of a communication protocol defines the set
of valid sentences which can be produced by a communication. A sentence
is a sorted set of words which may be received or emitted by a protocol
handler. An example of a simple sentence is :
::
["attack www.google.fr", "attack has failed", "attack www.kernel.org", "root access granted."]
which can be described using the following simple automata with S0 the
initial state :
.. figure:: http://www.netzob.org/img/overview_exampleSimpleGrammar.png
:align: center
:alt: Schema of a simple grammar
Schema of a simple grammar
The learning process step is achieved by a set of active experiments
that stimulate a real client or server implementation using successive
sequences of input symbols and analyze its responses.
In Netzob, the automaton used to represent or model a communication
protocol is an extended version of a Mealy automaton which includes
semi-stochastic transitions, contextualized and parametrized inputs and
outputs. The first academic presentation of this model is included in a
dedicated scientific paper provided in the documentation section.
The model is inferred through a dedicated **active** process which
consists in stimulating an implementation and analyzing its responses.
In this process, we use the previously inferred vocabulary to discover
and to learn the grammar of the communication protocol. Each stimulation
is computed following an extension of the **Angluin L\*** algorithm.
Protocol simulation
~~~~~~~~~~~~~~~~~~~
One of our main goals is to generate realistic network traffic from
undocumented protocols. Therefore, we have implemented a dedicated
module that, given vocabulary and grammar models previously inferred, can
simulate a communication protocol between multiple bots and masters.
Besides their use of the same model, each actor is independent from the
others and is organized around three main stages.
The first stage is a dedicated library that reads and writes from the
network channel. It also parses the flow into messages according to
previous protocol layers. The second stage uses the vocabulary to
abstract received messages into symbols and vice-versa to specialize
emitted symbols into messages. A memory buffer is also available to
manage dependency relations. The last stage implements the grammar model
and computes which symbols must be emitted or received according to the
current state and time.
Smart fuzzing with Netzob
~~~~~~~~~~~~~~~~~~~~~~~~~
A typical example of dynamic vulnerability analysis is robustness
testing. It can be used to reveal software programming errors which can
lead to software security vulnerabilities. These tests provide an
efficient and almost automated solution to easily identify and study
exposed surfaces of systems. Nevertheless, to be fully efficient, the
fuzzing approaches must cover the complete definition domain and
combination of all the variables which exist in a protocol (IP addresses,
serial numbers, size fields, payloads, message identifier, etc.). But
fuzzing typical communication interface requires too many test cases due
to the complex variation domains introduced by the semantic layer of a
protocol. In addition to this, an efficient fuzzing should also cover
the state machine of a protocol which also brings another huge set of
variations. The necessary time is nearly always too high and therefore
limits the efficiency of this approach.
With all these constraints, achieving robustness tests on a target is
feasible only if the expert has access to a specially designed tool for
the targeted protocol. Hence the emergence of a large number of tools to
verify the behavior of an application on one or more communication
protocols. However in the context of proprietary communications
protocols for which no specifications are published, fuzzers do not
provide optimal results.
Netzob helps the security evaluator by simplifying the creation of a
dedicated fuzzer for a proprietary or undocumented protocol. It provides
the expert with the means to execute a semi-automated inferring process to create a
model of the targeted protocol. This model can afterward be refined by
the evaluator. Finally, the created model is included in the fuzzing
module of Netzob which considers the vocabulary and the grammar of the
protocol to generate optimized and specific test cases. Both mutation
and generation are available for fuzzing.
Export protocol model
~~~~~~~~~~~~~~~~~~~~~
The following export formats are currently provided by Netzob:
- XML format
- human readable (Wireshark like)
- Peach fuzzer export: this enables the efficient use of the Peach
Fuzzer on previously undocumented protocols.
Besides, you can write your own exporter to manipulate the inferred
protocol model in your favorite tool.

View File

@@ -0,0 +1,613 @@
.. currentmodule:: netzob
.. _discover_features:
Discover features of Netzob
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
This tutorial for Netzob 1.x is currently slightly obsolete, and should be updated to the Netzob API 2.x.
This tutorial presents the main features of Netzob regarding the
inference of message formats and grammar of a simple toy protocol. The
described features cover the following capabilities:
- Import of a PCAP file
- Format message inference
- Partitionment of messages following a specific delimiter
- Regroupment of messages following a specific key field
- Partitionment of a subset of each message following a sequence alignment
- Search for relationships in each group of messages
- Modification of the format message to apply found relationships
- Grammar inference
- Generation of an automaton with one main state according to a captured sequence of messages
- Generation of an automaton with a sequence of states according to a captured sequence of messages
- Generation of a Prefix Tree Acceptor (PTA) automaton according to a captured sequence of messages
- Traffic generation and fuzzing
- Generation of messages following the inferred message format of each group and through visiting the inferred automata
- Fuzzing of an implementation by generating altered message formats
Retrieve Netzob and resources.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
At first, retrieve the source code of Netzob::
$ git clone https://dev.netzob.org/git/netzob
Then, you can retrieve the source code of the toy protocol implementation used in this tutorial, as well as some PCAP files of sequences of messages.
- `Toy protocol implementation <https://dev.netzob.org/attachments/download/179/tutorial_netzob_v1.tar.gz>`_
- `PCAP of sequence 1 <https://dev.netzob.org/attachments/download/182/target_src_v1_session1.pcap>`_
- `PCAP of sequence 2 <https://dev.netzob.org/attachments/download/181/target_src_v1_session2.pcap>`_
- `PCAP of sequence 3 <https://dev.netzob.org/attachments/download/180/target_src_v1_session3.pcap>`_
Import messages from a PCAP file.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Reading packets from a PCAP file is done through the PCAPImporter.readFile() static function. This function can optionally take more parameters to specify a BPF filter, the import layer or the number of packets to capture::
from netzob.all import *
messages_session1 = PCAPImporter.readFile("target_src_v1_session1.pcap").values()
messages_session2 = PCAPImporter.readFile("target_src_v1_session2.pcap").values()
messages = messages_session1 + messages_session2
for message in messages:
print(message)
The output is::
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] 'CMDidentify#\x07\x00\x00\x00Roberto'
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] 'RESidentify#\x00\x00\x00\x00\x00\x00\x00\x00'
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] 'CMDinfo#\x00\x00\x00\x00'
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] 'RESinfo#\x00\x00\x00\x00\x04\x00\x00\x00info'
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] 'CMDstats#\x00\x00\x00\x00'
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] 'RESstats#\x00\x00\x00\x00\x05\x00\x00\x00stats'
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] 'CMDauthentify#\n\x00\x00\x00aStrongPwd'
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] 'RESauthentify#\x00\x00\x00\x00\x00\x00\x00\x00'
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] 'CMDencrypt#\x06\x00\x00\x00abcdef'
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] "RESencrypt#\x00\x00\x00\x00\x06\x00\x00\x00$ !&'$"
[1388154953.32 127.0.0.1:57831->127.0.0.1:4242] "CMDdecrypt#\x06\x00\x00\x00$ !&'$"
[1388154953.32 127.0.0.1:4242->127.0.0.1:57831] 'RESdecrypt#\x00\x00\x00\x00\x06\x00\x00\x00abcdef'
[1388154953.33 127.0.0.1:57831->127.0.0.1:4242] 'CMDbye#\x00\x00\x00\x00'
[1388154953.33 127.0.0.1:4242->127.0.0.1:57831] 'RESbye#\x00\x00\x00\x00\x00\x00\x00\x00'
[1388154953.31 127.0.0.1:57831->127.0.0.1:4242] 'CMDidentify#\x04\x00\x00\x00fred'
[1388154953.31 127.0.0.1:4242->127.0.0.1:57831] 'RESidentify#\x00\x00\x00\x00\x00\x00\x00\x00'
[1388154953.31 127.0.0.1:57831->127.0.0.1:4242] 'CMDinfo#\x00\x00\x00\x00'
(...)
Regroup messages in a symbol and do a format partitionment with a delimiter
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
According to a quick review of the displayed messages, the character '#' sounds interesting as it appears in the middle of each message. So let's use it as a delimiter::
symbol = Symbol(messages=messages)
Format.splitDelimiter(symbol, ASCII("#"))
print("[+] Symbol structure:")
print(symbol._str_debug())
print("[+] Partitionned messages:")
print(symbol)
We now obtain the following symbol (i.e. our group of messages) structure::
[+] Symbol structure:
Symbol
|-- Field-0
|-- Alt
|-- Data (Raw='RESstats' ((0, 64)))
|-- Data (Raw='RESauthentify' ((0, 104)))
|-- Data (Raw='RESidentify' ((0, 88)))
|-- Data (Raw='CMDstats' ((0, 64)))
|-- Data (Raw='CMDdecrypt' ((0, 80)))
|-- Data (Raw='CMDauthentify' ((0, 104)))
|-- Data (Raw='RESdecrypt' ((0, 80)))
|-- Data (Raw='RESinfo' ((0, 56)))
|-- Data (Raw='CMDinfo' ((0, 56)))
|-- Data (Raw='RESauthentify' ((0, 104)))
|-- Data (Raw='CMDencrypt' ((0, 80)))
|-- Data (Raw='CMDauthentify' ((0, 104)))
|-- Data (Raw='CMDstats' ((0, 64)))
|-- Data (Raw='RESbye' ((0, 48)))
|-- Data (Raw='RESdecrypt' ((0, 80)))
|-- Data (Raw='RESencrypt' ((0, 80)))
|-- Data (Raw='CMDidentify' ((0, 88)))
|-- Data (Raw='CMDbye' ((0, 48)))
|-- Data (Raw='RESinfo' ((0, 56)))
|-- Data (Raw='RESencrypt' ((0, 80)))
|-- Data (Raw='RESidentify' ((0, 88)))
|-- Data (Raw='CMDidentify' ((0, 88)))
|-- Data (Raw='CMDencrypt' ((0, 80)))
|-- Data (Raw='RESbye' ((0, 48)))
|-- Data (Raw='CMDinfo' ((0, 56)))
|-- Data (Raw='CMDbye' ((0, 48)))
|-- Data (Raw='CMDdecrypt' ((0, 80)))
|-- Data (Raw='RESstats' ((0, 64)))
|-- Field-sep-23
|-- Data (ASCII=# ((0, 8)))
|-- Field-2
|-- Alt
|-- Data (Raw='\x04\x00\x00\x00fred' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00\x05\x00\x00\x00stats' ((0, 104)))
|-- Data (Raw='\n\x00\x00\x00aStrongPwd' ((0, 112)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw='\x06\x00\x00\x00abcdef' ((0, 80)))
|-- Data (Raw='\x00\x00\x00\x00\x04\x00\x00\x00info' ((0, 96)))
|-- Data (Raw='\n\x00\x00\x00123456test' ((0, 112)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
|-- Data (Raw='\x00\x00\x00\x00\n\x00\x00\x00123456test' ((0, 144)))
|-- Data (Raw='\x07\x00\x00\x00Roberto' ((0, 88)))
|-- Data (Raw="\x00\x00\x00\x00\x06\x00\x00\x00$ !&'$" ((0, 112)))
|-- Data (Raw="\x00\x00\x00\x00\n\x00\x00\x00spqvwt6'16" ((0, 144)))
|-- Data (Raw="\x06\x00\x00\x00$ !&'$" ((0, 80)))
|-- Data (Raw='\x00\x00\x00\x00\x05\x00\x00\x00stats' ((0, 104)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw="\n\x00\x00\x00spqvwt6'16" ((0, 112)))
|-- Data (Raw='\t\x00\x00\x00myPasswd!' ((0, 104)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw='\x00\x00\x00\x00\x04\x00\x00\x00info' ((0, 96)))
|-- Data (Raw='\x00\x00\x00\x00\x06\x00\x00\x00abcdef' ((0, 112)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw='\x00\x00\x00\x00' ((0, 32)))
|-- Data (Raw='\x00\x00\x00\x00\x00\x00\x00\x00' ((0, 64)))
The partitioned messages now look like this::
'CMDidentify' | '#' | '\x07\x00\x00\x00Roberto'
'RESidentify' | '#' | '\x00\x00\x00\x00\x00\x00\x00\x00'
'CMDinfo' | '#' | '\x00\x00\x00\x00'
'RESinfo' | '#' | '\x00\x00\x00\x00\x04\x00\x00\x00info'
'CMDstats' | '#' | '\x00\x00\x00\x00'
'RESstats' | '#' | '\x00\x00\x00\x00\x05\x00\x00\x00stats'
'CMDauthentify' | '#' | '\n\x00\x00\x00aStrongPwd'
'RESauthentify' | '#' | '\x00\x00\x00\x00\x00\x00\x00\x00'
'CMDencrypt' | '#' | '\x06\x00\x00\x00abcdef'
'RESencrypt' | '#' | "\x00\x00\x00\x00\x06\x00\x00\x00$ !&'$"
'CMDdecrypt' | '#' | "\x06\x00\x00\x00$ !&'$"
'RESdecrypt' | '#' | '\x00\x00\x00\x00\x06\x00\x00\x00abcdef'
'CMDbye' | '#' | '\x00\x00\x00\x00'
'RESbye' | '#' | '\x00\x00\x00\x00\x00\x00\x00\x00'
'CMDidentify' | '#' | '\x04\x00\x00\x00fred'
(...)
Cluster according to a key field
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The first field seems interesting, as it contains some kind of commands ('CMDencrypt', 'CMDidentify', etc.). Let's thus cluster the symbol according to the first field::
symbols = Format.clusterByKeyField(symbol, symbol.fields[0])
print("[+] Number of symbols after clustering: {0}".format(len(symbols)))
print("[+] Symbol list:")
for keyFieldName, s in symbols.items():
print(" * {0}".format(keyFieldName))
The clustering algorithm produces 14 different symbols, where each symbol has a unique value in the first field::
[+] Number of symbols after clustering: 14
[+] Symbol list:
* RESdecrypt
* RESbye
* RESidentify
* CMDbye
* RESencrypt
* CMDidentify
* RESstats
* CMDencrypt
* RESauthentify
* CMDdecrypt
* CMDinfo
* CMDauthentify
* RESinfo
* CMDstats
Apply a format partitionment with a sequence alignment on the third field of each symbol
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
As the last field seems to have a dynamic size, let's have a look at what would provide a sequence alignment (i.e. a means to align static and dynamic sub-fields)::
for symbol in symbols.values():
Format.splitAligned(symbol.fields[2], doInternalSlick=True)
print("[+] Partitionned messages:")
print(symbol)
For the symbol 'CMDencrypt', the sequence alignment of the last field produces the following format, where we can observe a static field of '\x00\x00\x00' surrounded by two variable fields. The last field seems to be the buffer we want to encrypt, as the key field name suggests (i.e. 'CMDencrypt')::
(...)
[+] Partitionned messages:
'CMDencrypt' | '#' | '\n' | '\x00\x00\x00' | '123456test'
'CMDencrypt' | '#' | '\x06' | '\x00\x00\x00' | 'abcdef'
(...)
Find field relations in each symbol
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Let's now find any relationships in those messages::
for symbol in symbols.values():
rels = RelationFinder.findOnSymbol(symbol)
print("[+] Relations found: ")
for rel in rels:
print(" " + rel["relation_type"] + ", between '" + rel["x_attribute"] + "' of:")
print(" " + str('-'.join([f.name for f in rel["x_fields"]])))
p = [v.getValues()[:] for v in rel["x_fields"]]
print(" " + str(p))
print(" " + "and '" + rel["y_attribute"] + "' of:")
print(" " + str('-'.join([f.name for f in rel["y_fields"]])))
p = [v.getValues()[:] for v in rel["y_fields"]]
print(" " + str(p))
In the symbol 'CMDencrypt', we have found a relationship between the content of a field (the third one) and the length of another field (the last one, which presumably contains the buffer we want to encrypt)::
(...)
[+] Relations found:
SizeRelation, between 'value' of:
Field
[['\n', '\x06']]
and 'size' of:
Field
[['123456test', 'abcdef']]
(...)
Find relations and apply them in the symbol structure
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We then modify the format message to apply the relationship we have just found, by creating a Size field whose value depends on the content of a targeted field. We also specify a factor that basically says that the value of the size field should be one eighth of the size of the buffer field (as every field size is expressed in bits by default)::
for symbol in symbols.values():
rels = RelationFinder.findOnSymbol(symbol)
for rel in rels:
# Apply first found relationship
rel = rels[0]
rel["x_fields"][0].domain = Size(rel["y_fields"], factor=1/8.0)
print("[+] Symbol structure:")
print(symbol._str_debug())
The 'CMDencrypt' symbol structure now looks like this::
(...)
[+] Symbol structure:
Symbol_CMDencrypt
|-- Field-0
|-- Data (ASCII=CMDencrypt ((0, 80)))
|-- Field-sep-23
|-- Data (ASCII=# ((0, 8)))
|-- Field-2
|-- Data (Raw=None ((0, None)))
|-- |-- Field
|-- Size(['Field']) - Type:Raw=None ((8, 8))
|-- |-- Field
|-- Data (Raw='\x00\x00\x00' ((0, 24)))
|-- |-- Field
|-- Data (Raw=None ((0, 80)))
(...)
That is all for the message format inference. Let's now look at the state machine of this toy protocol.
Generate a chained states automaton
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We will generate a basic automaton that illustrates the sequence of commands and responses extracted from a PCAP file. For each message sent, this will create a new transition to a new state, thus the name of *chained states automaton*::
# Create a session of messages
session = Session(messages_session1)
# Abstract this session according to the inferred symbols
abstractSession = session.abstract(list(symbols.values()))
# Generate an automata according to the observed sequence of messages/symbols
automata = Automata.generateChainedStatesAutomata(abstractSession, list(symbols.values()))
# Print the dot representation of the automata
dotcode = automata.generateDotCode()
print(dotcode)
The obtained automaton is finally converted into Dot code in order to render a graphical version of it::
digraph G {
"Start state" [shape=doubleoctagon, style=filled, fillcolor=white, URL="f8d33b83-d6b0-4180-832c-7cce9d6b3fea"];
"State 1" [shape=ellipse, style=filled, fillcolor=white, URL="a332ed56-e2d8-4c8c-9ec2-99c5f942e9a3"];
"State 2" [shape=ellipse, style=filled, fillcolor=white, URL="8f45bd4e-fe03-4a26-bf9a-1adec60f597d"];
"State 3" [shape=ellipse, style=filled, fillcolor=white, URL="01999e79-de00-467d-987a-e9411d57be99"];
"State 4" [shape=ellipse, style=filled, fillcolor=white, URL="9b20ed29-77e5-43c1-bb8b-cf3a84674941"];
"State 5" [shape=ellipse, style=filled, fillcolor=white, URL="52ec3815-656b-421b-bb1f-c4f7746be534"];
"State 6" [shape=ellipse, style=filled, fillcolor=white, URL="1cbbd123-32d5-4cd8-bd01-4fd3bcd8ae38"];
"State 7" [shape=ellipse, style=filled, fillcolor=white, URL="8a8ab662-db23-4206-ba35-28396ee31115"];
"State 8" [shape=ellipse, style=filled, fillcolor=white, URL="ee9e0d5d-bb4e-4d2e-8c97-1553afa1cc68"];
"End state" [shape=ellipse, style=filled, fillcolor=white, URL="3874e4e9-af5d-428e-92b8-e1fda38b6ef9"];
"Start state" -> "State 1" [fontsize=5, label="OpenChannelTransition", URL="4beecca4-0d48-4ca9-8d83-ffd8766b64c7"];
"State 1" -> "State 2" [fontsize=5, label="Transition (Symbol_CMDidentify;{Symbol_RESidentify})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 2" -> "State 3" [fontsize=5, label="Transition (Symbol_CMDinfo;{Symbol_RESinfo})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 3" -> "State 4" [fontsize=5, label="Transition (Symbol_CMDstats;{Symbol_RESstats})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 4" -> "State 5" [fontsize=5, label="Transition (Symbol_CMDauthentify;{Symbol_RESauthentify})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 5" -> "State 6" [fontsize=5, label="Transition (Symbol_CMDencrypt;{Symbol_RESencrypt})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 6" -> "State 7" [fontsize=5, label="Transition (Symbol_CMDdecrypt;{Symbol_RESdecrypt})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 7" -> "State 8" [fontsize=5, label="Transition (Symbol_CMDbye;{Symbol_RESbye})", URL="c4e5451c-6a53-41f3-9748-7179774eb7de"];
"State 8" -> "End state" [fontsize=5, label="CloseChannelTransition", URL="c6ac87b7-5de1-401a-8b75-5d2a73d81264"];
}
.. figure:: https://dev.netzob.org/attachments/download/172/automata_target_v1_chained.svg
:align: center
:target: https://dev.netzob.org/attachments/download/172/automata_target_v1_chained.svg
:alt:
Generate a one state automaton
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This time, instead of converting a PCAP into a sequence of states for each message observed, we generate a unique state that accepts any of the observed sent messages to trigger a new transition. In response to each sent message (for example 'CMDencrypt'), we expect a specific response (for example 'RESencrypt')::
# Create a session of messages
session = Session(messages_session1)
# Abstract this session according to the inferred symbols
abstractSession = session.abstract(list(symbols.values()))
# Generate an automata according to the observed sequence of messages/symbols
automata = Automata.generateOneStateAutomata(abstractSession, list(symbols.values()))
# Print the dot representation of the automata
dotcode = automata.generateDotCode()
print(dotcode)
The obtained automaton is finally converted into Dot code in order to render a graphical version of it::
digraph G {
"Start state" [shape=doubleoctagon, style=filled, fillcolor=white, URL="0659071e-1849-4616-a11a-e98edfe86e24"];
"Main state" [shape=ellipse, style=filled, fillcolor=white, URL="424e0a69-da0b-4030-816a-8368e30a00a9"];
"End state" [shape=ellipse, style=filled, fillcolor=white, URL="9de3d54b-f0eb-45f8-809a-86a60d22812f"];
"Start state" -> "Main state" [fontsize=5, label="OpenChannelTransition", URL="3818118b-97db-474f-b9c3-f38c04152a74"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDidentify;{Symbol_RESidentify})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDinfo;{Symbol_RESinfo})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDstats;{Symbol_RESstats})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDauthentify;{Symbol_RESauthentify})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDencrypt;{Symbol_RESencrypt})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDdecrypt;{Symbol_RESdecrypt})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "Main state" [fontsize=5, label="Transition (Symbol_CMDbye;{Symbol_RESbye})", URL="f6000e04-10a8-41de-a1a0-29021440684a"];
"Main state" -> "End state" [fontsize=5, label="CloseChannelTransition", URL="75a4cc3a-72a4-42a3-af2c-aa3939f899aa"];
}
.. figure:: https://dev.netzob.org/attachments/download/173/automata_target_v1_onestate.svg
:align: center
:target: https://dev.netzob.org/attachments/download/173/automata_target_v1_onestate.svg
:alt:
Generate a PTA-based automaton
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Finally, we convert multiple sequences of messages taken from different PCAP files to generate an automaton for which we have merged identical paths. The underlying merging strategy is called a Prefix-Tree Acceptor::
# Create sessions of messages
messages_session1 = PCAPImporter.readFile("target_src_v1_session1.pcap").values()
messages_session3 = PCAPImporter.readFile("target_src_v1_session3.pcap").values()
session1 = Session(messages_session1)
session3 = Session(messages_session3)
# Abstract this session according to the inferred symbols
abstractSession1 = session1.abstract(list(symbols.values()))
abstractSession3 = session3.abstract(list(symbols.values()))
# Generate an automata according to the observed sequence of messages/symbols
automata = Automata.generatePTAAutomata([abstractSession1, abstractSession3], list(symbols.values()))
# Print the dot representation of the automata
dotcode = automata.generateDotCode()
print(dotcode)
The obtained automaton is finally converted into Dot code in order to render a graphical version of it::
digraph G {
"Start state" [shape=doubleoctagon, style=filled, fillcolor=white, URL="e46d8a67-2a96-479a-9234-c1b38c75b847"];
"State 0" [shape=ellipse, style=filled, fillcolor=white, URL="0cd8a2c9-4410-45a0-9950-6456546f49dc"];
"State 1" [shape=ellipse, style=filled, fillcolor=white, URL="bbc10d50-f197-40f6-a674-5f80790ef954"];
"State 2" [shape=ellipse, style=filled, fillcolor=white, URL="739801b7-9e0d-4fba-a4f5-cf130e6b7fbf"];
"State 3" [shape=ellipse, style=filled, fillcolor=white, URL="c2075b80-16b9-4bd7-b290-6eb333f94e43"];
"State 4" [shape=ellipse, style=filled, fillcolor=white, URL="715ede75-d81e-46ea-a7c1-f537e5dba892"];
"State 9" [shape=ellipse, style=filled, fillcolor=white, URL="ad5873af-c26a-482f-94d9-0cf47c69376b"];
"State 10" [shape=ellipse, style=filled, fillcolor=white, URL="01859f7d-6b43-45af-8c17-9decb10dea9b"];
"End state 11" [shape=ellipse, style=filled, fillcolor=white, URL="7f4bd693-a35f-479b-8e86-128dc46c71cf"];
"State 5" [shape=ellipse, style=filled, fillcolor=white, URL="ee9da65c-b072-4344-bf71-2d67a3b73880"];
"State 6" [shape=ellipse, style=filled, fillcolor=white, URL="902e76e4-6a9a-45a2-95ba-ae9484f1084f"];
"State 7" [shape=ellipse, style=filled, fillcolor=white, URL="f7e9b27a-6879-4b4f-bb51-00530f07addf"];
"End state 8" [shape=ellipse, style=filled, fillcolor=white, URL="fe710eed-287f-4abf-93bf-6878e487d8a9"];
"Start state" -> "State 0" [fontsize=5, label="OpenChannelTransition", URL="5d6139d0-9b1c-49b2-b19d-91ae8c56f299"];
"State 0" -> "State 1" [fontsize=5, label="Transition (Symbol_CMDidentify;{Symbol_RESidentify})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 1" -> "State 2" [fontsize=5, label="Transition (Symbol_CMDinfo;{Symbol_RESinfo})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 2" -> "State 3" [fontsize=5, label="Transition (Symbol_CMDstats;{Symbol_RESstats})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 3" -> "State 4" [fontsize=5, label="Transition (Symbol_CMDauthentify;{Symbol_RESauthentify})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 4" -> "State 5" [fontsize=5, label="Transition (Symbol_CMDencrypt;{Symbol_RESencrypt})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 4" -> "State 9" [fontsize=5, label="Transition (Symbol_CMDdecrypt;{Symbol_RESdecrypt})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 9" -> "State 10" [fontsize=5, label="Transition (Symbol_CMDbye;{Symbol_RESbye})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 10" -> "End state 11" [fontsize=5, label="CloseChannelTransition", URL="f7ddbccf-93b6-4496-a153-5b2306d95dac"];
"State 5" -> "State 6" [fontsize=5, label="Transition (Symbol_CMDdecrypt;{Symbol_RESdecrypt})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 6" -> "State 7" [fontsize=5, label="Transition (Symbol_CMDbye;{Symbol_RESbye})", URL="a1d2d03d-8c58-4c83-afa1-c40433fbd833"];
"State 7" -> "End state 8" [fontsize=5, label="CloseChannelTransition", URL="f7ddbccf-93b6-4496-a153-5b2306d95dac"];
}
.. figure:: https://dev.netzob.org/attachments/download/174/automata_target_v1_pta.svg
:align: center
:target: https://dev.netzob.org/attachments/download/174/automata_target_v1_pta.svg
:alt:
Generate messages according to the inferred model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We now have a pretty good knowledge of the message format and grammar of the targeted protocol. Let's thus play with this model, by trying to communicate with a real server implementation.
At first, let's start the server in order to communicate with it::
$ cd src_v1/
$ ./server
Ready to read incoming messages
(...)
Then, we create a UDP client that will communicate with the server (on 127.0.0.1:4242) by exchanging messages generated from the inferred symbols::
# Create a UDP client instance
channelOut = UDPClient(remoteIP="127.0.0.1", remotePort=4242)
abstractionLayerOut = AbstractionLayer(channelOut, list(symbols.values()))
abstractionLayerOut.openChannel()
# Visit the automata for n iteration
state = automata.initialState
for n in range(8):
state = state.executeAsInitiator(abstractionLayerOut)
We go through 8 iterations in the automaton::
1454: [INFO] AbstractionLayer:openChannel: Going to open the communication channel...
1454: [INFO] AbstractionLayer:openChannel: Communication channel opened.
1454: [INFO] State:executeAsInitiator: Next transition: Open.
1454: [INFO] AbstractionLayer:openChannel: Going to open the communication channel...
1454: [INFO] AbstractionLayer:openChannel: Communication channel opened.
1454: [INFO] State:executeAsInitiator: Transition 'Open' leads to state: State 1.
1455: [INFO] State:executeAsInitiator: Next transition: Transition.
1455: [INFO] AbstractionLayer:writeSymbol: Going to specialize symbol: 'Symbol_CMDidentify' (id=dbea29b9-7e9f-4c2b-be14-625f675569f3).
1455: [INFO] AbstractionLayer:writeSymbol: Data generated from symbol 'Symbol_CMDidentify': 'CMDidentify#\x03\x00\x00\x00\xfc{\xdb'.
1456: [INFO] AbstractionLayer:writeSymbol: Going to write to communication channel...
1456: [INFO] AbstractionLayer:writeSymbol: Writing to commnunication channel donne..
1456: [INFO] AbstractionLayer:readSymbol: Going to read from communication channel...
1456: [INFO] AbstractionLayer:readSymbol: Received data: ''RESidentify#\x00\x00\x00\x00\x00\x00\x00\x00''
1457: [INFO] AbstractionLayer:readSymbol: Received symbol on communication channel: 'Symbol_RESidentify'
1457: [INFO] Transition:executeAsInitiator: Possible output symbol: 'Symbol_RESidentify' (id=49c24e1c-3751-412e-9f6a-f006a7de7492).
1457: [INFO] State:executeAsInitiator: Transition 'Transition' leads to state: State 2.
1457: [INFO] State:executeAsInitiator: Next transition: Transition.
1457: [INFO] AbstractionLayer:writeSymbol: Going to specialize symbol: 'Symbol_CMDinfo' (id=5eb47a57-eccf-4d06-8231-0b1ae87f96a7).
1458: [INFO] AbstractionLayer:writeSymbol: Data generated from symbol 'Symbol_CMDinfo': 'CMDinfo#\x00\x00\x00\x00'.
1458: [INFO] AbstractionLayer:writeSymbol: Going to write to communication channel...
1458: [INFO] AbstractionLayer:writeSymbol: Writing to commnunication channel donne..
1458: [INFO] AbstractionLayer:readSymbol: Going to read from communication channel...
1458: [INFO] AbstractionLayer:readSymbol: Received data: ''RESinfo#\x00\x00\x00\x00\x04\x00\x00\x00info''
1462: [INFO] AbstractionLayer:readSymbol: Received symbol on communication channel: 'Symbol_RESinfo'
1462: [INFO] Transition:executeAsInitiator: Possible output symbol: 'Symbol_RESinfo' (id=b41502e3-21ea-4cb9-9c1e-dc171f715685).
1462: [INFO] State:executeAsInitiator: Transition 'Transition' leads to state: State 3.
1462: [INFO] State:executeAsInitiator: Next transition: Transition.
(...)
Regarding the real server, we can see that received messages are well formatted, as the server is able to parse them and send correct responses::
$ ./server
Ready to read incoming messages
-> Read: CMDidentify#.
Command: CMDidentify
Arg size: 2
Arg content: ..
<- Send:
Return value: 0
Size of data buffer: 0
Data buffer:
""
-> Read: CMDinfo#
Command: CMDinfo
Arg size: 0
<- Send:
Return value: 0
Size of data buffer: 4
Data buffer:
DATA: 69 6e 66 6f "info"
-> Read: CMDstats#
Command: CMDstats
Arg size: 0
<- Send:
Return value: 0
Size of data buffer: 5
Data buffer:
DATA: 73 74 61 74 73 "stats"
-> Read: CMDauthentify#.
Command: CMDauthentify
Arg size: 6
Arg content: ......
<- Send:
Return value: 0
Size of data buffer: 0
Data buffer:
""
-> Read: CMDencrypt#.
Command: CMDencrypt
Arg size: 2
Arg content: ..
<- Send:
(...)
Do some fuzzing on a specific symbol
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Finally, we deliberately alter the message format of the 'CMDencrypt' symbol in order to try some fuzzing. The format modification corresponds to an extension of the size of the buffer field (i.e. the one which receives the data to encrypt)::
def send_and_receive_symbol(symbol):
data = symbol.specialize()
print("[+] Sending: {0}".format(repr(data)))
channelOut.write(data)
data = channelOut.read()
print("[+] Receiving: {0}".format(repr(data)))
# Update symbol definition to allow a broader payload size
symbols["CMDencrypt"].fields[2].fields[2].domain = Raw(nbBytes=(10, 120))
for i in range(10):
send_and_receive_symbol(symbols["CMDencrypt"])
We can see that Netzob is only sending CMDencrypt messages with a potentially long last field::
[+] Sending: 'CMDencrypt#6\x00\x00\x00&\xe0*\xb3\xa8A(\x0b\xd2yA\xb5\xb8\rw\x0fGi\xee\xb3\xd6\xb0<\xfc\xc0\xa7m\xbd\xbc\xde2~\xceE\xe5\xda@\xd4\xed\xed\xf2\xb4\xe7\t\xfbC\xbf\x05\xc6\xce\xfb\x83\xf2\x00'
(...)
On the server side, we quickly get a segmentation fault, due to a bug in the parsing of the last field::
$ gdb ./server
(gdb) run
Starting program: /home/fgy/travaux/netzob/git/netzob-resources/experimentations/tutorial_target/src_v1/server
Ready to read incoming messages
(...)
-> Read: CMDencrypt#6
Command: CMDencrypt
Arg size: 54
Arg content: &?*??A(
wGi???<???m???2~?E??@???????? ?C??
Program received signal SIGSEGV, Segmentation fault.
0x08048bc0 in api_encrypt (in=0x45ce7e32 <Address 0x45ce7e32 out of bounds>, len=3561020133, out=0xb4f2eded <Address 0xb4f2eded out of bounds>) at amo_api.c:80
80 tmpData[i] = (in[i] ^ key) % 0xff;
That's all, folks, for this introductory tutorial. You can get the entire `source code <https://dev.netzob.org/attachments/download/183/inference_target_src_v1.py>`_ of the script used to infer and play with the protocol.
We invite you to read the API documentation or talk with us on IRC (#netzob on Freenode) if you have any questions.

View File

@@ -0,0 +1,308 @@
.. currentmodule:: netzob
.. _tutorial_get_started:
Getting started with Netzob
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The goal of this tutorial is to present the usage of each main component
of Netzob (inference of message format, construction of the state
machine and generation of traffic) through an undocumented protocol.
You can download the protocol material here :
- `Protocol
PCAP <https://dev.netzob.org/attachments/132/target_protocol.pcap>`_
: contains messages of the targeted protocol ;
- `Protocol
implementation <https://dev.netzob.org/attachments/127/target_protocol.tar.gz>`_
: provides the server and client implementation of the protocol.
You can follow the tutorial with only the PCAP file. But, you will need
the implementation if you want to generate traffic and allow Netzob to
discuss with a real implementation.
Setting the Workspace
^^^^^^^^^^^^^^^^^^^^^
Just after installing Netzob, when you start it, you have to set the
workspace directory (as in Eclipse).
.. figure:: https://dev.netzob.org/attachments/119/tuto_workspace.png
:align: center
:alt:
**Side note:** in Netzob, a workspace can be defined as a collection
of projects and of configuration properties. The directory which
hosts the workspace contains directories and files which include
configuration files (workspace.xml), the set of projects (directory
projects) and other configuration resources (logging, traces, ...).
When creating a new workspace, Netzob will generate the necessary
workspace files based on templates. The directory "projects"
includes a directory for each created project. You can specify the
workspace on the command line (using the option "-w <path to the
workspace>") when executing Netzob. Otherwise, it will read the user
file located at "~/.netzob" to find out which workspace was lastly
used. If none, Netzob will ask you at startup where the workspace
is.
Your first project
^^^^^^^^^^^^^^^^^^
To create a project, navigate to the menu ``File`` > ``New project``.
Here, you can choose a project name which should be unique in the
workspace.
**Side note:** by default, Netzob chooses a location inside a
dedicated directory located in the "projects" directory of your
current workspace. The newly created project is automatically
selected, which allows you to start working on it.
You can switch to another project at anytime through the use of the menu
``File`` > ``Open project from workspace``. Do not forget to save your
project before!
Capture traces
^^^^^^^^^^^^^^
The first step in the inferring process of a protocol in Netzob is to
capture and to import messages as samples. There are different methods
to retrieve messages depending on the communication channel used (files,
network, IPC, USB, etc.) and the format (PCAP, hex, raw binary flows,
etc.).
For this tutorial, you can import network messages with the provided
PCAP file. But we recommend using the provided implementation to
generate samples of traffic and capture them with Netzob. You can do
this with the Network Capturer plugin, which is accessible in the menu
``File`` > ``Capture messages`` > ``Capture network traffic``.
.. figure:: https://dev.netzob.org/attachments/113/tuto_capture-small.png
:align: center
:target: https://dev.netzob.org/attachments/106/tuto_capture.png
:alt:
As shown in the picture, you have to launch the capture at the Layer 4
on the localhost ``lo`` interface. As the targeted protocol works over
UDP, you'll be able to capture only the UDP payloads. Then launch the
server of the targeted protocol and then the client. This one will send
different commands to the server and wait for the response.
Once you have captured one session, you have to select the messages you
want to import (you should import everything) and click the Import
messages button. A popup will ask you if you want to allow duplicate
messages. It's better to not do so, to avoid unnecessary messages. We
recommend to repeat this import process 4 times, in order to have enough
variation between messages.
Infer vocabulary
^^^^^^^^^^^^^^^^
Let's now start the inference of the message format (vocabulary).
The next picture shows the whole vocabulary inference interface and the
intended meaning of each component.
.. figure:: https://dev.netzob.org/attachments/120/tuto_voca_ui_small.png
:align: center
:target: https://dev.netzob.org/attachments/123/tuto_voca_ui.png
:alt:
The main window shows each message in raw hexadecimal format. You can
play with visualization attributes : right click on the symbol, then
select Visualization and the attribute you want to change (hex, decimal
or even string format, the unit size and potentially the sign and
endianness).
The following picture shows the rendering of the messages in hex format
(on the left) and string format (on the right). You can then see that
messages contain some interesting strings (``api_identify``,
``api_encrypt``, ``api_decrypt``, etc.).
.. figure:: https://dev.netzob.org/attachments/128/tuto_messages-small.png
:align: center
:target: https://dev.netzob.org/attachments/129/tuto_messages.png
:alt:
You can use the filter functionality to display messages that contain a
specific pattern. Here, we filter with the ``api_identify`` pattern.
.. figure:: https://dev.netzob.org/attachments/107/tuto_messages3-small.png
:align: center
:target: https://dev.netzob.org/attachments/101/tuto_messages3.png
:alt:
This filter makes it easy to retrieve the messages associated with a
potential identification command.
You can see that a '``#``\ ' character is present in each message. You
can try to split the messages by forcing their partitioning with a
specific delimiter. To do so, use the Force partitioning functionality
available in the symbol list (either with a right click on a symbol, or
by selecting a symbol with its checkbox and then clicking on the Force
partitioning button right above).
.. figure:: https://dev.netzob.org/attachments/117/tuto_force_partitioning.png
:align: center
:alt:
Using the '``#``\ ' string delimiter, you'll have the following result:
.. figure:: https://dev.netzob.org/attachments/130/tuto_force_part_result_small.png
:align: center
:target: https://dev.netzob.org/attachments/131/tuto_force_part_result.png
:alt:
You may also want to play with Sequence alignment. This partitioning
enables message alignment according to their common patterns.
After playing with the different partitioning available, you are able
to retrieve the different commands associated with the targeted
protocol, as shown on the following picture.
.. figure:: https://dev.netzob.org/attachments/109/tuto_symboles-small.png
:align: center
:target: https://dev.netzob.org/attachments/104/tuto_symboles.png
:alt:
According to the name of the commands, you can see that a
``api_encrypt`` command is available. Let's have a look at its message
format, which looks like:
::
[command]#[dataToEncrypt][padding]
Netzob enables you to indicate that a specific field has a mutable
content, which means its data is not fixed (such as the '#' delimiter)
nor part of a set of fixed elements (such as the command string). To
specify the structure of a field and its attributes, right click on a
field and select Edit Variable. A popup dialog displays a rooted tree
that corresponds to the inferred structure of the field. For example,
you should have all the observed values of the field (materialized
through DataVariable leafs) under an AlternateVariable node variable.
Regarding the targeted protocol, as we want to allow any data for the
current field, we first have to delete the ``AlternateVariableNode`` and
modify the root node to a ``DataVariable`` that has a mutable behavior,
as shown on the following picture.
.. figure:: https://dev.netzob.org/attachments/115/tuto_variable-small.png
:align: center
:target: https://dev.netzob.org/attachments/105/tuto_variable.png
:alt:
You can visualize the associated message format in the bottom-left corner.
It should display something like this:
.. figure:: https://dev.netzob.org/attachments/110/tuto_variable2-small.png
:align: center
:target: https://dev.netzob.org/attachments/97/tuto_variable2.png
:alt:
Now that we have refined the ``api_encrypt`` command message, we have to
do the same for other commands that also take as parameter a user data:
``api_identify``, ``api_authentify`` and ``api_decrypt``, but also for
some response messages such as ``resp_decrypt`` and ``resp_encrypt``.
At this time, you have a satisfactory approximation of the vocabulary. You
can now start to construct the state machine of the protocol.
Infer Grammar
^^^^^^^^^^^^^
In this tutorial, we won't explain the automatic inference (learning) of
the state machine. As the targeted protocol has a basic state machine,
we will simply show how to model it in Netzob.
A basic state machine contains states and transitions. In Netzob, we use
a complex structure to model the grammar of a protocol. This model
makes it possible to specify information such as the response time between an input
symbol and an output symbol, or even the probability of the different
output messages given a unique input message. This model is called an
SMMDT (Stochastic Mealy Machine with Deterministic Transitions).
The grammar perspective interface of Netzob enables the creation of:
- states (initial or not);
- semi-stochastic transitions (i.e. "normal" transitions);
- open channel transitions;
- close channel transitions.
.. figure:: https://dev.netzob.org/attachments/118/tuto_grammar_buttons.png
:align: center
:alt:
Regarding our targeted protocol, we construct the associated model with
the following information:
- 1 open channel transition and an initial state;
- 1 close channel transition and a final state;
- 4 main states: init, identified, authenticated, closed;
- depending on the current state, we are able or not to launch certain
commands;
- some commands will trigger transitions (``api_identify``,
``api_authentify`` and ``api_bye``).
Once modeled, this looks like:
.. figure:: https://dev.netzob.org/attachments/114/tuto_grammar-small.png
:align: center
:target: https://dev.netzob.org/attachments/116/tuto_grammar.png
:alt:
Now that Netzob knows both the vocabulary and the grammar of the
targeted protocol, we are able to generate traffic that respects the
protocol model.
Generate traffic
^^^^^^^^^^^^^^^^
Let's go to the Simulator perspective of Netzob.
The simulator lets you create a client, a server, or both.
You can tell Netzob to talk with a real client or server implementation,
or you can just launch a client and a server inside Netzob and let them
talk together.
.. figure:: https://dev.netzob.org/attachments/121/tuto_simu_ui_small.png
:align: center
:target: https://dev.netzob.org/attachments/122/tuto_simu_ui.png
:alt:
Let's now create a client. We have to specify the following information:
- **client name**;
- **initiator** or not (i.e. who opens the communication channel ?): it
will usually be yes for a client;
- **client or server side**: client;
- **protocol**: UDP for the targeted protocol;
- **bind IP**: nothing here, as the client finds its own interface;
- **bind port**: nothing here, as the client finds its own port;
- **target IP**: 127.0.0.1;
- **target port**: 4242.
Now start the real server implementation, select the client in Netzob
and click the Start button on the top-right corner. This will generate
and send commands to the real server, and you'll be able to see the
exchanged messages in the interface, as shown on the following picture.
.. figure:: https://dev.netzob.org/attachments/108/tuto_simu-small.png
:align: center
:target: https://dev.netzob.org/attachments/99/tuto_simu.png
:alt:
After this introductory tutorial, we'll be glad to have feedback and
to `help you <http://www.netzob.org/community>`_ (see our mailing list
`user@lists.netzob.org <mailto:user@lists.netzob.org>`_ or our IRC
channel #netzob on Freenode).
If you want to go further and `start
contributing <http://www.netzob.org/development>`_ to Netzob, that is
perfect. There are many simple or complex tasks everyone can do:
translation, documentation, bug fix, feature proposal or implementation.

View File

@@ -0,0 +1,255 @@
.. currentmodule:: netzob
.. _tutorial_modeling_protocol:
Modeling your Protocol with Netzob
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This tutorial details the main features of Netzob's protocol modeling
aspects. It shows how your protocol fields can be described with Netzob's
language.
The first thing to know is that a Netzob protocol model is entirely made of python code. Naturally, this code relies on Netzob's classes and methods. Thus, following this tutorial requires an installed version of ``Netzob (>=1.0)`` and your favorite python editor.
Initial Settings
^^^^^^^^^^^^^^^^
First step will be to create a directory that will hold our python source file.
For example, create the temporary ``/tmp/netzob`` directory and initiate the executable python file ``/tmp/netzob/tutorial.py``::
/$ mkdir /tmp/netzob
/$ cd /tmp/netzob
/tmp/netzob$ touch tutorial.py
/tmp/netzob$ chmod +x tutorial.py
Along with the traditional python shebang, import the netzob library::
#!/usr/bin/env python
from netzob.all import *
Executing this file should return the following::
/tmp/netzob$ ./tutorial.py
Warning: FastBinaryTree not available, using Python version BinaryTree.
Warning: FastAVLTree not available, using Python version AVLTree.
Warning: FastRBTree not available, using Python version RBTree.
If an error related to the netzob import is returned, check the installation process you followed to install netzob.
Modeling the Protocol Vocabulary
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In Netzob, the vocabulary of a protocol consists of a list of symbols.
A symbol represents all the messages that share a similar objective from a protocol perspective. For example, the HTTP_GET symbol describes any HTTP request with the method GET being set.
A symbol is made of a succession of fields and an optional name::
>>> s = Symbol(name="MySymbol", fields = [field1, field2])
A symbol can be **specialized** into a context-valid message and a message can be **abstracted** into a symbol.
A field describes a chunk of the symbol and is defined by a definition domain::
>>> field1 = Field(name="MyField1", domain=domainOfField1)
>>> field2 = Field(name="MyField2", domain=domainOfField2)
A definition domain describes the set of values its field accepts. To support complex domains, a definition domain is represented by a tree where each vertex is a variable. There are two kinds of variables: *Leaf variables* that accept no children and *Node variables* that accept one or more child variables.
**Leaf variables** are the simplest variables. There are four kinds of leaf variables.
A *Data Variable* describes a data whose value is of a given type. Various types are provided with Netzob:
* *ASCII* : an ASCII string (see class :class:`ASCII <netzob.Common.Models.Types.ASCII>`)
Example of a field that only accepts the "netzob" ASCII string::
>>> field = Field(ASCII("netzob"))
>>> field.specialize()
"netzob"
Example of a field that only accepts ASCII strings of five characters::
>>> field = Field(ASCII(nbChars=5))
>>> field.specialize()
zorjf
Example of a field that only accepts ASCII strings made of 5 to 10 characters::
>>> field = Field(ASCII(nbChars=(5, 10)))
>>> field.specialize()
jfozkp
>>> field.specialize()
nckrphjj
* *Decimal* : a decimal number (see class :class:`Decimal <netzob.Common.Models.Types.Decimal>`)
Similarly to the ASCII type, a Decimal data can be constrained by a specific value::
>>> field = Field(Decimal(20))
>>> field.specialize()
'\x14'
A decimal variable also accepts a range of valid values::
>>> field = Field(Decimal(interval=(10, 100)))
>>> field.specialize()
'\xda\x82'
>>> field.specialize()
'\xd6\xca'
* *Raw* : a sequence of bytes (see class :class:`Raw <netzob.Common.Models.Types.Raw>`)
Example of a field that accepts a specific sequence of bytes::
>>> field = Field(Raw('\x00\x01\x02\x03'))
>>> repr(field.specialize())
"'\\x00\\x01\\x02\\x03'"
Example of a field that accepts any sequence of ten bytes::
>>> field = Field(Raw(nbBytes=10))
>>> field.specialize()
't)\x99\x8a\x02>\xd1\x91y\x9b'
* *BitArray* : a sequence of bits (see class :class:`BitArray <netzob.Common.Models.Types.BitArray>`)
Example of a field that accepts 3 to 10 bits::
>>> field = Field(BitArray(nbBits=(3, 10)))
>>> field.specialize()
'\xbe@'
* *IPv4* : an IPv4 raw address (see class :class:`IPv4 <netzob.Common.Models.Types.IPv4>`)
Example of a field that only accepts an IPv4 address::
>>> field = Field(IPv4())
>>> field.specialize()
'\x86\x89\\\xac'
Example of a field that only accepts an IPv4 address that belongs to the network 192.168.0.0/24::
>>> field = Field(IPv4(network='192.168.0.0/24'))
>>> field.specialize()
'\xc0\xa8\x00\x0b'
Along with Data variables, the definition domain of a field can embed the definition of relationships. Two kinds of relationships are supported in Netzob; intra-symbol relationships and inter-symbol relationships. The former denotes a relationship between the size or the value of a variable, and another field in the same symbol. The latter one denotes a relationship with a field of another symbol. Currently, three kinds of relationships are supported.
* A *Size Relationship* that describes a data whose value is the size of another field.
The size field can be declared before the targeted field in the same symbol::
>>> payloadField = Field(Raw(nbBytes=(5, 10)))
>>> sizeField = Field(Size(payloadField))
>>> s = Symbol([sizeField, payloadField])
>>> s.specialize()
'\x08\xac\xa4\xb8\x93\x8d\x83\x95%' # size = 8
>>> s.specialize()
'\x05\xff\xef\x93\x07\xd7' # size = 5
The size field can also be declared after the targeted field in the same symbol::
>>> payloadField = Field(Raw(nbBytes=(5, 10)))
>>> sizeField = Field(Size(payloadField))
>>> s = Symbol([payloadField, sizeField])
>>> s.specialize()
'n\\\x82\x84`\x00\x13\x9f\x08' # size = 8
>>> s.specialize()
'\xe7\xc4\xde\xbd\x18\x05' # size = 5
An optional "factor" and "offset" can be applied to the value of the computed size::
>>> payloadField = Field(Raw(nbBytes=(5, 10)))
>>> sizeField = Field(Size(payloadField, offset=1))
>>> s = Symbol([sizeField, payloadField])
>>> s.specialize()
'\x07\xfb+K\xf4N\x99' # size = 6 + 1 (offset)
More details and examples of Size relationships can be found in its API doc :class:`Size <netzob.Common.Models.Vocabulary.Domain.Variables.Leafs.Size>`.
* A *Value Relationship* is very similar to the size relationship except that the relationship applies on the value of the targeted field.
For example, a symbol can be made of three fields, the first being a random sequence of 5 bytes, the second a simple ASCII delimiter (':'), while the last shares the same value as the first field::
>>> f1 = Field(Raw(nbBytes=5))
>>> f2 = Field(ASCII(':'))
>>> f3 = Field(Value(f1))
>>> s = Symbol(fields=[f1, f2, f3])
>>> s.specialize()
'\x0f\x01ShS:\x0f\x01ShS'
>>> s.specialize()
'6H\xf9\x84\xc4:6H\xf9\x84\xc4'
More details and examples of Value relationships can be found in its API doc :class:`Value <netzob.Common.Models.Vocabulary.Domain.Variables.Leafs.Value>`.
* A *Checksum Variable* describes a data whose value is the IP checksum of one or more other fields.
The following example illustrates the creation of an ICMP Echo request packet with a valid checksum represented on two bytes computed on-the-fly::
>>> typeField = Field(name="Type", domain=Raw('\\x08'))
>>> codeField = Field(name="Code", domain=Raw('\\x00'))
>>> chksumField = Field(name="Checksum")
>>> identField = Field(name="Identifier", domain=Raw('\\x1d\\x22'))
>>> seqField = Field(name="Sequence Number", domain=Raw('\\x00\\x07'))
>>> timeField = Field(name="Timestamp", domain=Raw('\\xa8\\xf3\\xf6\\x53\\x00\\x00\\x00\\x00'))
>>> headerField = Field(name="header")
>>> headerField.fields = [typeField, codeField, chksumField, identField, seqField, timeField]
>>> dataField = Field(name="Payload", domain=Raw(nbBytes=(5, 10)))
>>> chksumField.domain = Checksum([headerField, dataField], "InternetChecksum", dataType=Raw(nbBytes=2))
>>> s = Symbol(fields = [headerField, dataField])
>>> s.specialize()
'\\x08\\x00\x9d\xda\\x1d\\x22\\x00\\x07\\xa8\\xf3\\xf6\\x53\\x00\\x00\\x00\\x00\xec6\xf4\x98\xee' # checksum = \\xda\\x1d
**Leaf Variables** can be combined into a tree model to produce much more complex definition domains. To achieve this, **Node Variables** can be used to construct complex definition domains made of a succession of variables, an alternative of variables or a repetition of variables.
* The *Aggregation Node Variable* can be used to model a succession of variables.
For example, a field that accepts an ASCII string of 10 characters followed by 2 bytes (see :class:`Agg <netzob.Common.Models.Vocabulary.Domain.Variables.Nodes.Agg>`)::
>>> domainOfField = Agg([ ASCII(nbChars=10), Raw(nbBytes=2) ])
>>> field = Field(domainOfField)
>>> repr(field.specialize())
"'VLAuxPd0A0\\x86M'"
* The *Alternate Node Variable* can be used to model an alternative of multiple variables (OR).
For example, in the following models a field either accepts the ASCII value "hello" or any ASCII string of 10 to 15 characters (see :class:`Alt <netzob.Common.Models.Vocabulary.Domain.Variables.Nodes.Alt>`) ::
>>> field = Field(Alt([ ASCII("hello"), ASCII(nbChars=(10, 15)) ]))
>>> repr(field.specialize())
"'hello'"
>>> repr(field.specialize())
"'Zm7D3Ade9K'"
* The *Repeat Node Variable* can be used to model a repetition of a variable.
For example, the following models a field that accepts between 1 and 4 repetitions of the ASCII string "netzob" (see :class:`Repeat <netzob.Common.Models.Vocabulary.Domain.Variables.Nodes.Repeat>`) ::
>>> field = Field(Repeat(ASCII("netzob"), nbRepeat=(1, 4)))
>>> repr(field.specialize())
"'netzob'"
>>> repr(field.specialize())
"'netzobnetzobnetzob'"
Node variables can be combined to produce complex definition domains. For example, the following models a field that either accepts an ASCII string that starts with the letter "n" or a random IPv4 address::
>>> field = Field( Alt([ Agg([ASCII('n'), ASCII()]), Agg([ IPv4() ])]) )
>>> repr(field.specialize())
"'nlPj66'"
>>> repr(field.specialize())
"'aI\\xe4\\xc5'"

View File

@@ -0,0 +1,329 @@
.. currentmodule:: netzob
.. _tutorial_peach:
Auto generation of Peach pit files/fuzzers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Principle
^^^^^^^^^
`Peach <http://peachfuzzer.com>`_ is an open-source fuzzing framework.
It provides an API to create smart fuzzers adapted to the tester's
needs through XML configuration files called `*Peach pit
files* <http://peachfuzzer.com/PeachPit>`_.
Making such files requires knowledge of the message format and state
machine of the targeted protocol as well as the actor Peach has to fuzz.
Fortunately, Netzob provides means for reverse engineering of undocumented and
proprietary protocols from provided traces in a semi-automatic way.
Netzob provides an exporter plugin for Peach that can transform
the inferred data model and state machine of a targeted protocol into a
Peach pit file automatically.
This tutorial shows how to take advantage of the Peach exporter plugin
provided in Netzob to automatically construct Peach pit configuration
files.
Prerequisite
^^^^^^^^^^^^
You need Netzob in version 0.4.1 or above.
This tutorial assumes that the user has previously followed the
`Getting started with
Netzob <http://www.netzob.org/resources/tutorial_get_started>`_ tutorial
and has a complete Netzob project (or at least some message formats).
The protocol implementation contains several vulnerabilities that should
be detected during fuzzing.
Moreover it assumes that the user has Peach 2.3.8 installed.
Export
^^^^^^
To export the project go in ``File`` > ``Export the project`` >
``Peach pit file``. The window below should appear:
.. figure:: https://dev.netzob.org/attachments/download/134
:align: center
:alt:
The window is composed of three panels. The left one lists all available
fuzzers. They differ in their state representation. There are three
kinds of fuzzers available:
- "Randomized state order fuzzer": one state is created for each
symbol of Netzob and at each step, the fuzzer changes state to a
randomly chosen one.
- "Randomized transitions stateful fuzzer": one state is created for
each symbol of Netzob and the transitions between these states are
based on those Netzob allows, weighted by their probability.
- "One-state fuzzer": one state is created corresponding to the chosen
symbol.
When the fuzzer is on a particular state, it sends fuzzed data that
corresponds to the associated symbol to the target. Choose one of them.
The right panel shows the fuzzer. It gives the user a small idea of what
he is doing and what changes between two configurations.
The bottom panel has two options:
- The first option ``Fuzzing based on`` tells on which Netzob data
model the fuzzing is based:
- "Variable": use the Netzob variables to make Peach data models. It
makes a fuzzier but less smart fuzzer.
- "Regex": use the Netzob Regex (which are displayed on the top of
the symbol visualization), it is the simplest solution.
- The second option ``Mutate static fields`` tells whether the static
fields in the Netzob data model are fuzzed or not.
The ``Export`` button exports the fuzzer into a user defined file.
Use this fuzzer into Peach\ <#Use-this-fuzzer-into-Peach>`_
Export this fuzzer directly through the ``Export`` button to a file
named "test.xml" into the directory of Peach. It should create a
PeachzobAddons.py file, which is essential for Peach to leverage Netzob
capabilities as "fixup".
The "test.xml" file should look like this. Look closely at the few XML
comments.
::
<?xml version="1.0" encoding="utf-8"?>
<Peach xmlns="http://phed.org/2008/Peach" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://phed.org/2008/Peach /peach/peach.xsd">
<Include ns="default" src="file:defaults.xml"/>
<Import import="PeachzobAddons"/>
<DataModel name="dataModel 1">
<Blob name="Field 0_0" valueType="hex" value="6170695f"/>
<Blob name="Field 1_0" valueType="hex">
<Fixup class="PeachzobAddons.Or">
<Param name="values" value="Blob,696e666f; Blob,7374617473"/>
</Fixup>
</Blob>
<Blob name="Field 2_0" valueType="hex" value="2300000000000000000000000000000000000000000000"/>
<Blob name="Field 3_0" valueType="hex" value="00"/>
</DataModel>
<DataModel name="dataModel 2">
<Blob name="Field 0_0" valueType="hex" value="6170695f62796523000000000000000000000000000000000000000000000000"/>
</DataModel>
<DataModel name="dataModel 3">
<Blob name="Field 0_0" valueType="hex" value="6170695f6964656e746966792366726564000000000000000000000000000000"/>
</DataModel>
<DataModel name="dataModel 4">
<Blob name="Field 0_0" valueType="hex" value="6170695f61757468656e74696679236d79506173737764210000000000000000"/>
</DataModel>
<DataModel name="dataModel 5">
<Blob name="Field 0_0" valueType="hex" value="6170695f656e6372797074233132333435367465737400000000000000000000"/>
</DataModel>
<DataModel name="dataModel 6">
<Blob name="Field 0_0" valueType="hex" value="6170695f64656372797074237370717677743627313600000000000000000000"/>
</DataModel>
<DataModel name="dataModel 7">
<Blob name="Default-1_0" valueType="hex" value="00000000"/>
<Blob name="Default-2-1_0" valueType="hex" value="23"/>
<Blob name="Default-2-2-1-1_0" valueType="hex">
<Fixup class="PeachzobAddons.Or">
<Param name="values" value="Blob,00000000000000; Blob,00000004000000; Blob,00000005000000; Blob,0000000a000000; Blob,64000000000000; Blob,8b04080a000000"/>
</Fixup>
</Blob>
<Blob name="Default-2-2-1-2_0" valueType="hex">
<Fixup class="PeachzobAddons.Or">
<Param name="values" value="Blob,00000000000000000000; Blob,31323334353674657374; Blob,696e666f000000000000; Blob,73707176777436273136; Blob,73746174730000000000"/>
</Fixup>
</Blob>
<Blob name="Default-2-2-2_0" valueType="hex" value="00000000000000000000"/>
</DataModel>
<DataModel name="dataModel 9">
<Blob name="Field 0">
<Fixup class="PeachzobAddons.RandomField">
<Param name="minlen" value="0"/>
<Param name="maxlen" value="1024"/>
<Param name="type" value="Blob"/>
</Fixup>
</Blob>
</DataModel>
<StateModel initialState="state 0" name="stateModel">
<State name="state 0">
<Action ref="state 1" type="changeState" when="random.randint(1,8)==1"/>
<Action ref="state 2" type="changeState" when="random.randint(1,7)==1"/>
<Action ref="state 3" type="changeState" when="random.randint(1,6)==1"/>
<Action ref="state 4" type="changeState" when="random.randint(1,5)==1"/>
<Action ref="state 5" type="changeState" when="random.randint(1,4)==1"/>
<Action ref="state 6" type="changeState" when="random.randint(1,3)==1"/>
<Action ref="state 7" type="changeState" when="random.randint(1,2)==1"/>
<Action ref="state 9" type="changeState"/>
</State>
<State name="state 1">
<Action type="output">
<DataModel ref="dataModel 1"/>
<Data name="data"/>
</Action>
</State>
<State name="state 2">
<Action type="output">
<DataModel ref="dataModel 2"/>
<Data name="data"/>
</Action>
</State>
<State name="state 3">
<Action type="output">
<DataModel ref="dataModel 3"/>
<Data name="data"/>
</Action>
</State>
<State name="state 4">
<Action type="output">
<DataModel ref="dataModel 4"/>
<Data name="data"/>
</Action>
</State>
<State name="state 5">
<Action type="output">
<DataModel ref="dataModel 5"/>
<Data name="data"/>
</Action>
</State>
<State name="state 6">
<Action type="output">
<DataModel ref="dataModel 6"/>
<Data name="data"/>
</Action>
</State>
<State name="state 7">
<Action type="output">
<DataModel ref="dataModel 7"/>
<Data name="data"/>
</Action>
</State>
<State name="state 9">
<Action type="output">
<DataModel ref="dataModel 9"/>
<Data name="data"/>
</Action>
</State>
</StateModel>
<Agent name="DefaultAgent">
<!--Todo: Configure the Agents.-->
</Agent>
<Test name="DefaultTest">
<!--Todo: Enable Agent <Agent ref="TheAgent"/> -->
<StateModel ref="stateModel"/>
<Publisher class="udp.Udp">
<Param name="host" value="127.0.0.1"/>
<Param name="port" value="4242"/>
</Publisher>
<Publisher class="udp.Udp">
<Param name="host" value="127.0.0.1"/>
<Param name="port" value="10000"/>
</Publisher>
<!--The Netzob project has several simulator actors, so this file have several publishers. Choose one of them and remove the others.-->
</Test>
<Run name="DefaultRun">
<!--Todo: Configure the run.-->
<Logger class="logger.Filesystem">
<Param name="path" value="logs"/>
</Logger>
<Test ref="DefaultTest"/>
</Run>
</Peach>
This tutorial will not talk about Peach agents but configuring one of
them could be useful. In the Test block, there are as many publishers as
the Netzob simulator has actors. Only one publisher is needed, so remove the
others. If there are no publishers, create one according to the model
above. In this example, the tester removes the second publisher.
Launch the fuzzing
^^^^^^^^^^^^^^^^^^
You first have to start the targeted server:
::
./server
Assuming that the user exports the "test.xml" file into the Peach
directory, you can now start the fuzzer:
::
python peach.py test.xml
After a few seconds, you should trigger a segfault or a stack smashing
detection.
::
-> Read: api_identify#fred
Command: api_identify
Arg: fred
<- Send:
Return value: 0
Size of data buffer: 13
Data buffer:
DATA: 72 65 73 70 5f 69 64 65 6e 74 69 66 79 "resp_identify"
-> Read: api_identify#f
Command: api_identify
Arg: f
*** stack smashing detected ***: ./server terminated
======= Backtrace: =========
/lib/i386-linux-gnu/libc.so.6(__fortify_fail+0x45)[0xcec045]
/lib/i386-linux-gnu/libc.so.6(+0x103ffa)[0xcebffa]
./server[0x8048a3c]
./server[0x8048eb4]
./server[0x8048985]
/lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf3)[0xc014d3]
./server[0x8048831]
======= Memory map: ========
00289000-0028a000 r-xp 00000000 00:00 0 [vdso]
002fb000-00317000 r-xp 00000000 08:03 2605207 /lib/i386-linux-gnu/libgcc_s.so.1
00317000-00318000 r--p 0001b000 08:03 2605207 /lib/i386-linux-gnu/libgcc_s.so.1
00318000-00319000 rw-p 0001c000 08:03 2605207 /lib/i386-linux-gnu/libgcc_s.so.1
00bb4000-00bd4000 r-xp 00000000 08:03 673152 /lib/i386-linux-gnu/ld-2.15.so
00bd4000-00bd5000 r--p 0001f000 08:03 673152 /lib/i386-linux-gnu/ld-2.15.so
00bd5000-00bd6000 rw-p 00020000 08:03 673152 /lib/i386-linux-gnu/ld-2.15.so
00be8000-00d8b000 r-xp 00000000 08:03 672879 /lib/i386-linux-gnu/libc-2.15.so
00d8b000-00d8c000 ---p 001a3000 08:03 672879 /lib/i386-linux-gnu/libc-2.15.so
00d8c000-00d8e000 r--p 001a3000 08:03 672879 /lib/i386-linux-gnu/libc-2.15.so
00d8e000-00d8f000 rw-p 001a5000 08:03 672879 /lib/i386-linux-gnu/libc-2.15.so
00d8f000-00d92000 rw-p 00000000 00:00 0
08048000-0804a000 r-xp 00000000 08:03 6488874 /home/sygus/travaux/netzob/target_protocol/server
0804a000-0804b000 r--p 00001000 08:03 6488874 /home/sygus/travaux/netzob/target_protocol/server
0804b000-0804c000 rw-p 00002000 08:03 6488874 /home/sygus/travaux/netzob/target_protocol/server
09e0d000-09e2e000 rw-p 00000000 00:00 0 [heap]
b778b000-b778c000 rw-p 00000000 00:00 0
b77a8000-b77ac000 rw-p 00000000 00:00 0
bf90f000-bf930000 rw-p 00000000 00:00 0 [stack]
Abandon (core dumped)

View File

@@ -0,0 +1,139 @@
.. currentmodule:: netzob
.. _tutorial_wireshark:
Export Wireshark dissectors
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Principle
^^^^^^^^^
`Wireshark <http://www.wireshark.org>`_ is an open-source packet
analyzer able to identify protocols and to highlight fields from the
data stream. Its main drawback is that it is only useful on
documented/standard protocols. Within Netzob, which achieves
semi-automatic reverse engineering of protocols, we have developed an
exporter plugin that provides automatic generation of Wireshark dissectors
from proprietary or undocumented protocols. Dissectors are built in
`LUA <http://wiki.wireshark.org/Lua>`_ programming language.
Netzob provides a powerful datamodel in which fields are described with
the following information:
- Regular expression (fixed or dynamic size)
- Name (textual representation)
- Format
- Size
- Endianness
- Signing
All this information is gathered to generate a script including a
dissector used by Wireshark.
Language
^^^^^^^^
Wireshark can be statically extended with C modules similar to core
dissectors. Optionally, Wireshark can be configured to embed a LUA
interpreter. For modularity purposes, the Lua engine has been chosen
to extend Wireshark with Netzob generated dissectors.
Prerequisite
^^^^^^^^^^^^
You need Netzob in version 0.4.1 or above. The wireshark exporter
functionality is provided as a netzob core plugin (which is included in
the 0.4.1 version).
This tutorial assumes that the user has previously inferred the
specification of the targeted protocol. An example of protocol inference
is available in the `Getting started with
Netzob <http://www.netzob.org/resources/tutorial_get_started>`_
tutorial.
Usage
^^^^^
#. Check that Wireshark supports Lua
.. figure:: http://wiki.wireshark.org/Lua?action=AttachFile&do=get&target=lua-about.png
:align: center
:alt:
#. Select a project
Given a partitioned symbol in a project you can generate a wireshark
dissector using the Export project menu item, then by selecting
Wireshark.
.. figure:: https://dev.netzob.org/attachments/158/2012-10-25-173314_1595x647_scrot_small.png
:align: center
:target: https://dev.netzob.org/attachments/82/2012-10-25-173314_1595x647_scrot.png
:alt:
.. figure:: https://dev.netzob.org/attachments/159/2012-10-25-180841_1552x731_scrot_small.png
:align: center
:target: https://dev.netzob.org/attachments/83/2012-10-25-180841_1552x731_scrot.png
:alt:
You should get a popup with the LUA script automatically generated:
.. figure:: https://dev.netzob.org/attachments/161/2012-10-30-180554_987x807_scrot_small.png
:align: center
:target: https://dev.netzob.org/attachments/94/2012-10-30-180554_987x807_scrot.png
:alt:
#. Import into wireshark
Two methods are available:
- Evaluate the Lua script in a Wireshark instance.
In wireshark, select ``Tools > Lua > Evaluate`` and paste the
generated code.
- Start wireshark with a specific Lua script.
Start wireshark with the following parameters:
``wireshark -X lua_script:PATH_OF_LUA_SCRIPT``
This will automatically import the Lua script on start.
#. Dissect data packets
Within the lower panel of Wireshark, you should get the dissected packets:
.. figure:: https://dev.netzob.org/attachments/160/2012-10-25-182017_956x1041_scrot_small.png
:align: center
:target: https://dev.netzob.org/attachments/85/2012-10-25-182017_956x1041_scrot.png
:alt:
Limitations
^^^^^^^^^^^
Variable size fields cannot be easily exported to the datamodel used by
Wireshark when we don't know the expected size. In this case, an error
message will pop up preventing Netzob from generating the dissector. If
this happens, you have to complete the protocol model in order to find
the expected size of the dynamic field.
Improvements
^^^^^^^^^^^^
These ideas could be used to enhance dissection:
- Use relations (field / size, repeat ...)
- Look at future bitfield core implementation
What next ?
^^^^^^^^^^^
After this tutorial, we'll be glad to have feedback and to help you
(see our mailing list
`user@lists.netzob.org <mailto:user@lists.netzob.org>`_ or our IRC
channel #netzob on Freenode).
If you want to go further and `start contributing to
Netzob <http://www.netzob.org/development#becomecontributor>`_, that's
perfect. There are many simple or complex tasks everyone can do:
translation, documentation, bug fix, feature proposal or implementation.

View File

@@ -0,0 +1,72 @@
.. currentmodule:: netzob
.. _import:
Importing Data
==============
Communication protocols can be found in every part of a system, as shown on the following picture:
.. image:: netzob_comprot.png
:width: 750px
:alt: Payload extraction
Netzob can handle multiple kinds of input data. Hence, you can analyze network traffic, IPC communications, files structures, etc.
Import can either be done by using a dedicated captor or by providing already captured messages in a specific format.
Current accepted formats are:
* PCAP files
* Structured files
* Netzob XML files (used by Netzob for its internal representation of messages)
Current supported captors are:
* Network captor, based on the XXX library
* Intra Process communication captor (API calls), based on API hooking
* Inter Process Communication captor (pipes, shared memory and local sockets), based on system call hooking
Imported messages are manipulated by Netzob through specific Python
objects which contain metadata that describe contextual parameters
(timestamp or even IP source/destination for example). All the Python
objects that describe messages derive from an abstract object:
AbstractMessage.
The next part of this section details the composition of each message
object.
AbstractMessage
---------------
All the messages inherit from this definition and therefore have the following parameters :
* a unique ID
* a data field represented with an array of hex
NetworkMessage
--------------
A network message is defined with the following parameters :
* a timestamp
* the ip source
* the ip target
* the protocol (TCP/UDP/ICMP...)
* the layer 4 source port
* the layer 4 target port
Definition of a NetworkMessage :
FileMessage
--------------
A file message is defined with the following parameters :
* a filename
* the line number in the file
* the creation date of the file
* the last modification date of the file
* the owner of the file
* the size of the file

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

View File

@@ -0,0 +1,12 @@
.. currentmodule:: netzob
.. _grammar:
Grammar inference
#################
Identification of the automata of the protocol
**********************************************
Fields dependencies with messages of previous states
****************************************************

View File

@@ -0,0 +1,73 @@
.. currentmodule:: netzob
.. _inference:
Protocol inference
==================
Definition of a communication protocol
--------------------------------------
A communication protocol is a language. A language is defined
through:
* its vocabulary (the set of valid words or, in our context, the set
of valid messages) ;
* its grammar (the set of valid sentences which, in our context, can
be represented as a protocol state machine, like the TCP state
machine).
A word of the vocabulary is called a symbol. A symbol represents an
abstract view of a set of similar messages. Similar messages refer to
messages having the same semantic (for example, a TCP SYN message, a
SMTP HELLO message, an ICMP ECHO REQUEST message, etc.).
A symbol is structured following a format, which specifies a sequence
of fields (like the IP format). A field can be split into
sub-fields. For example, a payload is a field of a TCP
message. Therefore, by defining a layer as a kind of payload (which is
a specific field), we can retrieve the so-called Ethernet, IP, TCP and
HTTP layers from a raw packet ; each layer having its own vocabulary
and grammar.
Field's size can be fixed or variable.
Field's content can be static or dynamic.
Field's content can be basic (a 32 bits integer) or complex (an array).
A field has four attributes:
* the type defines its definition domain or set of valid values (16 bits integer, string, etc.) ;
* the data description defines the structuration of the field (ASN.1, TSN.1, EBML, etc.) ;
* the data encoding defines ... (ASCII, little endian, big endian, XML, EBML, DER, XER, PER, etc.) ;
* the semantic defines ... (IP address, port number, URL, email, checksum, etc.).
Field's content can be:
* static ;
* dependent on another field (or a set of fields) of the same message (intra-message dependency) ;
* dependent on a field (or a set of fields) of a previous message in the grammar (inter-message dependency) ;
* dependent on the environment ;
* dependent on the application behaviour (which could depend on the user behaviour) ;
* random (the initial value of the TCP sequence number for example).
Modelization in Netzob
----------------------
Netzob provides a framework for the semi-automated modeling (inference) of communication protocols, i.e. inferring their vocabulary and grammar.
* Vocabulary inference
* Message structure inference (based on sequence alignment)
* Grouping of similar message structures
* Field type inference
* Field dependencies from the same message and from the environment
* Field semantic inference
* Grammar inference
* Identification of the automata of the protocol
* Fields dependencies with messages of previous states
All the functionalities of the framework are detailed in this chapter.
.. toctree::
:maxdepth: 2
vocabular
grammar

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,182 @@
.. currentmodule:: netzob
.. _vocabular:
Vocabulary inference
####################
Structure inference
*******************
Regrouping of similar structures
********************************
Options during alignment process
================================
* "Read-only" process (does not require any participation in the
communication).
* Identify the fixed and dynamic fields of all the messages.
* Regroups equivalent messages depending on their field structures.
* Clustering (regrouping of equivalent messages), using:
* a UPGMA algorithm to regroup similar messages
* an OpenMP and MPI implementation
* Sequencing, Alignment (Identification of fields in messages) :
* Needleman & Wunsch Implementation
Needleman and Wunsch algorithm
==============================
* Originally a bioinformatics algorithm (DNA sequencing)
* Align two messages and identify common patterns and field structure
* Computes an alignment score representing the efficiency of the
alignment
The following picture shows the sequence alignment of two messages.
.. image:: ExampleOfAligning.png
:alt: Example of sequence alignment
UPGMA algorithm
===============
* Identify equivalent messages based on their alignment score.
* Build a hierarchical organization of the messages with the UPGMA
algorithm (Unweighted Pair Group Method with Arithmetic Mean)
The following picture shows a grouping of similar messages based on the result of the clustering process.
.. image:: ExampleOfMultipleAlignment.png
:alt: Example of clustering
Abstraction of a set of messages
================================
Abstraction is the process of substituting the dynamic fields with their representation as a regex. An example of abstraction is shown in the following picture.
.. image:: message_abstraction.png
:alt: Example of message abstraction
Analyses after alignment process
================================
aaa
Message contextual menu
=======================
aaa
Group contextual menu
=====================
aaa
Refine regexes
==============
aaa
Slick regexes
=============
aaa
Concatenate
===========
aaa
Split column
============
aaa
Merge columns
=============
aaa
Delete message
==============
aaa
Field type inference
********************
Visualization options
=====================
aaa
Type structure contextual menu
==============================
aaa
Messages distribution
=====================
This function shows a graphical representation of the distribution of bytes per offset for each message of the current group. This function helps to identify the entropy variation of each field. Entropy variation combined with byte distribution helps the user to infer the field type.
[INCLUDE GRAPH]
Data typing
===========
* Primary types : binary, ascii, num, base64...
* Definition domain, unique elements and intervals
* Data carving (tar gz, png, jpg, ...)
* Semantic data identification (emails, IP ...)
Domain of definition
====================
aaa
Change type representation
==========================
aaa
Field dependencies from the same message and from the environment
*****************************************************************
Field dependencies identification
=================================
* Length fields and associated payloads
* Encapsulated messages identifications
And from the environment...
Payload extraction
==================
The function "Find Size Fields", as its name suggests, is dedicated to finding fields that contain a length value as well as the associated payload. It does this on each group. Netzob supports different encodings of the size field: big and little endian binary values are supported, with sizes of 1, 2 and 4 bytes. The algorithm used to find the size fields and their associated payloads is described in the table XXX.
[INCLUDE ALGORITHM]
The following picture represents the application of the function on a trace example. It shows the automated extraction of the IP and UDP payloads from an Ethernet frame.
.. image:: payload_extraction.png
:alt: Payload extraction
Field semantic inference
************************
Data carving
============
Data carving is the process of extracting semantic information from fields or messages. Netzob can extract the following semantic information :
* URL
* email
* IP address
[INCLUDE FIGURE]
Search
======
aaa
Properties
==========
aaa

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB