#24 Evolve kiskadee models to support saving several analyses, made by different analyzers, for the same package version
Merged 6 years ago by athoscr. Opened 6 years ago by davidcarlos.

file modified
+1 -1
@@ -10,7 +10,7 @@ 

      `sources` is the absolute path for the uncompressed package. Returns

      a analysis results.

      """

-     volume = {sources: {'bind': '/src', 'mode': 'ro'}}

+     volume = {sources: {'bind': '/src', 'mode': 'Z'}}

      client = docker.from_env(version='auto')

      return client.containers.run(analyzer, '/src', volumes=volume,

                                   stdout=True, stderr=True, tty=True)

file modified
+1 -1
@@ -33,7 +33,7 @@ 

      if (analyzer_module):

          with open(file_to_parse, 'r') as f:

              analysis_instance = analyzer_module.parse_file(f)

-             firehose_tree = analysis_instance.to_xml_bytes()

+             firehose_tree = str(analysis_instance.to_xml_bytes())

  

      shutil.rmtree(tempdir)

  

file modified
+3
@@ -1,6 +1,7 @@ 

  """Provide kiskadee Database operations."""

  

  import kiskadee

+ from kiskadee.model import Base

  from sqlalchemy import create_engine, orm

  

  
@@ -10,6 +11,8 @@ 

      def __init__(self):

          """Return a Database object with SQLAlchemy session and engine."""

          self.engine = self._create_engine()

+         # Create the tables of every model registered on Base (tables that

+         # already exist are left alone), then bind the metadata to the engine.

+         Base.metadata.create_all(self.engine)

+         Base.metadata.bind = self.engine

          self.session = self._create_session(self.engine)

  

      def _create_engine(self):

file modified
+24 -3
@@ -2,7 +2,7 @@ 

  

  from sqlalchemy.ext.declarative import declarative_base

  from sqlalchemy import Column, Integer, UnicodeText, UniqueConstraint,\

-                        Sequence, Unicode, ForeignKey, Boolean, orm

+                        Sequence, Unicode, ForeignKey, orm

  

  Base = declarative_base()

  
@@ -46,8 +46,29 @@ 

                  Sequence('versions_id_seq', optional=True), primary_key=True)

      number = Column(Unicode(100), nullable=False)

      package_id = Column(Integer, ForeignKey('packages.id'), nullable=False)

-     has_analysis = Column(Boolean)

-     analysis = Column(UnicodeText)

+     analysis = orm.relationship('Analysis', backref='versions')

      __table_args__ = (

              UniqueConstraint('number', 'package_id'),

              )

+ 

+ 

+ class Analyzer(Base):

+     """Abstraction of a static analyzer.

+ 

+     A row holds the unique `name` of an analyzer and, optionally, its

+     `version`. The `analysis` relationship lists the Analysis rows that

+     reference this analyzer; it also adds an `analyzers` backref attribute

+     to Analysis.

+     """

+ 

+     __tablename__ = 'analyzers'

+     id = Column(Integer,

+                 Sequence('analyzers_id_seq', optional=True), primary_key=True)

+     name = Column(Unicode(255), nullable=False, unique=True)

+     version = Column(Unicode(255), nullable=True)

+     analysis = orm.relationship('Analysis', backref='analyzers')

+ 

+ 

+ class Analysis(Base):

+     """Abstraction of a package analysis.

+ 

+     A row ties one package version (`version_id`) to one analyzer

+     (`analyzer_id`), both required, and keeps the report text in `raw`.

+     """

+ 

+     __tablename__ = 'analysis'

+     id = Column(Integer,

+                 Sequence('analysis_id_seq', optional=True), primary_key=True)

+     version_id = Column(Integer, ForeignKey('versions.id'), nullable=False)

+     analyzer_id = Column(Integer, ForeignKey('analyzers.id'), nullable=False)

+     raw = Column(UnicodeText)

file modified
+7 -15
@@ -10,7 +10,7 @@ 

  import kiskadee.database

  import kiskadee.runner

  import kiskadee.queue

- from kiskadee.model import Package, Plugin, Version, Base

+ from kiskadee.model import Package, Plugin, Version

  

  RUNNING = True

  
@@ -18,10 +18,9 @@ 

  class Monitor:

      """Provide kiskadee monitoring objects."""

  

-     def __init__(self):

+     def __init__(self, _session):

          """Return a non initialized Monitor."""

-         self.engine = None

-         self.session = None

+         # `_session` is the database session this monitor will use; it is

+         # created by the caller and handed in.

+         self.session = _session

  

      def initialize(self):

          """Start all threads related to the monitoring process.
@@ -36,11 +35,6 @@ 

              the analysis will never be performed. You can use thee decorator

              `@kiskadee.queue.package_enqueuer` to easiliy enqueue a package.

          """

-         database = kiskadee.database.Database()

-         self.engine = database.engine

-         self.session = database.session

-         Base.metadata.create_all(self.engine)

-         Base.metadata.bind = self.engine

          _start(self.monitor)

          plugins = kiskadee.load_plugins()

          for plugin in plugins:
@@ -86,8 +80,7 @@ 

          _package = Package(name=pkg['name'],

                             plugin_id=_plugin.id)

          _version = Version(number=pkg['version'],

-                            package_id=_package.id,

-                            has_analysis=False)

+                            package_id=_package.id)

          _package.versions.append(_version)

          self.session.add(_package)

          kiskadee.logger.debug("Saving package in db: {}".format(str(pkg)))
@@ -106,8 +99,7 @@ 

              if(pkg['plugin'].Plugin().

                 compare_versions(pkg['version'], current_pkg_version) == 1):

                  _new_version = Version(number=pkg['version'],

-                                        package_id=_pkg.id,

-                                        has_analysis=False)

+                                        package_id=_pkg.id)

                  _pkg.versions.append(_new_version)

                  self.session.add(_pkg)

                  self.session.commit()
@@ -150,9 +142,9 @@ 

  def daemon():

      """Entry point to the monitor module."""

      # TODO: improve with start/stop system

-     monitor = Monitor()

+     session = kiskadee.database.Database().session

+     monitor = Monitor(session)

+     # NOTE(review): monitor.initialize() is *called* on the next line, in the

+     # current process, and its return value is what is passed as `target`.

+     # Passing the bound method (target=monitor.initialize) would run it in

+     # the child process instead -- confirm which behavior is intended.

      p = Process(target=monitor.initialize())

      p.daemon = True

      p.start()

      p.join()

-     # cleanup goes here

file modified
+127 -64
@@ -8,6 +8,7 @@ 

  import kiskadee.database

  import kiskadee.util

  import kiskadee.converter

+ from kiskadee.model import Analyzer

  

  running = True

  
@@ -19,79 +20,141 @@ 

      :func:`analyze` method, passing the dequeued package. After the analysis,

      updates the status of this package on the database.

      """

-     kiskadee.logger.debug("Starting runner component")

-     database = kiskadee.database.Database()

-     engine = database.engine

-     session = database.session

-     kiskadee.model.Base.metadata.create_all(engine)

-     kiskadee.model.Base.metadata.bind = engine

+     kiskadee.logger.debug('Starting runner component')

+     session = kiskadee.database.Database().session

+     create_analyzers(session)

      while running:

          if not kiskadee.queue.is_empty():

              kiskadee.logger.debug('RUNNER: dequeuing...')

-             package = kiskadee.queue.dequeue_analysis()

+             source_to_analysis = kiskadee.queue.dequeue_analysis()

              kiskadee.logger.debug('RUNNER: dequeued %s-%s from %s'

-                                   % (package['name'],

-                                      package['version'],

-                                      package['plugin'].__name__))

-             analysis_reports = analyze(package)

-             if analysis_reports:

-                 kiskadee.logger.debug('RUNNER: Saving analysis %s' %

-                                       str(package))

-                 all_analyses = '\n'.join(analysis_reports)

-                 pkg = (session.query(kiskadee.model.Package).

-                        filter(kiskadee.model.Package.name == package['name']).

-                        first())

-                 pkg.versions[-1].has_analysis = True

-                 pkg.versions[-1].analysis = all_analyses

-                 session.add(pkg)

-                 session.commit()

-                 kiskadee.logger.debug('RUNNER: DONE running analysis')

-             else:

-                 kiskadee.logger.debug('RUNNER: Something went wrong')

-                 kiskadee.logger.debug('RUNNER: could not generate analysis')

+                                   % (source_to_analysis['name'],

+                                      source_to_analysis['version'],

+                                      source_to_analysis['plugin'].__name__))

+ 

+             call_analyzers(source_to_analysis, session)

  

  

- def analyze(package):

-     """Run each analyzer on a package.

+ def call_analyzers(source_to_analysis, session):

+     """Iterate over the package analyzers.

  

-     The package dict is in the queue. The keys are:

-         plugin: the plugin module itself

-         name: the package name

-         version: the package version

-         path: plugin default path for packages

-         return: list with firehose reports

+     Download and unpack the source once and then, for each analyzer the

+     plugin defines for it, call :func:`analyze` and hand the resulting

+     report to :func:`_save_source_analysis`. The saved reports are

+     committed together once every analyzer has run.

+ 

+     `source_to_analysis` is the source dict taken from the queue; its

+     `plugin` key holds the plugin module. `session` is the sqlalchemy

+     session used to store the reports.

+ 

+     When the source cannot be obtained nothing is analyzed. The temporary

+     directory holding the unpacked source is removed before returning.

      """

-     plugin = package['plugin'].Plugin()

+     plugin = source_to_analysis['plugin'].Plugin()

+     source_path = _path_to_uncompressed_source(

+             source_to_analysis, plugin

+     )

+     if source_path is None:

+         # Without a source every analyzer would be a no-op; stop early.

+         kiskadee.logger.debug('RUNNER: no source to analyze')

+         return

+ 

+     try:

+         for analyzer in plugin.analyzers():

+             firehose_report = analyze(

+                     source_to_analysis, analyzer, source_path

+             )

+             _save_source_analysis(

+                     source_to_analysis, firehose_report, analyzer, session

+             )

+         session.commit()

+     finally:

+         # The unpacked tree is only needed while the analyzers run.

+         shutil.rmtree(source_path, ignore_errors=True)

+ 

+ 

+ def analyze(source_to_analysis, analyzer, source_path):

+     """Run a single analyzer over an unpacked source tree.

+ 

+     `source_to_analysis` is the source dict taken from the queue; it is

+     not read by this function.

+     `analyzer` is the name of the static analyzer to run.

+     `source_path` is the absolute path of the uncompressed source, as

+     returned by :func:`_path_to_uncompressed_source`.

+ 

+     Return the firehose report of the analysis, or None when there is no

+     source path or when running the analyzer or converting its output

+     raises.

+     """

+     if source_path is None:

+         return None

+ 

+     with kiskadee.util.chdir(source_path):

+         start_message = 'ANALYSIS: running {} ...'.format(analyzer)

+         kiskadee.logger.debug(start_message)

+         try:

+             raw_output = kiskadee.analyzers.run(analyzer, source_path)

+             report = kiskadee.converter.to_firehose(raw_output, analyzer)

+             done_message = 'ANALYSIS: DONE {} analysis'.format(analyzer)

+             kiskadee.logger.debug(done_message)

+             return report

+         except Exception as err:

+             kiskadee.logger.debug('RUNNER: could not generate analysis')

+             kiskadee.logger.debug(err)

+     # TODO: remove compressed/uncompressed files after the analysis

+     return None

+ 

+ 

+ def _save_source_analysis(source_to_analysis, analysis, analyzer, session):

+     """Add the report of one analyzer to the stored package.

+ 

+     `source_to_analysis` is the source dict taken from the queue (keys

+     `name` and `version` are read). `analysis` is the report to store;

+     nothing is done when it is None. `analyzer` is the name of the analyzer

+     that produced the report. The new row is only added to `session`;

+     committing is left to the caller.

+ 

+     Nothing is saved, and a debug message is logged, when the package, its

+     versions or the analyzer are not found in the database.

+     """

+     if analysis is None:

+         return None

+ 

+     source_name = source_to_analysis['name']

+     source_version = source_to_analysis['version']

+ 

+     kiskadee.logger.debug(

+         "Saving analysis of {} on package {}-{}"

+         .format(analyzer, source_name, source_version)

+     )

+     try:

+         package = (

+                 session.query(kiskadee.model.Package)

+                 .filter(kiskadee.model.Package.name == source_name).first()

+         )

+         if package is None or not package.versions:

+             kiskadee.logger.debug(

+                 "No stored version for package {}".format(source_name)

+             )

+             return None

+ 

+         _analyzer = (

+                 session.query(kiskadee.model.Analyzer)

+                 .filter(kiskadee.model.Analyzer.name == analyzer).first()

+         )

+         if _analyzer is None:

+             kiskadee.logger.debug(

+                 "The required analyzer was not registered in kiskadee"

+             )

+             return None

+ 

+         # NOTE(review): the report is attached to the last version of the

+         # package, not looked up by `source_version` -- confirm this is the

+         # version that was analyzed.

+         _analysis = kiskadee.model.Analysis()

+         _analysis.analyzer_id = _analyzer.id

+         _analysis.version_id = package.versions[-1].id

+         _analysis.raw = analysis

+         session.add(_analysis)

+     except Exception as err:

+         # Best effort: a failing save must not stop the other analyzers.

+         kiskadee.logger.debug("Could not save the analysis")

+         kiskadee.logger.debug(err)

+     return None

+ 

+ 

+ def _path_to_uncompressed_source(package, plugin):

+     """Download and unpack the source of a package.

+ 

+     `package` is the source dict taken from the queue and `plugin` the

+     plugin instance whose `get_sources` fetches the compressed source.

+ 

+     Return the path of a new temporary directory holding the unpacked

+     source, or None when the source cannot be fetched or unpacked.

+     """

      kiskadee.logger.debug(

              'ANALYSIS: Downloading {} '

              'source...'.format(package['name'])

      )

-     compressed_source = plugin.get_sources(package)

-     if compressed_source:

-         kiskadee.logger.debug('ANALYSIS: Downloaded!')

-         kiskadee.logger.debug('ANALYSIS: Unpacking...')

-         reports = []

-         path = tempfile.mkdtemp()

-         shutil.unpack_archive(compressed_source, path)

-         with kiskadee.util.chdir(path):

-             kiskadee.logger.debug('ANALYSIS: Unpacked!')

-             analyzers = plugin.analyzers()

-             for analyzer in analyzers:

-                 kiskadee.logger.debug('ANALYSIS: running %s ...' % analyzer)

-                 try:

-                     analysis = kiskadee.analyzers.run(analyzer, path)

-                     firehose_report = kiskadee.converter.to_firehose(

-                             analysis, analyzer

-                     )

-                     reports.append(str(firehose_report))

-                     kiskadee.logger.debug(

-                             'ANALYSIS: DONE running %s' % analyzer

-                     )

-                 except:

-                     kiskadee.logger.debug(

-                             'ERROR: Could not run analysis inside container'

-                     )

-             # TODO: remove compressed/uncompressed files after the analysis

-         return reports

-     else:

-         kiskadee.logger.debug('RUNNER: invalid source dict')

+     try:

+         compressed_source = plugin.get_sources(package)

+     except Exception as err:

+         kiskadee.logger.debug('RUNNER: invalid compressed source')

+         kiskadee.logger.debug(err)

+         return None

+ 

+     if not compressed_source:

+         # A plugin may signal "no source" with an empty value.

+         kiskadee.logger.debug('RUNNER: invalid compressed source')

+         return None

+ 

+     kiskadee.logger.debug('ANALYSIS: Downloaded!')

+     kiskadee.logger.debug('ANALYSIS: Unpacking...')

+     path = tempfile.mkdtemp()

+     try:

+         shutil.unpack_archive(compressed_source, path)

+     except Exception as err:

+         kiskadee.logger.debug('RUNNER: could not unpack source')

+         kiskadee.logger.debug(err)

+         # Do not leave the half-filled temporary directory behind.

+         shutil.rmtree(path, ignore_errors=True)

+         return None

+ 

+     kiskadee.logger.debug('ANALYSIS: Unpacked!')

+     return path

+ 

+ 

+ def create_analyzers(_session):

+     """Create the analyzers on database.

+ 

+     The kiskadee analyzers are defined on the section `analyzers` of the

+     kiskadee.conf file, one `name = version` entry per analyzer. The

+     `_session` argument represents a sqlalchemy session.

+ 

+     Analyzer names are unique on the database, so a stored analyzer is

+     looked up by name only: a missing one is inserted, and one whose

+     configured version changed has its version updated instead of being

+     inserted a second time.

+     """

+     list_of_analyzers = dict(kiskadee.config._sections["analyzers"])

+     for name, version in list_of_analyzers.items():

+         stored = (

+                 _session.query(Analyzer)

+                 .filter(Analyzer.name == name).first()

+         )

+         if stored is None:

+             new_analyzer = Analyzer()

+             new_analyzer.name = name

+             new_analyzer.version = version

+             _session.add(new_analyzer)

+         elif stored.version != version:

+             # NOTE(review): analyses saved earlier keep pointing at this

+             # row; confirm that overwriting the version is acceptable.

+             stored.version = version

+     _session.commit()

@@ -1,33 +0,0 @@ 

- import os

- from unittest import TestCase

- 

- from kiskadee.runner import analyze

- import kiskadee.plugins.debian

- 

- 

- class TestAnalyzers(TestCase):

- 

-     def setUp(self):

-         self.plugin = kiskadee.plugins.debian.Plugin()

-         self.deb_pkg = {'name': 'test',

-                         'version': '1.0.0',

-                         'plugin': kiskadee.plugins.debian

-                         }

- 

-     def test_run_analyzer(self):

- 

-         def mock_get_sources(arg1, arg2):

-             base_path = os.path.dirname(os.getcwd())

-             return ''.join([base_path,

-                             '/kiskadee/kiskadee/tests/test_source/'

-                             'test_source.tar.gz'])

- 

-         kiskadee.plugins.debian.Plugin.get_sources = mock_get_sources

-         self.deb_pkg = {'name': 'test',

-                         'version': '1.0.0',

-                         'plugin': kiskadee.plugins.debian

-                         }

- 

-         result = analyze(self.deb_pkg)

-         self.assertTrue(isinstance(result, list))

-         self.assertTrue(len(result) == 1)

file modified
+89 -5
@@ -1,8 +1,10 @@ 

  from unittest import TestCase

- from kiskadee import model

  from sqlalchemy import create_engine, exc

  from sqlalchemy.orm import sessionmaker

  

+ from kiskadee import model

+ from kiskadee.runner import create_analyzers

+ 

  

  class TestModel(TestCase):

  
@@ -11,14 +13,21 @@ 

          Session = sessionmaker(bind=self.engine)

          self.session = Session()

          model.Base.metadata.create_all(self.engine)

+         create_analyzers(self.session)

          self.plugin = model.Plugin(name='kiskadee-plugin', target='university')

          self.package = model.Package(name='python-kiskadee')

-         self.version = model.Version(number='1.0-rc1', has_analysis=False)

+         self.version = model.Version(number='1.0-rc1')

          self.plugin.packages.append(self.package)

          self.package.versions.append(self.version)

          self.session.add(self.package)

          self.session.add(self.plugin)

          self.session.add(self.version)

+ 

+         self.analysis = model.Analysis(

+                 analyzer_id=1,

+                 version_id=self.version.id,

+                 raw=""

+                 )

          self.session.commit()

  

      def tearDown(self):
@@ -44,7 +53,7 @@ 

          self.assertEqual(len(plugins), 2)

  

      def test_add_version_without_package(self):

-         version = model.Version(number='3.1', has_analysis=False)

+         version = model.Version(number='3.1')

          self.session.add(version)

          with self.assertRaises(exc.IntegrityError):

              self.session.commit()
@@ -64,9 +73,84 @@ 

              self.session.commit()

  

      def test_unique_version_for_package(self):

-         package_version_1 = model.Version(number='1.0', has_analysis=False)

-         package_version_2 = model.Version(number='1.0', has_analysis=False)

+         package_version_1 = model.Version(number='1.0')

+         package_version_2 = model.Version(number='1.0')

          self.package.versions.append(package_version_1)

          self.package.versions.append(package_version_2)

          with self.assertRaises(exc.IntegrityError):

              self.session.commit()

+ 

+     def test_compose_kiskadee_source(self):

+         """Link a package, a version and an analysis and read it back.

+ 

+         The objects are chained through the `packages`, `versions` and

+         `analysis` relationships and the report is then reached from the

+         package.

+         """

+         _analyzer = self.session.query(model.Analyzer)\

+                     .filter(model.Analyzer.name == "cppcheck").first()

+         package = model.Package(

+                 name='bla',

+                 plugin_id=self.plugin.id

+                 )

+         package_version = model.Version(

+                 number='1.0.1',

+                 package_id=package.id

+                 )

+ 

+         package_analysis = model.Analysis(

+                 raw="<>",

+                 analyzer_id=_analyzer.id,

+                 version_id=package_version.id

+                 )

+ 

+         self.plugin.packages.append(package)

+         package.versions.append(package_version)

+         package_version.analysis.append(package_analysis)

+ 

+         self.assertEqual(package.versions[0].analysis[0].raw, "<>")

+ 

+     def test_save_several_analysis(self):

+         """Store two analyses of one package version and read both back.

+ 

+         The analyses are made by two different analyzers (cppcheck and

+         flawfinder) and committed for the same version.

+         """

+ 

+         _analyzer1 = (

+                 self.session.query(model.Analyzer)

+                 .filter(model.Analyzer.name == "cppcheck").first()

+                 )

+         _analyzer2 = (

+                 self.session.query(model.Analyzer)

+                 .filter(model.Analyzer.name == "flawfinder").first()

+                 )

+ 

+         package = model.Package(

+                 name='bla',

+                 plugin_id=self.plugin.id

+                 )

+         package_version = model.Version(

+                 number='1.0.1',

+                 package_id=package.id

+                 )

+ 

+         self.plugin.packages.append(package)

+         package.versions.append(package_version)

+ 

+         # Commit first so that package_version.id is set before it is used

+         # as the foreign key of the analyses below.

+         self.session.add(package)

+         self.session.add(package_version)

+         self.session.commit()

+ 

+         package_analysis1 = model.Analysis(

+                 raw="<>",

+                 analyzer_id=_analyzer1.id,

+                 version_id=package_version.id

+                 )

+         package_analysis2 = model.Analysis(

+                 raw="><",

+                 analyzer_id=_analyzer2.id,

+                 version_id=package_version.id

+                 )

+ 

+         self.session.add(package_analysis1)

+         self.session.add(package_analysis2)

+         self.session.commit()

+ 

+         saved_package = (

+                 self.session.query(model.Package)

+                 .filter(model.Package.name == 'bla').first()

+                 )

+         analysis = saved_package.versions[-1].analysis

+         self.assertEqual(len(analysis), 2)

+         self.assertEqual(analysis[0].raw, "<>")

+         self.assertEqual(analysis[1].raw, "><")

@@ -1,22 +1,25 @@ 

  from unittest import TestCase

- from kiskadee.monitor import Monitor

- from kiskadee import model

  from sqlalchemy import create_engine

  from sqlalchemy.orm import sessionmaker

+ 

+ from kiskadee import model

+ from kiskadee.monitor import Monitor

  from kiskadee.queue import enqueue_package

  from kiskadee.model import Package, Plugin

  import kiskadee.queue

  import kiskadee.plugins.debian

+ from kiskadee.runner import create_analyzers

  

  

  class TestMonitor(TestCase):

  

      def setUp(self):

-         self.monitor = Monitor()

-         self.monitor.engine = create_engine('sqlite:///:memory:')

-         Session = sessionmaker(bind=self.monitor.engine)

-         self.monitor.session = Session()

-         model.Base.metadata.create_all(self.monitor.engine)

+         self.engine = create_engine('sqlite:///:memory:')

+         Session = sessionmaker(bind=self.engine)

+         session = Session()

+         self.monitor = Monitor(session)

+         model.Base.metadata.create_all(self.engine)

+         create_analyzers(self.monitor.session)

          self.pkg1 = {'name': 'curl',

                       'version': '7.52.1-5',

                       'plugin': kiskadee.plugins.debian,
@@ -37,7 +40,7 @@ 

  

      def tearDown(self):

+         """Drop the tables of every model from the test engine."""

          # model.metadata.drop_all(self.engine)

-         model.Base.metadata.drop_all(self.monitor.engine)

+         model.Base.metadata.drop_all(self.engine)

  

      def test_dequeue_package(self):

          enqueue_package(self.pkg1)

@@ -0,0 +1,107 @@ 

+ from unittest import TestCase

+ 

+ from kiskadee.runner import analyze, _path_to_uncompressed_source

+ from kiskadee.runner import _save_source_analysis, create_analyzers

+ import kiskadee.plugins.example

+ from sqlalchemy import create_engine

+ from sqlalchemy.orm import sessionmaker

+ from kiskadee import model

+ 

+ 

+ class TestAnalyzers(TestCase):

+     """Exercise the runner helpers against an in-memory SQLite database."""

+ 

+     def setUp(self):

+         """Create the schema, the analyzers, a plugin row and a source."""

+         self.engine = create_engine('sqlite:///:memory:')

+         Session = sessionmaker(bind=self.engine)

+         self.session = Session()

+         model.Base.metadata.create_all(self.engine)

+         create_analyzers(self.session)

+         # This module only imports kiskadee.plugins.example, so the source

+         # dict shared by the tests refers to that plugin.

+         self.source_to_analysis = {

+                 'name': 'test',

+                 'version': '1.0.0',

+                 'plugin': kiskadee.plugins.example

+         }

+         self.plugin = model.Plugin(name='kiskadee-plugin', target='university')

+         self.session.add(self.plugin)

+         self.session.commit()

+ 

+     def tearDown(self):

+         """Release the session and drop the tables of the test engine."""

+         self.session.close()

+         model.Base.metadata.drop_all(self.engine)

+ 

+     def test_run_analyzer(self):

+         """Running cppcheck on the example source yields a report."""

+         source_path = _path_to_uncompressed_source(

+                 self.source_to_analysis, kiskadee.plugins.example.Plugin()

+         )

+         firehose_report = analyze(

+                 self.source_to_analysis, "cppcheck", source_path

+         )

+         self.assertIsNotNone(firehose_report)

+ 

+     def test_save_source_analysis(self):

+         """A report saved for a stored package can be queried back."""

+         package = model.Package(

+                 name='test',

+                 plugin_id=self.plugin.id

+                 )

+ 

+         package_version = model.Version(

+                 number='1.0.0',

+                 package_id=package.id

+                 )

+ 

+         package.versions.append(package_version)

+ 

+         self.session.add(package)

+         self.session.add(package_version)

+         self.session.commit()

+ 

+         firehose_report = "<>"

+         _save_source_analysis(

+                 self.source_to_analysis,

+                 firehose_report,

+                 "cppcheck",

+                 self.session

+         )

+ 

+         saved_analysis = (

+                 self.session.query(model.Analysis)

+                 .filter(model.Analysis.raw == firehose_report).first()

+         )

+ 

+         self.assertIsNotNone(saved_analysis)

+ 

+     def test_path_to_uncompressed_source(self):

+         """The example plugin source is unpacked to some path."""

+         source_path = _path_to_uncompressed_source(

+                 self.source_to_analysis, kiskadee.plugins.example.Plugin()

+         )

+ 

+         self.assertIsNotNone(source_path)

+ 

+     def test_invalid_path_to_uncompressed_source(self):

+         """Without a plugin no source can be fetched, so None is returned."""

+         source_path = _path_to_uncompressed_source(

+                 self.source_to_analysis, None

+         )

+ 

+         self.assertIsNone(source_path)

file modified
+4
@@ -37,3 +37,7 @@ 

  description = SAMATE Juliet test suite

  analyzers = cppcheck flawfinder

  active = yes

+ 

+ [analyzers]

+ cppcheck = 1.0.0

+ flawfinder = 1.0.0