From fcf672ca51831bd3379c0e3660790589a8386bf2 Mon Sep 17 00:00:00 2001
From: martiivGylden <martin.iversen@gyldendal.no>
Date: Fri, 15 Mar 2024 11:25:44 +0100
Subject: [PATCH] Working on metrics, and duplicates FML

---
 components.py                            |  71 ++++++++++++++
 diagramParser.py                         |  17 ++--
 documentation/applicationRequirements.md |  70 ++++++++++++++
 documentation/requirements.txt           |  33 -------
 dynamics.py                              | 113 ++++++++++++++++++++---
 5 files changed, 251 insertions(+), 53 deletions(-)
 create mode 100644 documentation/applicationRequirements.md
 delete mode 100644 documentation/requirements.txt

diff --git a/components.py b/components.py
index 8338d53..e1e074f 100644
--- a/components.py
+++ b/components.py
@@ -1,3 +1,9 @@
+from dataclasses import dataclass
+import pandas as pd
+import ERFormatConstants as const
+from dynamics import *
+
+
 class Threat: 
     """_summary_ 
     The class threat is used to classify a threat in the ER model.
@@ -89,4 +95,69 @@ class Attack:
     
     def linkThreats():
         # Function should link the attack to a threat
+        pass
+    
+    
+@dataclass
+class Diagram():  # Holds the threats, consequences, attacks, dynamics and metrics parsed from one diagram
+    threats: dict
+    consequences : dict
+    attacks : dict
+    dynamics : dict
+    metrics : dict
+    # NOTE(review): __init__ below assigns a pd.DataFrame to `dynamics` although it is annotated as dict - confirm
+    def __init__(self) -> None:
+        self.threats = {}
+        self.consequences = {}
+        self.attacks = {}
+        self.dynamics = pd.DataFrame()
+        self.metrics = {}    
+    
+    def getThreat(self, id) -> Threat:
+        """Look up a threat in ``self.threats``.
+
+        Args:
+            id: Key of the threat in the ``threats`` dict.
+
+        Returns:
+            Threat: The value stored under ``id``; a missing key raises KeyError.
+        """
+        
+        threat = self.threats[id]
+        return threat
+   
+    def getConsequence(self, id) -> Consequence:
+        """Look up a consequence in ``self.consequences``.
+
+        Args:
+            id: Key of the consequence in the ``consequences`` dict.
+
+        Returns:
+            Consequence: The value stored under ``id``; a missing key raises KeyError.
+        """
+        consequence = self.consequences[id]
+        return consequence
+   
+    def getAttack(self, id) -> Attack:
+        attack = self.attacks[id]  # Raises KeyError if no attack is stored under id
+        return attack
+
+    def getERDynamic(self, threatID) -> ERDynamic:
+        """Retrieve the ER dynamic component associated with a threat (not implemented yet).
+
+        Planned approach, as far as it has been written down: the threat ID is used
+        to find a row in the metric matrix which contains that threat ID.
+
+        Args:
+            threatID: ID of the threat whose ER dynamic is wanted.
+
+        Returns:
+            ERDynamic: Intended return type; the current stub returns None.
+        """
+        pass
+    
+    def getBowtieDynamic(self, associatedId) -> BowtieDynamic:
+        pass  # TODO: not implemented yet; the stub returns None
+
+    def getMetric(self) -> Metric:
         pass
\ No newline at end of file
diff --git a/diagramParser.py b/diagramParser.py
index e3d0d5a..961cc03 100644
--- a/diagramParser.py
+++ b/diagramParser.py
@@ -5,16 +5,21 @@ import dynamics as dynamic
 import logging as log
 import matrices as matrix
 
+
 # Function will parse a csv file and extract the necessary information, this is step 1 of the parse 
-def parseDiagramFile(csvFile): 
+def parseDiagramFile(csvFile) -> component.Diagram: 
     df = pd.read_csv(csvFile)    
     df.drop(["Shape Library", "Page ID", "Contained By", "Group", "Comments", "property 1"], axis=1, inplace=True) #Removing unecessary data
     
+    diagram = component.Diagram()
+    
+    
+    
     # List containing all threats and their descriptions 
-    threats = {}
-    consequences = {}
-    dynamics = {}
-    metrics = []    
+    threats = diagram.threats
+    consequences = diagram.consequences
+    dynamics = diagram.dynamics
+    metrics = diagram.metrics
 
     metricsMatrix = matrix.parseTable(df) #Parse the table
     threats = parseThreats(df, threats)    
@@ -77,7 +82,7 @@ def parseAttacks(df, attackDict):
     return attackDict        
         
 #Parses metrics components and adds it to list
-def parseDynamic(df, metricList, dynamicsDict):
+def parseDynamic(df, metricDict, dynamicsDict):
     for i in range(len(df)):                                            # Iterates through the dataframe
         if df[const.textArea1][i] == const.Dynamics:                    # If the component is a dynamic component
             
diff --git a/documentation/applicationRequirements.md b/documentation/applicationRequirements.md
new file mode 100644
index 0000000..91dc7c0
--- /dev/null
+++ b/documentation/applicationRequirements.md
@@ -0,0 +1,70 @@
+# Requirements for the diagram parser
+
+This document contains the functional and non functional requirements of the application.
+Additionally, the document contains success criteria derived from the three research questions formulated for the thesis:
+
+- RQ1: What are important aspects of a software supply chain, how do we identify and document them?
+- RQ2: What are dynamic aspects of a software supply chain, how do we identify and document them?
+- RQ3: How can we maintain traceability and consistency between the software supply chain and its risk picture in a scalable and understandable manner?
+
+## Success criteria
+
+- By leveraging the method and application a developer will be able to identify dynamic indicators related to the found software supply chain threats, and find the different components in the software supply chain architecture related to these dynamic indicators
+- The developer will be able to use the resulting scorecard of the modeled software supply chain to identify which dynamic indicators need to be updated in order to maintain an accurate context
+- The developer can use the scorecard to assess which components are not accurately represented by the documented indicators.
+
+## Functional requirements
+
+- The developer can leverage a custom library of Entity relationship components in LucidChart to annotate an architecture diagram with dynamic components
+- The developer can leverage a custom library of Entity relationship components in LucidChart to annotate a Bowtie diagram
+- The application can parse a lucidChart page containing an architecture model, bowtie model and link matrix through the csv export format
+- The parser needs to extract all the information in the diagrams and store it in python classes, including: Threats, Consequences, Attacks, Dynamic components, Indicators (Metrics), and Metric matrix
+- The application must visualize threats and their related components in the architecture model
+- The application must show the linking indicators between threats, consequences and architecture components
+- The application must rank the indicators based on last update to show which indicators are outdated
+- The application must rank indicators based on importance, the importance is based on how many components the indicator has an impact on
+
+## Non functional requirements
+
+- The application will feature a user interface to visualize the parsed data
+- The application will be able to handle large diagrams with many components to facilitate scalability
+- The application will only accept csv files from lucidchart as input
+- The application will be able to run as a standalone application on a local machine
+- The application will only parse the diagrams if the architecture and bowtie diagram have been annotated with the custom library
+
+## Thesis problem description
+
+The thesis aims to innovate in the area of risk modeling through the use of bowtie diagrams by facilitating the identification and documentation of dynamic indicators related to software supply chain risk.
+The goal of the thesis is to research the possibility of graphical linkage between bow tie risk models and entity relationship diagrams.
+
+This is all in an effort to facilitate dynamic risk management for software supply chains.
+The main issue with dynamic risk management is the amount of work related to keeping the risk picture up to date, this can however be done using dynamic indicators which impact the different threats related to software supply chains.
+
+The thesis proposes a method for linking bow tie risk diagrams and entity relationship diagrams through dynamic indicators.
+The method will use entity relationship components to annotate bow tie risk models and architecture models with dynamic indicators.
+Furthermore the method will use tables for standard indicators and attacks relevant to software supply chains.
+Having leveraged the method and its components, a developer is left with an updated architecture and bowtie model which contain dynamic indicators. To score the diagrams and provide insight, a parser will be developed.
+This parser will be able to extract the information from the diagrams and visualize the links between the bowtie model and the system architecture.
+The parser will present and score the diagram based on amount of indicators and when they were updated.
+Moreover the parser will aid in the identification of which relationships are not accurately represented by the documented indicators.
+
+Method formulation and scorecard development is being done using Technology research. This methodology splits the research into steps, the first being problem definition, where a literature mapping was leveraged to understand the state of the art.
+The next step is innovation where the method and prototype is developed.
+
+For evaluation the method and tool will be tested on two cases provided by partners.
+The tool will be subject to empirical use case driven evaluation where it is tested on the two cases.
+Evaluation will be based on the defined success criteria and requirements for the application.
+
+Following the evaluation a new iteration of technology research will be started to improve the method and tool according to the evaluation.
+
+The focus of the annotation will be on risks and aspects related to software supply chains.
+The method will use specialized Entity relationship components to annotate the bow tie risk models and architecture diagram, this annotation will facilitate linkage between an entity relationship diagram of a software supply chain and the bow tie risk models associated with it.
+Some examples of success criteria are:
+
+- The developer can use the scorecard to assess which components are not accurately represented by the documented indicators.
+- By leveraging the method and application a developer will be able to identify dynamic indicators related to the found software supply chain threats, and find the different components in the software supply chain architecture related to these dynamic indicators
+
+Some examples of the tool's functional requirements are:
+
+- The application can parse a lucidChart page containing an architecture model, bowtie model and link matrix through the csv export format
+- The parser needs to extract all the information in the diagrams and store it in python classes, including: Threats, Consequences, Attacks, Dynamic components, Indicators (Metrics), and Metric matrix
diff --git a/documentation/requirements.txt b/documentation/requirements.txt
deleted file mode 100644
index f2db6f7..0000000
--- a/documentation/requirements.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Har komponenter 
-Lagt til lokale metrics i hver komponent
-1. Må nå lage en global metric liste FERDIG
-2. Må lage ER dynamikk liste 
-3. Må koble dynamic komponenter til trusselkomponent 
-
-
-
-# Thesis problem description 
-Problem description:
-
-The thesis aims to innovate in risk modeling through the use of bowtie diagrams and the identification and documentation of dynamic indicators of software supply chain risk.
-The goal of the thesis is to research the possibilities of graphical linkage between bow tie risk models and entity relationship diagrams and to develop a method for this linkage.
-This is all in an effort to facilitate dynamic risk management for software supply chains.
-
-The thesis proposes a method for linking bow tie risk diagrams and entity relationship diagrams through dynamic indicators. 
-The method will use new annotations on bow tie risk models and matrices containing indicators which can be observed for both ER architecture diagrams and bow tie risk models.
-To analyze the modeled diagrams and matrices, a analysis tool will be developed to facilitate the analysis and scoring of the modeled risk image. 
-The scoring will be based on amount of indicators observed for critical components and their status.
-
-Method formulation and scorecard developement is being done using Technology research. This methodology splits the research into problem definition where a literature mapping was leveraged to understand state of the art. 
-The next step is innovation where the method and prototype is developed.
-For evaluation the method and tool will be tested on two cases provided by partners.
-Preceeding the evaluation a new iteration of technology research will be started to improve the method and tool according to the evaluation.
-
-The focus of the annotation will be on risks and aspects related to software supply chains. 
-The method will use specialized Entity relationship components to annotate the bow tie risk models and architecture diagram, this annotation will facilitate linkage between an entity relationship diagram of a software supply chain and the bow tie risk models associated with it. 
-All resulting in a method which can help in determine which metrics one should observe to pick up on changes in the risk landscape of the software supply chain facilitating dynamic risk management. 
-
-Tabell: 
-
-Rad 1 to og 3 Er kolonner 
-Rad 
\ No newline at end of file
diff --git a/dynamics.py b/dynamics.py
index 8ca8627..6e68dd4 100644
--- a/dynamics.py
+++ b/dynamics.py
@@ -6,8 +6,7 @@ class DynamicComponent:
 
     def __init__(self, componentID) -> None:
         self.componentID = componentID
-        self.metrics = []             # List of metrics for the dynamic
-        self.DynamicsRow = None             # The row in the dynamics table the dynamic is associated with
+        self.metrics = []                   # List of metrics for the dynamic
         
         
     #String only returns necessary info         
@@ -19,16 +18,18 @@ class Metric:
     def __init__(self, ID, name) -> None:
         
         #Metrics found in the dynamics tables 
+        self.erID = []                # ID of the metric used to locate in dynamics matrics
+        self.bowtieID = []            # ID of the metric used to locate in dynamics matrics
         self.ID = ID                    # ID of the metric used to locate in dynamics matrics
         self.name = name                # Name of the metric
+        self.value = None               # Value of the metric
+        self.date = None                # Date of the metric
+        self.frequency = None           # Frequency of the metric
+        self.measureGuide = None        # Measure guide of the metric
         
         
-        # Metrics found in the metric table 
-        
-        
-
     def __str__(self) -> str:
-        return f"Metric: {self.name}, {self.description}"
+        return f"Metric: {self.name}, Value:  {self.value} Last update: {self.date}"
             
 class BowtieDynamic(DynamicComponent):
     def __init__(self, componentID, type) -> None:
@@ -49,13 +50,14 @@ class BowtieDynamic(DynamicComponent):
             return super().__str__() + f"Associated attack ID: {self.associatedAttack[const.Id].item()}"
         
         
+        
+    def associateBowtie(self, df, componentType, metricsDict: dict):
         """_summary_
         Handles the associated dynamics for the bowtie model as they are different from the ER model
         ER model has one type of dynamic component with metrics however, the bowtie model has three dynamic types and needs more parsing 
         than the ER model du to the modeling annotation.
         Abstracting this saves time
         """ 
-    def associateBowtie(self, df, componentType ):
         
         componentId = self.componentID  # Define the ID of the component
         
@@ -80,12 +82,9 @@ class BowtieDynamic(DynamicComponent):
             self.associatedAttack = lineTwo          # ! The "line" is an attack, not a line, we add it to the associated attack field and move on
             return                                      # The attack is the only component associated with the dynamic        
         
-        
         sourceComponent = df.loc[df[const.Id]==lineTwo[const.From].item()]
         destinationComponent = df.loc[df[const.Id]==lineTwo[const.To].item()]
             
-
-
         if componentType == const.ThreatDynamic:  # If the component is a threat
             if sourceComponent[const.textArea1].item() == const.Threat:  # Checks if source or destination is the threat
                 self.associatedThreat = sourceComponent  
@@ -101,8 +100,45 @@ class BowtieDynamic(DynamicComponent):
             else:
                 self.associatedConsequence = destinationComponent    
                 self.associatedAttack = sourceComponent
+        
+        self.linkMetric(df, metricsDict)                
+        
+    def linkMetric(self, df, metricsDict: dict):
+        """Register this bowtie dynamic's metrics in ``metricsDict``.
+
+        Metric ID/name pairs are read from the "Text Area<i>"/"Text Area<i+1>"
+        columns of this component's row, starting at i = 4 in steps of 2. Each
+        metric ID is appended to ``self.metrics``; the shared Metric collects the
+        IDs of the associated threat/consequence/attack in ``bowtieID``.
+        """
+        row = df.loc[df[const.Id] == self.componentID]  # This component's row
+        # Attributes holding the linked components for each dynamic type
+        linkedAttributes = {
+            const.ThreatDynamic: ("associatedThreat", "associatedAttack"),
+            const.ConsequenceDynamic: ("associatedConsequence", "associatedAttack"),
+            const.AttackDynamic: ("associatedAttack",),
+        }.get(self.type, ())
+        linked = [getattr(self, name, None) for name in linkedAttributes]  # None when never associated
+        linkedIDs = [component[const.Id].item() for component in linked if component is not None]
+        for i in range(4, len(row.columns), 2):  # `columns` is an attribute; the step belongs to range()
+            idColumn, nameColumn = "Text Area" + str(i), "Text Area" + str(i + 1)
+            # NOTE(review): Lucidchart headers are usually "Text Area 4" (with a space) - confirm
+            if idColumn not in row.columns or nameColumn not in row.columns or row[idColumn].isna().all():
+                break  # No more metrics
+            metricID = row[idColumn].item()
+            metricName = row[nameColumn].item()
+            self.metrics.append(metricID)  # Metric IDs local to this dynamic component
+            if not checkDuplicateMetrics(metricsDict, metricID):
+                metricsDict[metricID] = Metric(metricID, metricName)  # First sight: add to the global dict
+            metric = metricsDict[metricID]
+            # Record each linked component ID once per metric
+            newIDs = [linkedID for linkedID in linkedIDs if linkedID not in metric.bowtieID]
                 
+            metric.bowtieID.extend(newIDs)
             
+            
+        
+                    
 class ERDynamic(DynamicComponent):
     def __init__(self, componentID, type, description) -> None:
         super().__init__(componentID)
@@ -118,7 +154,7 @@ class ERDynamic(DynamicComponent):
         Function will use the dynamic component ID to find linked threats, ER components, attacks and consequences
         This will be needed to associate the dynamics with the correct components and analyze relationships
         """
-    def associatER(self, df):
+    def associatER(self, df, metricsDict: dict):
         componentId = self.componentID  # Define the ID of the component
         
         #Find the lines associated with the component
@@ -145,5 +181,54 @@ class ERDynamic(DynamicComponent):
                 
             self.associatedERComponents.append(associatedComponent)  # Add the associated component to the list of associated components
             log.info("Associated components found: ", self.associatedERComponents)
-        return 
-        
\ No newline at end of file
+        
+        self.linkMetric(df, metricsDict)                
+
+    
+     
+    def linkMetric(self, df, metricsDict):
+        """Register this ER dynamic's metrics in ``metricsDict``.
+
+        Metric ID/name pairs are read from the "Text Area<i>"/"Text Area<i+1>"
+        columns of this component's row, starting at i = 6 in steps of 2. Each
+        metric ID is appended to ``self.metrics``; the shared Metric collects the
+        IDs of ``self.associatedERComponents`` in ``erID`` without duplicates.
+        """
+        row = df.loc[df[const.Id] == self.componentID]  # This component's row
+        for i in range(6, len(row.columns), 2):  # `columns` is an attribute; the step belongs to range()
+            idColumn, nameColumn = "Text Area" + str(i), "Text Area" + str(i + 1)
+            # NOTE(review): Lucidchart headers are usually "Text Area 6" (with a space) - confirm
+            if idColumn not in row.columns or nameColumn not in row.columns or row[idColumn].isna().all():
+                break  # No more metrics
+            metricID = row[idColumn].item()
+            metricName = row[nameColumn].item()
+            self.metrics.append(metricID)  # Metric IDs local to this dynamic component
+            if not checkDuplicateMetrics(metricsDict, metricID):
+                metricsDict[metricID] = Metric(metricID, metricName)  # First sight: add to the global dict
+            metric = metricsDict[metricID]
+            # Link every associated ER component to the metric exactly once
+            for component in self.associatedERComponents:  # Own loop variable: must not clobber i
+                erID = component[const.Id].item()
+                if not checkDuplicate(metric.erID, erID):
+                    metric.erID.append(erID)
+
+            
+def checkDuplicateMetrics(metricsDict: dict, metricID: str) -> bool:
+    """Return True if ``metricID`` is already a key of ``metricsDict``.
+
+    A warning is logged for every duplicate that is found.
+    """
+    if metricID in metricsDict:
+        log.warning("Duplicate metric found: %s", metricID)  # %s is required: logging %-formats the args
+        return True
+    # Not seen before
+    return False
+    
+def checkDuplicate(list: list, id: str):
+    """Tell whether ``id`` is already present in ``list``.
+
+    Used to avoid adding the same associated component twice.
+    Returns True when ``id`` is found, False otherwise.
+    """
+    alreadyPresent = id in list
+    return bool(alreadyPresent)
\ No newline at end of file
-- 
GitLab