CPE#

Common Platform Enumeration (CPE) is a structured naming scheme for information technology systems, software, and packages. Based upon the generic syntax for Uniform Resource Identifiers (URI), CPE includes a formal name format, a method for checking names against a system, and a description format for binding text and tests to a name. This method of naming is known as a well-formed CPE name (WFN).

source: cpe.mitre.org/specification

You can see this notebook directly via:

Generation time#

from datetime import datetime, timezone, timedelta

# Offset from UTC, in hours, applied to the timestamp printed below.
timezone_offset = 0.0
utc_shift = timedelta(hours=timezone_offset)
tzinfo = timezone(utc_shift)

# Timezone-aware "now" rendered as date, time and numeric UTC offset.
timestamp_format = '%Y-%m-%d %H:%M:%S %z'
generation_time = datetime.now(tz=tzinfo).strftime(timestamp_format)
print(generation_time)

2024-04-16 06:01:44 +0000

Creative Commons#

This notebook and the generated diagrams are released under the Creative Commons license (CC BY 4.0).

import requests
import urllib3

# Silence urllib3 warnings; the downloads below are made with verify=False,
# which would otherwise emit a warning per request.
urllib3.disable_warnings()

# Creative Commons icons saved into the working directory under their
# original file names.
urls = ['https://mirrors.creativecommons.org/presskit/icons/cc.xlarge.png',
       'https://mirrors.creativecommons.org/presskit/icons/by.xlarge.png']
for url in urls:
    # Last path segment of the URL is used as the local file name.
    file_name = url.rsplit("/", 1)[-1]
    print(file_name)

    # NOTE(review): verify=False disables TLS certificate checks; kept as in
    # the original, but confirm it is still required in this environment.
    # The timeout prevents the cell from hanging forever on a stalled server.
    response = requests.get(url, verify=False, timeout=30)
    # Fail here rather than saving an HTTP error page under a .png name.
    response.raise_for_status()

    # Context manager guarantees the file is flushed and closed.
    with open(file_name, 'wb') as icon_file:
        icon_file.write(response.content)

cc.xlarge.png
by.xlarge.png

CPE data downloading#

All CPE stats are taken from nvd.nist.gov/products/cpe/statistics

from urllib.request import urlopen
import ssl
from bs4 import BeautifulSoup, SoupStrainer

def get_data(url):
    """Download a page and parse only its ``<table>`` elements.

    Parameters
    ----------
    url : str
        Address of the HTML page to fetch.

    Returns
    -------
    bs4.BeautifulSoup
        Soup built with ``html.parser`` and restricted to ``<table>``
        elements via a ``SoupStrainer``.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urlopen`` when the request fails.
    UnicodeDecodeError
        If the response body is not valid UTF-8.
    """
    # NOTE(review): hostname and certificate checks are switched off, so the
    # server is not authenticated. Kept as in the original; confirm this is
    # still needed before relying on the downloaded numbers.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # The context manager closes the HTTP response even if reading or
    # decoding raises (the original left it open).
    with urlopen(url, context=ctx) as page:
        html = page.read().decode("utf-8")

    tables_only = SoupStrainer('table')
    return BeautifulSoup(html, "html.parser", parse_only=tables_only)

# Page whose HTML tables are scraped.
url = "https://nvd.nist.gov/products/cpe/statistics"
# Parsed result of get_data(), restricted to <table> elements.
data = get_data(url)

# Size of the parsed result; pars() later treats each item in it as one table.
print(len(data))

New CPE entries#

CPE data parsing#

import pandas as pd

def pars(data):
    """Convert scraped CPE statistics tables into a per-year DataFrame.

    Parameters
    ----------
    data : iterable
        Table elements. Each one must support ``table['id']`` (the last four
        characters are taken as the year) and ``table.find_all("tr")``; in
        every row that has ``<td>`` cells, the second cell's ``.string`` is
        read as the count, with commas used as thousands separators.
        Rows are assumed to be in calendar order starting with January.

    Returns
    -------
    pandas.DataFrame
        One row per table with the columns ``Year`` (string), ``Summary``
        (sum of the monthly counts) and ``January`` .. ``December``.
        Months without a row are filled with 0. Rows are sorted by ``Year``
        and the index starts at 1.

    Raises
    ------
    ValueError
        If a count cannot be converted to ``int``, or (from pandas) if a
        table yields more than twelve data rows.
    """
    month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November',
                   'December']
    records = []

    for table in data:
        year = table['id'][-4:]

        monthly_counts = []
        for row in table.find_all("tr"):
            # Named `cells` so the `data` parameter is no longer rebound
            # inside the loop that iterates over it.
            cells = row.find_all("td")
            if not cells:
                # Rows without <td> cells (e.g. header rows) carry no count.
                continue
            monthly_counts.append(int(cells[1].string.replace(",", "")))

        # Total is taken before padding; the padding zeros would not change it.
        yearly_total = sum(monthly_counts)

        # Pad months that have no row yet so every record has 12 month values.
        # A negative multiplier yields an empty list, so longer lists are left as is.
        monthly_counts.extend([0] * (len(month_names) - len(monthly_counts)))

        records.append([year, yearly_total] + monthly_counts)

    df = pd.DataFrame(records, columns=['Year', 'Summary'] + month_names)
    # Years are strings here, so this is a lexicographic sort.
    df.sort_values(by=['Year'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    df.index += 1
    return df
    
# Build the per-year DataFrame from the scraped tables.
parsed_data = pars(data)

# Notebook display: styled table with an in-cell bar on the 'Summary' column.
parsed_data.style.bar(subset=['Summary'], color='#FF6200')

Matplotlib is building the font cache; this may take a moment.

	Year	Summary	January	February	March	April	May	June	July	August	September	October	November	December
1	2009	5786	181	422	830	639	612	209	241	110	330	102	1960	150
2	2010	11094	990	234	509	1106	803	886	426	1252	707	1336	1026	1819
3	2011	9263	702	925	583	2252	818	682	430	390	423	470	737	851
4	2012	23235	1148	1003	1622	2501	3228	2259	3124	2887	1583	1830	1110	940
5	2013	15671	1460	1105	1138	1271	932	989	1623	1359	1140	1941	1327	1386
6	2014	16714	1552	1151	1644	2103	2175	1447	1606	886	885	260	1380	1625
7	2015	6503	758	654	849	838	429	736	446	564	81	216	144	788
8	2016	11074	2450	535	871	699	732	1471	321	762	979	452	696	1106
9	2017	18650	746	888	1070	1935	1494	1210	1364	1889	1516	1596	2585	2357
10	2018	56509	2273	2404	1571	2525	10489	5160	3707	3895	3722	10862	6564	3337
11	2019	272322	6810	7197	10466	12979	13181	32837	45459	23763	47195	26717	22731	22987
12	2020	171681	15960	18668	20159	16306	14157	11250	10659	14459	13077	10921	9293	16772
13	2021	189565	11253	14969	18562	15476	13997	18730	20791	16531	14022	16164	17160	11910
14	2022	175793	11299	12125	13566	10410	10262	14432	13652	18251	14397	15560	14888	26951
15	2023	223030	18297	16483	22071	14262	17591	19858	18929	20943	16827	21344	18296	18129
16	2024	39737	20260	16220	2058	1199	0	0	0	0	0	0	0	0

CPE data saving#

CSV file is available in GitHub repository, see:

# Output file name; the plotting cell reads this same file back.
csv_filename = 'cpe-number-of-new-entries.csv'

# index=False leaves the DataFrame index out of the file.
parsed_data.to_csv(csv_filename, index=False)

CPE data plotting#

PNG files are available in GitHub repository with two background versions, see:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import urllib

# Reload the data from the CSV file named by csv_filename.
df = pd.read_csv(csv_filename)

# Bar chart with 'Year' on the x axis and the yearly total ('Summary') on the y axis.
df.plot(x='Year', 
        xlabel='Year',
        y='Summary', 
        ylabel='Number of CPE',
        kind='bar', 
        title='Number of CPE per year')
# NOTE(review): tight_layout() runs before the figure is resized below;
# confirm the resulting margins are the intended ones.
plt.tight_layout()
plt.legend(['CPE'])
# Footer line with the generation timestamp and the data source.
plt.figtext(0.16, 0.02, f"Generated on {generation_time} thanks to limberduck.org based on source: nvd.nist.gov/products/cpe/statistics", ha="left", fontsize=7)
fig = plt.gcf()
fig.set_size_inches(10,6)
fig.patch.set_facecolor('white')
plt.grid(True)

# Show the two Creative Commons icon files in their own small axes
# (axis decorations off), positioned with add_axes.
img_cc = plt.imread('cc.xlarge.png')
newax_cc = fig.add_axes([0.88, 0.0, 0.05, 0.05], anchor='NE', zorder=-1)
newax_cc.imshow(img_cc)
newax_cc.axis('off')
img_by = plt.imread('by.xlarge.png')
newax_by = fig.add_axes([0.92, 0.0, 0.05, 0.05], anchor='NE', zorder=-1)
newax_by.imshow(img_by)
newax_by.axis('off')

# Save the same figure twice at 300 dpi: white background and transparent background.
plt.savefig('cpe-number-of-new-entries-bg-white.png', dpi = 300, facecolor = 'white')
plt.savefig('cpe-number-of-new-entries-bg-transparent.png', dpi = 300, transparent = True)

../../_images/e19e9d6b5133aecf2fcd2fd1a0dbe3cca2d2ca92a866cf15dc82b79175b47638.png