Files
PatchGen/histogram-plot/plot.py
2024-09-17 10:45:39 +05:30

69 lines
2.2 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
import logging
import time
# Input/output locations. Relative paths are resolved against the directory
# the script is launched from.
PATCHES_CSV = 'histogram-plot/appended_output.csv'
OBJECTS_CSV = 'histogram-plot/pd-objects-csv.csv'
COUNTS_TXT = 'object_counts_all.txt'
HISTOGRAM_PNG = 'object_histogram_all.png'

# Number of bars in the histogram and how often progress is logged.
NUM_OBJECTS_TO_PLOT = 20
PROGRESS_EVERY = 1000


def count_object_occurrences(patch_contents, object_names,
                             progress_every=PROGRESS_EVERY):
    """Count whole-token occurrences of each object name across patches.

    Each patch string is split on whitespace and an object name is counted
    once for every token that equals it exactly (no substring matching).

    Args:
        patch_contents: Sized iterable of patch strings (anything with
            ``len()`` whose items have ``.split()``).
        object_names: Iterable of hashable names to look for. A name that
            is listed more than once is counted once per listing.
        progress_every: Log a progress line every this many rows; must be
            a positive integer.

    Returns:
        A ``Counter`` mapping object name to total occurrences. When at
        least one patch is processed, every name in *object_names* gets an
        entry (possibly 0), inserted in *object_names* order.
    """
    total_rows = len(patch_contents)
    object_counter = Counter()
    for i, patch_content in enumerate(patch_contents):
        if i % progress_every == 0:
            logging.info('Processing row %d/%d', i, total_rows)
        # Tokenize once per row; the lookups below are then O(1) each
        # instead of re-splitting and re-scanning the row per object name.
        token_counts = Counter(patch_content.split())
        for object_name in object_names:
            # Counter returns 0 for absent tokens, so names that never
            # occur still end up in the result with a zero count.
            object_counter[object_name] += token_counts[object_name]
    return object_counter


def main():
    """Read the CSVs, count object usage, and write the text and plot outputs."""
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info('Script started')
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments while the script runs.
    start_time = time.perf_counter()

    logging.info('Reading CSV files...')
    patches_df = pd.read_csv(PATCHES_CSV)
    objects_df = pd.read_csv(OBJECTS_CSV)
    object_names = objects_df['Object Name'].tolist()

    # Missing patch contents become empty strings so .split() is always valid.
    logging.info('Processing patch contents...')
    patches_df['Patch Contents'] = (
        patches_df['Patch Contents'].fillna('').astype(str)
    )
    object_counter = count_object_occurrences(
        patches_df['Patch Contents'], object_names
    )

    # Explicit UTF-8 keeps the output independent of the platform's default
    # encoding (which can reject non-ASCII object names).
    logging.info('Saving results to %s...', COUNTS_TXT)
    with open(COUNTS_TXT, 'w', encoding='utf-8') as file:
        file.writelines(
            f"{object_name}: {count}\n"
            for object_name, count in object_counter.most_common()
        )

    logging.info('Generating plot for the most common objects...')
    most_common_objects = object_counter.most_common(NUM_OBJECTS_TO_PLOT)
    labels = [name for name, _ in most_common_objects]
    heights = [count for _, count in most_common_objects]

    plt.figure(figsize=(10, 6))
    plt.bar(labels, heights, color='skyblue')
    plt.xlabel('Object Name')
    plt.ylabel('Occurrences')
    plt.title(f'Top {NUM_OBJECTS_TO_PLOT} Most Used Objects in Pd Patches')
    plt.xticks(rotation=90)
    plt.tight_layout()

    # Save before show(): the image is written even if the window is
    # closed immediately afterwards.
    plt.savefig(HISTOGRAM_PNG)
    logging.info('Plot saved as %s', HISTOGRAM_PNG)
    plt.show()

    elapsed_time = time.perf_counter() - start_time
    logging.info('Script finished in %.2f seconds', elapsed_time)


if __name__ == '__main__':
    main()