SEMDimpleExtractor/process_SEM.py at main · RESSLab-Team/SEMDimpleExtractor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Import libraries
from IPython import get_ipython
import cv2
from PIL import Image
from skimage import filters
import exifread
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import skimage
import os
import math

def round_down(n, decimals=0):
    multiplier = 10**decimals
    return math.floor(n * multiplier) / multiplier

def round_up(n, decimals=0):
    multiplier = 10**decimals
    return math.ceil(n * multiplier) / multiplier

def single_figure(x_label, y_label, tr_margin, fntsz, lgd_loc, xlims, ylims, filename):
    """
    Basic single figure properties.
    :param x_label: x label of the figure.
    :param y_label: y label of the figure.
    :param tr_margin: top/right figure bound line ('none'/'k').
    :param fntsz: font size.
    :param lgd_loc: location of the legend.
    :param xlims: limits of the x axis.
    :param ylims: limits of the y axis.
    :param filename: filename to be saved.
    """
    plt.xlabel(x_label, fontsize=fntsz)
    plt.ylabel(y_label, fontsize=fntsz)
    plt.gca().spines['right'].set_color(tr_margin)      # Use 'none' to erase this bound
    plt.gca().spines['top'].set_color(tr_margin)        # Use 'none' to erase this bound
    plt.grid(linestyle='dashed', linewidth=0.3, color=(0.8, 0.8, 0.8, 0.5), dashes=(14, 7), zorder=0)
    plt.gca().set_axisbelow(True)                       # Ensure the grid is below the plot
    # plt.tight_layout()
    is_label = plt.gca().get_legend_handles_labels()[1]
    if is_label:
        legend = plt.legend(loc=lgd_loc, fancybox=False, fontsize=fntsz*0.8)
        legend.get_frame().set_linewidth(0.4)           # Adjust the linewidth as needed
    if xlims:
        plt.gca().set_xlim(xlims)
    if ylims:
        plt.gca().set_ylim(ylims)
    plt.savefig(filename+'.pdf')
    plt.savefig(filename+'.png', dpi=600, transparent=False)

def group_close_values(d, tolerance_factor=0.5):
    """
    Group values in groups of close values.
    :param d: Array of randomly sorted values.
    :return: Grouped array.
    """
    # Find contours of the voids
    d.sort()
    diff = [d[i+1]-d[i] for i in range(len(d)-1)]
    avg = sum(diff) / len(diff) * tolerance_factor
    m = [[d[0]]]
    for x in d[1:]:
        if x - m[-1][-1] < avg:
            m[-1].append(x)
        else:
            m.append([x])
    return m

def find_contour_radius(img):
    """
    Get contour of binary image and the corresponding areas and equivalent radius.
    :param img: Input binary image.
    :return: Contour, area and equivalent lists.
    """
    # Find contours of the voids
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    A = []
    R = []
    # Add area surrounded by the contour to the list
    for i in range(len(contours)):
        A.append(cv2.contourArea(contours[i]))
        R.append(np.sqrt(A[i] / np.pi))
    return contours, A, R

def process_sem(img, f_size=3, clahe_size=4, thres=10, opening_size=2):
    """
    Process SEM image and get statistics of voids.
    :param img: Initial image.
    :param f_size: Median filter size.
    :param clahe_size: Adaptive histogram equalization local grid size.
    :param thres: Threshold for void segmentation.
    :param opening_size: Remove island size.
    :return: void area and radius lists of the image.
    """
    # Median filter
    f_img = cv2.medianBlur(img, f_size)
    # Adaptive histogram equalization
    clahe = cv2.createCLAHE(tileGridSize=(clahe_size, clahe_size))
    cl_img = clahe.apply(f_img)
    # Morphological opening on a binary image after thresholding
    o_img = skimage.morphology.opening(cl_img < thres,
                                       skimage.morphology.disk(opening_size), out=None)
    # Get contour of the voids and obtain area and equivalent radius distribution
    contours, area, radius = find_contour_radius(np.array(o_img, np.uint8))

    # Visualization
    # Original image
    plt.imshow(img, cmap='gray'); plt.xticks([]); plt.yticks([])
    plt.close()
    # Image after filtering
    plt.imshow(f_img, cmap='gray'); plt.xticks([]); plt.yticks([])
    plt.close()
    # Image after histogram equalization
    plt.imshow(cl_img, cmap='gray'); plt.xticks([]); plt.yticks([])
    plt.close()
    # Binary image after thresholding and opening
    plt.imshow(o_img, cmap='gray'); plt.xticks([]); plt.yticks([])
    plt.close()

    # Contours extracted
    im = np.array(np.expand_dims(o_img, axis=-1), np.uint8)
    im = np.concatenate((im, im, im), axis=-1)
    cv2.drawContours(im, contours, -1, (255, 0, 0), 1)
    # plt.imshow(im); plt.show()
    # Equivalent radius histogram
    # plt.hist(radius); plt.show()
    return area, radius

plt.rcParams.update({
    "text.usetex": True,
    "font.family": "Times New Roman",
    "mathtext.fontset": "dejavuserif",
    "axes.linewidth": 0.4,
    "legend.edgecolor": 'k',
    "legend.borderpad": 0.2,
    "legend.labelspacing": 0.10,
    "legend.borderaxespad": 0.1,
    "hatch.color": 'k',
    "hatch.linewidth": 0.2})

plt.rcParams.update({'hatch.color': 'k'})
plt.rcParams['hatch.color'] = 'black'  # Set hatch color to black

# Create Results directory if it doesn't exist
os.makedirs('Results', exist_ok=True)

# Image folder
img_folder = ["SEM_Images"] # "Monotonic", "Cyclic"
SEM_mag = ['2000x', '500x', '250x'] # The smaller this number is, the larger the pixel size

# Find pixel sizes of all images and group them in close groups (e.g. Mag 250x, 500x, 2000x or very close to such values)
img_pxl_sz_group = {}
img_pxl_sz = {}
img_area = {}
for img_dir in img_folder:
    spec_info_file = [d for d in os.listdir(os.path.join(img_dir)) if os.path.isdir(os.path.join(img_dir, d))]
    img_pxl_sz_tot = []
    img_pxl_sz[img_dir] = {}
    img_area[img_dir] = {}
    for spec in spec_info_file:
        img_info_file = os.listdir(os.path.join(img_dir, spec))
        img_pxl_sz[img_dir][spec] = {}
        img_area[img_dir][spec] = {}
        for img in img_info_file:
            img_path = os.path.join(img_dir, spec, img)
            with open(img_path, 'rb') as read_tiff:
                tags = exifread.process_file(read_tiff, details=False)
            try:
                pxl_tag = tags.get('Image Tag 0x8546', None)
                if pxl_tag is None:
                    raise KeyError(f"Tag 'Image Tag 0x8546' not found in {img_path}")
                pxl_val = float(str(pxl_tag.values)[9:21])
                img_pxl_sz[img_dir][spec][img] = pxl_val    # Image pixel size (to get pxl to mm)
                img_len = float(tags['Image ImageLength'].values[0])
                img_wid = float(tags['Image ImageWidth'].values[0])
                img_area[img_dir][spec][img] = pxl_val**2 * img_len * img_wid * 1000 * 1000 # Find the area of the figure in terms of mm
                img_pxl_sz_tot = np.append(img_pxl_sz_tot, pxl_val)
            except Exception as e:
                print(f"Error reading EXIF tags for {img_path}: {e}")
    img_pxl_sz_group[img_dir] = group_close_values(np.unique(img_pxl_sz_tot))                # Separate images in different pixel size images

# Process images to find the dimple diameter for specimens grouped  by img_folder and img_pxl_sz_group
for img_dir in img_folder:
    spec_info_file = [d for d in os.listdir(os.path.join(img_dir)) if os.path.isdir(os.path.join(img_dir, d))]
    print('Processing images in folder: '+img_dir)
    # Lists for saving void area and radius information from the images
    area_all = [[] for _ in range(len(img_pxl_sz_group[img_dir]))]
    diam_all = [[] for _ in range(len(img_pxl_sz_group[img_dir]))]
    image_area_all = [[] for _ in range(len(img_pxl_sz_group[img_dir]))]
    for spec in spec_info_file:
        img_info_file = [f for f in os.listdir(os.path.join(img_dir, spec)) if os.path.isfile(os.path.join(img_dir, spec, f))]
        for img in img_info_file:
            image = np.array(Image.open(os.path.join(img_dir, spec, img)))[:, :, 0]
            area, radius = process_sem(image, f_size=9, clahe_size=5, thres=50, opening_size=10)
            for outer_index, inner_array in enumerate(img_pxl_sz_group[img_dir]):
                try:
                    index_group = inner_array.index(img_pxl_sz[img_dir][spec][img])
                    break
                except ValueError:
                    continue
            area_all[outer_index] = area_all[outer_index] + [i*(float(img_pxl_sz[img_dir][spec][img])*1000)**2 for i in area]
            diam_all[outer_index] = diam_all[outer_index] + [i*float(img_pxl_sz[img_dir][spec][img])*2*1000 for i in radius]  # From pixel to mm
            image_area_all[outer_index] = np.append(image_area_all[outer_index], np.repeat(img_area[img_dir][spec][img],len(area)))
    for mag in range(0, len(img_pxl_sz_group[img_dir])):
        fig1, ax1 = plt.subplots(figsize = (8/2.54, 6/2.54)); ax1.set_position([0.2, 0.2, 0.77, 0.77])
        plt.hist(diam_all[mag], bins=20,rwidth=1.0, fc=(0.8, 0.8, 0.8, 0.8), edgecolor=(0.0, 0.0, 0.0, 1.0), lw=0.5)
        results_dir = os.path.join('Results')
        single_figure('Dimple diameter [mm]', 'Number of dimples', 'k', 12, '', [], [], os.path.join(results_dir, img_dir+'_dimple_diam_hist'+'_Mag_'+SEM_mag[mag]))

        fig2, ax2 = plt.subplots(figsize = (8/2.54, 6/2.54)); ax2.set_position([0.2, 0.2, 0.77, 0.77])
        plt.plot(np.sort(diam_all[mag]), np.linspace(0, 1, len(diam_all[mag]), endpoint=False), lw=1.0, color=(0.0, 0.0, 0.0, 1.0))
        single_figure('Dimple diameter [mm]', 'CDF', 'k', 12, '', [], [], os.path.join(results_dir, img_dir+'_dimple_diam_CDF'+'_Mag_'+SEM_mag[mag]))

    weights1 = np.ones_like(diam_all[0]) / image_area_all[0]
    weights2 = np.ones_like(diam_all[1]) / image_area_all[1]
    weights3 = np.ones_like(diam_all[2]) / image_area_all[2]

    diam_all_tot = diam_all[0]+diam_all[1]+diam_all[2]

    fig1, ax1 = plt.subplots(figsize = (8/2.54, 6/2.54)); ax1.set_position([0.25, 0.20, 0.70, 0.72])
    plt.hist(diam_all[0], bins=np.arange(min(diam_all[0]), max(diam_all[0]) + 0.001, 0.001),
             rwidth=1.0, linestyle='solid', fc=(0.1, 0.1, 0.1, 0.5), edgecolor='black',
             lw=0.3, label=SEM_mag[0], weights=weights1, hatch='/////')
    plt.hist(diam_all[1], bins=np.arange(min(diam_all[1]), max(diam_all[1]) + 0.001, 0.001),
             rwidth=1.0, linestyle='solid', fc=(0.8, 0.2, 0.4, 0.5), edgecolor='black',
             lw=0.3, label=SEM_mag[1], weights=weights2, hatch='')
    plt.hist(diam_all[2], bins=np.arange(min(diam_all[2]), max(diam_all[2]) + 0.001, 0.001),
             rwidth=1.0, linestyle='solid', fc=(0.9, 0.9, 0.9, 0.5), edgecolor='black',
             lw=0.3, label=SEM_mag[2], weights=weights3, hatch='\\\\\\\\')
    plt.xticks(np.arange(0, round_up(max(diam_all_tot), 2) + 0.03, 0.03))
    single_figure('Dimple diameter [mm]', 'Nr. of dimples per mm$^2$', 'k', 12, 'upper right', [0,round_up(max(diam_all_tot),2)], [0,round_up(plt.gca().get_ylim()[1],0)], os.path.join(results_dir, img_dir+'_dimple_diam_hist'))

    fig2, ax2 = plt.subplots(figsize = (8/2.54, 6/2.54)); ax2.set_position([0.25, 0.20, 0.70, 0.72])
    plt.plot(np.sort(diam_all_tot), np.linspace(0, 1, len(diam_all_tot), endpoint=False), lw=1.0, color=(0.0, 0.0, 0.0, 1.0), label='all images')
    plt.plot(np.sort(diam_all[0]), np.linspace(0, 1, len(diam_all[0]), endpoint=False), lw=1.0, linestyle='-.', color=(0.8, 0.2, 0.4, 1.0), label=SEM_mag[0])
    plt.plot(np.sort(diam_all[1]), np.linspace(0, 1, len(diam_all[1]), endpoint=False), lw=1.0, linestyle='dotted', color=(0.8, 0.2, 0.4, 1.0), label=SEM_mag[1])
    plt.plot(np.sort(diam_all[2]), np.linspace(0, 1, len(diam_all[2]), endpoint=False), lw=1.0, linestyle='dashed', color=(0.8, 0.2, 0.4, 1.0), label=SEM_mag[2])
    plt.xticks(np.arange(0, round_up(max(diam_all_tot), 2) + 0.03, 0.03))
    single_figure('Dimple diameter [mm]', 'CDF', 'k', 12, 'lower right', [0,round_up(max(diam_all_tot),2)], [0,1], os.path.join(results_dir, img_dir+'_dimple_diam_CDF'))

    exp_dimple_dia = [np.max(diam_all[0]), np.median(diam_all[0]), np.min(diam_all[0]),
                      np.max(diam_all[1]), np.median(diam_all[1]), np.min(diam_all[1]),
                      np.max(diam_all[2]), np.median(diam_all[2]), np.min(diam_all[2]),
                      np.max(diam_all_tot), np.median(diam_all_tot), np.min(diam_all_tot),]