% Journal article entry (Scientific Data, vol. 9, 2022).
% The citation key is left unchanged so existing citations still resolve.
@article{35702,
  author   = {Baibakova, Viktoriia and Elzouka, Mahmoud and Lubner, Sean D. and Prasher, Ravi S. and Jain, Anubhav},
  title    = {Optical emissivity dataset of multi-material heterogeneous designs generated with automated figure extraction},
  journal  = {Scientific Data},
  volume   = {9},
  year     = {2022},
  month    = dec,
  doi      = {10.1038/s41597-022-01699-3},
  url      = {https://www.nature.com/articles/s41597-022-01699-3},
  language = {eng},
  abstract = {Optical device design is typically an iterative optimization process based on a good initial guess from prior reports. Optical properties databases are useful in this process but difficult to compile because their parsing requires finding relevant papers and manually converting graphical emissivity curves to data tables. Here, we present two contributions: one is a dataset of thermal emissivity records with design-related parameters, and the other is a software tool for automated colored curve data extraction from scientific plots. We manually collected 64 papers with 176 figures reporting thermal emissivity and automatically retrieved 153 colored curve data records. The automated figure analysis software pipeline uses Faster R-CNN for axes and legend object detection, EasyOCR for axes numbering recognition, and k-means clustering for colored curve retrieval. Additionally, we manually extracted geometry, materials, and method information from the text to add necessary metadata to each emissivity curve. Finally, we analyzed the dataset to determine the dominant classes of emissivity curves and determine the underlying design parameters leading to a type of emissivity profile.},
}