% Conference paper introducing the BuildingQA benchmark dataset.
% The exported record carried the date 19/11/2025 (day/month/year) in `month`;
% only the month macro is kept there, since `month` must hold a month, not a date.
% NOTE(review): presumably 19 Nov 2025 is the publication/conference date -- confirm.
@inproceedings{bibcite_36982,
  author    = {Mulayim, Ozan Baris and Anwar, Avia and Saka, Umut Mete and Paul, Lazlo and Prakash, Anand and Fierro, Gabe and Pritoni, Marco and Berg{\'e}s, Mario},
  title     = {{BuildingQA}: A Benchmark for Natural Language Question Answering over Building Knowledge Graphs},
  booktitle = {Proceedings of the 12th {ACM} International Conference on Systems for Energy-Efficient Buildings, Cities, and Transportation},
  pages     = {65--75},
  publisher = {ACM},
  year      = {2025},
  month     = nov,
  doi       = {10.1145/3736425.3770097},
  url       = {https://doi.org/10.1145/3736425.3770097},
  keywords  = {Semantic Ontology, Knowledge, Large Language Models},
  abstract  = {Graph-based representations of building metadata using ontologies like Brick are vital for smart building applications, but querying them remains a challenge for practitioners. Knowledge Graph Question Answering (KGQA) systems, meant to retrieve answers from natural language questions, traditionally require large-scale training data, making them ill-suited for the specialized and data-scarce building domain. The advent of Large Language Models (LLMs) offers a paradigm shift, enabling zero-shot natural language querying without building/domain-specific training. Yet, there is no standardized benchmark for building-specific KGQA which can guide and validate research in this area.
    To address this gap, our work makes three primary contributions. First, we introduce the BuildingQA Benchmark Dataset, constructed through a multi-stage process of collecting practitioner data, augmenting it with LLMs for linguistic diversity, and curating a final set of 188 questions across 4 buildings. Second, we characterize the benchmark's complexity and ambiguity, introducing a novel method to quantify its ``lexical gap'' and providing a four-stage diagnostic framework for analyzing how systems fail. Third, we benchmark zero-shot LLM-powered KGQA systems to establish baseline performance and analyze their failure modes.
    Our evaluation reveals that top-performing systems achieve a maximum F1 score of only 0.38. This result does not indicate a failure of these powerful systems, but rather underscores the unique challenges posed by our benchmark. It demonstrates a critical performance gap, showing that current methods successful on general KGs struggle with the specific lexical and structural nuances of the building domain. BuildingQA thus provides the benchmark dataset and foundational analysis needed to drive the development of novel, domain-aware methods required to unlock the use of semantic data in buildings.},
}