Publications | Chun-Yi Kuan

2025

Teaching Audio-Aware Large Language Models What Does Not Hear: Mitigating Hallucinations through Synthesized Negative Samples

Chun-Yi Kuan and Hung-yi Lee

2025

arXiv Bib PDF

% Preprint entry. No journal or proceedings title is recorded, so it is typed
% as misc (article requires a journal field). bibtex_show is braced so it is a
% literal value rather than an undefined string macro.
% NOTE(review): pages/organization suggest a proceedings paper; once the venue
% is confirmed, add a booktitle and switch the type to inproceedings.
@misc{kuan2025teaching,
  author       = {Kuan, Chun-Yi and Lee, Hung-yi},
  title        = {Teaching Audio-Aware Large Language Models What Does Not Hear: Mitigating Hallucinations through Synthesized Negative Samples},
  year         = {2025},
  pages        = {1--6},
  organization = {ISCA},
  bibtex_show  = {true},
}

NAACL 2025

Gender Bias in Instruction-Guided Speech Synthesis Models

Chun-Yi Kuan and Hung-yi Lee

2025

Bib PDF

% Conference paper (NAACL 2025). Typed as inproceedings so that booktitle is
% actually rendered; misc silently ignores booktitle. bibtex_show is braced so
% it is a literal value rather than an undefined string macro.
@inproceedings{kuan2025genderbiasinstructionguidedspeech,
  author       = {Kuan, Chun-Yi and Lee, Hung-yi},
  title        = {Gender Bias in Instruction-Guided Speech Synthesis Models},
  booktitle    = {2025 Annual Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics},
  year         = {2025},
  primaryclass = {cs.CL},
  bibtex_show  = {true},
}

ICASSP 2025

Can Large Audio-Language Models Truly Hear? Tackling Hallucinations with Multi-Task Assessment and Stepwise Audio Reasoning

Chun-Yi Kuan and Hung-yi Lee

In ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2025

arXiv Bib PDF

% ICASSP 2025 paper. The key keeps its 2024 suffix so existing citations still
% resolve. The booktitle separator is spaced ("2025 - 2025") so it no longer
% reads as a year range, and bibtex_show is braced so it is a literal value
% rather than an undefined string macro.
@inproceedings{kuan2024can,
  author       = {Kuan, Chun-Yi and Lee, Hung-yi},
  title        = {Can Large Audio-Language Models Truly Hear? Tackling Hallucinations with Multi-Task Assessment and Stepwise Audio Reasoning},
  booktitle    = {ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  organization = {IEEE},
  year         = {2025},
  bibtex_show  = {true},
}

2024

ICLR 2025

Dynamic-SUPERB Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks

Chien-yu Huang, Wei-Chih Chen, Shu-wen Yang, and 8 more authors

arXiv preprint arXiv:2411.05361, 2024

Bib PDF

% ICLR 2025 paper. Typed as inproceedings so the booktitle is used; the arXiv
% identifier moves from the journal field into eprint/archiveprefix. The title
% is stored in title case with the benchmark name brace-protected against
% style recasing. bibtex_show is braced so it is a literal value.
% NOTE(review): year is left at 2024, as in the original entry; confirm whether
% it should follow the ICLR 2025 proceedings year instead.
@inproceedings{huang2024dynamic,
  author        = {Huang, Chien-yu and Chen, Wei-Chih and Yang, Shu-wen and Liu, Andy T and Li, Chen-An and Lin, Yu-Xiang and Tseng, Wei-Cheng and Diwan, Anuj and Shih, Yi-Jen and Shi, Jiatong and others},
  title         = {{Dynamic-SUPERB} Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks},
  booktitle     = {The Thirteenth International Conference on Learning Representations (ICLR 2025)},
  year          = {2024},
  eprint        = {2411.05361},
  archiveprefix = {arXiv},
  bibtex_show   = {true},
}

Building a Taiwanese Mandarin Spoken Language Model: A First Attempt

Chih-Kai Yang, Yu-Kuan Fu, Chen-An Li, and 8 more authors

arXiv preprint arXiv:2411.07111, 2024

Bib PDF

% arXiv preprint. The identifier is stored in eprint/archiveprefix (plus the
% matching abs URL) instead of being packed into a journal field, and the entry
% is typed as misc because there is no journal. Proper nouns in the title are
% brace-protected against style recasing. bibtex_show is braced so it is a
% literal value rather than an undefined string macro.
@misc{yang2024building,
  author        = {Yang, Chih-Kai and Fu, Yu-Kuan and Li, Chen-An and Lin, Yi-Cheng and Lin, Yu-Xiang and Chen, Wei-Chih and Chung, Ho Lam and Kuan, Chun-Yi and Huang, Wei-Ping and Lu, Ke-Han and others},
  title         = {Building a {Taiwanese} {Mandarin} Spoken Language Model: A First Attempt},
  year          = {2024},
  eprint        = {2411.07111},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2411.07111},
  bibtex_show   = {true},
}

SLT 2024

Speech-Copilot: Leveraging Large Language Models for Speech Processing via Task Decomposition, Modularization, and Program Generation

Chun-Yi Kuan, Chih-Kai Yang, Wei-Ping Huang, and 2 more authors

2024

arXiv Bib PDF

% SLT 2024 paper. Typed as inproceedings so that booktitle and organization
% are used; misc silently ignores booktitle. The system name in the title is
% brace-protected against style recasing. bibtex_show is braced so it is a
% literal value rather than an undefined string macro.
@inproceedings{kuan2024speechcopilotleveraginglargelanguage,
  author       = {Kuan, Chun-Yi and Yang, Chih-Kai and Huang, Wei-Ping and Lu, Ke-Han and Lee, Hung-yi},
  title        = {{Speech-Copilot}: Leveraging Large Language Models for Speech Processing via Task Decomposition, Modularization, and Program Generation},
  booktitle    = {IEEE Spoken Language Technology Workshop 2024 (SLT)},
  organization = {IEEE},
  year         = {2024},
  url          = {https://arxiv.org/abs/2407.09886},
  bibtex_show  = {true},
}

EMNLP 2024

Large Language Model as an Assignment Evaluator: Insights, Feedback, and Challenges in a 1000+ Student Course

Cheng-Han Chiang, Wei-Chih Chen, Chun-Yi Kuan, and 2 more authors

In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, Nov 2024

arXiv Bib PDF

% EMNLP 2024 main-conference paper with DOI and anthology URL. Data is
% unchanged apart from bracing bibtex_show so it is a literal value rather
% than an undefined string macro; month stays a bare predefined macro.
@inproceedings{chiang-etal-2024-large,
  author      = {Chiang, Cheng-Han and Chen, Wei-Chih and Kuan, Chun-Yi and Yang, Chienchou and Lee, Hung-yi},
  title       = {Large Language Model as an Assignment Evaluator: Insights, Feedback, and Challenges in a 1000+ Student Course},
  booktitle   = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  publisher   = {Association for Computational Linguistics},
  address     = {Miami, Florida, USA},
  pages       = {2489--2513},
  month       = nov,
  year        = {2024},
  doi         = {10.18653/v1/2024.emnlp-main.146},
  url         = {https://aclanthology.org/2024.emnlp-main.146},
  bibtex_show = {true},
}

SLT 2024

Listen and Speak Fairly: A Study on Semantic Gender Bias in Speech Integrated Large Language Models

Yi-Cheng Lin, Tzu-Quan Lin, Chih-Kai Yang, and 4 more authors

2024

arXiv Bib PDF

% SLT 2024 paper. Typed as inproceedings: the entry carries a booktitle and no
% journal, so article would drop the venue and warn about the missing journal.
% bibtex_show is braced so it is a literal value rather than an undefined
% string macro.
@inproceedings{lin2024listen,
  author       = {Lin, Yi-Cheng and Lin, Tzu-Quan and Yang, Chih-Kai and Lu, Ke-Han and Chen, Wei-Chih and Kuan, Chun-Yi and Lee, Hung-yi},
  title        = {Listen and Speak Fairly: A Study on Semantic Gender Bias in Speech Integrated Large Language Models},
  booktitle    = {IEEE Spoken Language Technology Workshop 2024 (SLT)},
  organization = {IEEE},
  year         = {2024},
  bibtex_show  = {true},
}

INTERSPEECH 2024

Understanding Sounds, Missing the Questions: The Challenge of Object Hallucination in Large Audio-Language Models

Chun-Yi Kuan, Wei-Ping Huang, and Hung-yi Lee

2024

arXiv Bib PDF

% INTERSPEECH 2024 paper. Typed as inproceedings: the entry carries a
% booktitle and no journal, so article would drop the venue and warn about the
% missing journal. bibtex_show is braced so it is a literal value rather than
% an undefined string macro.
% NOTE(review): pages 1--6 is carried over unchanged; confirm against the
% published proceedings.
@inproceedings{kuan2024understanding,
  author       = {Kuan, Chun-Yi and Huang, Wei-Ping and Lee, Hung-yi},
  title        = {Understanding Sounds, Missing the Questions: The Challenge of Object Hallucination in Large Audio-Language Models},
  booktitle    = {2024 Conference of the International Speech Communication Association (INTERSPEECH)},
  organization = {ISCA},
  pages        = {1--6},
  year         = {2024},
  bibtex_show  = {true},
}

ICASSP 2024

Dynamic-SUPERB: Towards a Dynamic, Collaborative, and Comprehensive Instruction-Tuning Benchmark for Speech

Chien-yu Huang, Ke-Han Lu, Shih-Heng Wang, and 8 more authors

In ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024

arXiv Bib PDF

% ICASSP 2024 paper. The title is stored in title case with the benchmark name
% brace-protected against style recasing; the booktitle separator is spaced
% ("2024 - 2024") so it no longer reads as a year range; bibtex_show is braced
% so it is a literal value rather than an undefined string macro.
% NOTE(review): the key ends in "dynamid" (presumably to stay distinct from
% huang2024dynamic); it is left unchanged so existing citations still resolve.
@inproceedings{huang2024dynamid,
  author       = {Huang, Chien-yu and Lu, Ke-Han and Wang, Shih-Heng and Hsiao, Chi-Yuan and Kuan, Chun-Yi and Wu, Haibin and Arora, Siddhant and Chang, Kai-Wei and Shi, Jiatong and Peng, Yifan and others},
  title        = {{Dynamic-SUPERB}: Towards a Dynamic, Collaborative, and Comprehensive Instruction-Tuning Benchmark for Speech},
  booktitle    = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  organization = {IEEE},
  pages        = {12136--12140},
  year         = {2024},
  bibtex_show  = {true},
}

2023

ICASSP 2024

Investigating Zero-Shot Generalizability on Mandarin-English Code-Switched ASR and Speech-to-text Translation of Recent Foundation Models with Self-Supervision and Weak Supervision

Chih-Kai Yang, Kuan-Po Huang, Ke-Han Lu, and 3 more authors

2023

arXiv Bib PDF

% ICASSP 2024 paper. Typed as inproceedings: the entry carries a booktitle and
% no journal, so article would drop the venue and warn about the missing
% journal. Language names and the ASR acronym are brace-protected against
% style recasing; the booktitle separator is spaced ("2024 - 2024");
% bibtex_show is braced so it is a literal value.
% NOTE(review): year is left at 2023, as in the original entry; confirm whether
% it should follow the ICASSP 2024 proceedings year instead.
@inproceedings{yang2023investigating,
  author       = {Yang, Chih-Kai and Huang, Kuan-Po and Lu, Ke-Han and Kuan, Chun-Yi and Hsiao, Chi-Yuan and Lee, Hung-yi},
  title        = {Investigating Zero-Shot Generalizability on {Mandarin-English} Code-Switched {ASR} and Speech-to-text Translation of Recent Foundation Models with Self-Supervision and Weak Supervision},
  booktitle    = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  organization = {IEEE},
  year         = {2023},
  bibtex_show  = {true},
}

ASRU 2023

Towards General-Purpose Text-Instruction-Guided Voice Conversion

Chun-Yi Kuan, Chen-An Li, Tsu-Yuan Hsu, and 5 more authors

In 2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), 2023

arXiv Bib PDF

% ASRU 2023 workshop paper. Data is unchanged apart from bracing bibtex_show so
% it is a literal value rather than an undefined string macro.
@inproceedings{kuan2023towards,
  author       = {Kuan, Chun-Yi and Li, Chen-An and Hsu, Tsu-Yuan and Lin, Tse-Yang and Chung, Ho-Lam and Chang, Kai-Wei and Chang, Shuo-Yiin and Lee, Hung-yi},
  title        = {Towards General-Purpose Text-Instruction-Guided Voice Conversion},
  booktitle    = {2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  organization = {IEEE},
  pages        = {1--8},
  year         = {2023},
  bibtex_show  = {true},
}