<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Russian Journal of Linguistics</journal-id><journal-title-group><journal-title xml:lang="en">Russian Journal of Linguistics</journal-title><trans-title-group xml:lang="ru"><trans-title>Russian Journal of Linguistics</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2687-0088</issn><issn publication-format="electronic">2686-8024</issn><publisher><publisher-name xml:lang="en">Peoples’ Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">31335</article-id><article-id pub-id-type="doi">10.22363/2687-0088-30084</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Articles</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Статьи</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="zh"><subject>Articles</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Word frequency and text complexity: an eye-tracking study of young Russian readers</article-title><trans-title-group xml:lang="ru"><trans-title>Влияние частотности слов текста на его сложность: экспериментальное исследование читателей младшего школьного возраста методом айтрекинга</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-0693-7657</contrib-id><name-alternatives><name 
xml:lang="en"><surname>Laposhina</surname><given-names>Antonina N.</given-names></name><name xml:lang="ru"><surname>Лапошина</surname><given-names>Антонина Николаевна</given-names></name></name-alternatives><bio xml:lang="en"><p>leading expert of the Laboratory of Cognitive and Linguistic Studies</p></bio><bio xml:lang="ru"><p>ведущий эксперт лаборатории когнитивных и лингвистических исследований</p></bio><email>ANLaposhina@pushkin.institute</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-9893-9846</contrib-id><name-alternatives><name xml:lang="en"><surname>Lebedeva</surname><given-names>Maria Yu.</given-names></name><name xml:lang="ru"><surname>Лебедева</surname><given-names>Мария Юрьевна</given-names></name></name-alternatives><bio xml:lang="en"><p>holds a PhD in Philology and is a leading researcher of the Laboratory of Cognitive and Linguistic Studies, Associate Professor of the Department of Methods of Teaching Russian as a Foreign Language</p></bio><bio xml:lang="ru"><p>кандидат филологических наук, ведущий научный сотрудник лаборатории когнитивных и лингвистических исследований, доцент кафедры методики преподавания РКИ</p></bio><email>MULebedeva@pushkin.institute</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-2034-1526</contrib-id><name-alternatives><name xml:lang="en"><surname>Berlin Khenis</surname><given-names>Alexandra A.</given-names></name><name xml:lang="ru"><surname>Берлин Хенис</surname><given-names>Александра Алехандровна</given-names></name></name-alternatives><bio xml:lang="en"><p>specialist of the Laboratory of Cognitive and Linguistic Studies</p></bio><bio xml:lang="ru"><p>специалист лаборатории когнитивных и лингвистических исследований</p></bio><email>alexa.munxen@gmail.com</email><xref ref-type="aff" 
rid="aff1"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Pushkin State Russian Language Institute</institution></aff><aff><institution xml:lang="ru">Государственный институт русского языка имени А.С. Пушкина</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2022-06-29" publication-format="electronic"><day>29</day><month>06</month><year>2022</year></pub-date><volume>26</volume><issue>2</issue><issue-title xml:lang="en">Computational Linguistics and Discourse Complexology</issue-title><issue-title xml:lang="ru">Компьютерная лингвистика и дискурсивная комплексология</issue-title><fpage>493</fpage><lpage>514</lpage><history><date date-type="received" iso-8601-date="2022-06-29"><day>29</day><month>06</month><year>2022</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2022, Laposhina A.N., Lebedeva M.Y., Berlin Khenis A.A.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2022, Лапошина А.Н., Лебедева М.Ю., Берлин Хенис А.А.</copyright-statement><copyright-statement xml:lang="zh">Copyright © 2022, Laposhina A., Lebedeva M., Berlin Khenis A.</copyright-statement><copyright-year>2022</copyright-year><copyright-holder xml:lang="en">Laposhina A.N., Lebedeva M.Y., Berlin Khenis A.A.</copyright-holder><copyright-holder xml:lang="ru">Лапошина А.Н., Лебедева М.Ю., Берлин Хенис А.А.</copyright-holder><copyright-holder xml:lang="zh">Laposhina A., Lebedeva M., Berlin Khenis A.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/linguistics/article/view/31335">https://journals.rudn.ru/linguistics/article/view/31335</self-uri><abstract xml:lang="en"><p style="text-align: justify;">Although word frequency is often
associated with the cognitive load on the reader and is widely used for automated text complexity assessment, to date, no eye-tracking data have been obtained on the effectiveness of this parameter for text complexity prediction for the Russian primary school readers. Besides, the optimal ways for taking into account the frequency of individual words to assess an entire text complexity have not yet been precisely determined. This article aims to fill these gaps. The study was conducted on a sample of 53 children of primary school age. As a stimulus material, we used 6 texts that differ in the classical Flesch readability formula and data on the frequency of words in texts. As sources of the frequency data, we used the common frequency dictionary based on the material of the Russian National Corpus and DetCorpus - the corpus of literature addressed to children. The speed of reading the text aloud in words per minute averaged over the grades was employed as a measure of the text complexity. The best predictive results of the relative reading time were obtained using the lemma frequency data from the DetCorpus. At the text level, the highest correlation with the reading speed was shown by the text coverage with a list of 5,000 most frequent words, while both sources of the lists - Russian National Corpus and DetCorpus - showed almost the same correlation values. For a more detailed analysis, we also calculated the correlation of the frequency parameters of specific word forms and lemmas with three parameters of oculomotor activity: the dwell time, fixations count, and the average duration of fixations. At the word-by-word level, the lemma frequency by DetCorpus demonstrated the highest correlation with the relative reading time. 
The results we obtained confirm the feasibility of using frequency data in the text complexity assessment task for primary school children and demonstrate the optimal ways to calculate frequency data.</p></abstract><trans-abstract xml:lang="ru"><p style="text-align: justify;">Параметр частотности слова во многих исследовательских трудах связывается с когнитивной нагрузкой на читателя и широко используется в автоматических системах анализа сложности текста. Однако к настоящему моменту для русскоязычного материала не представлено достаточное количество экспериментальных данных о влиянии параметра частотности слов на сложность текста, собранных с помощью метода айтрекинга. Кроме того, не определены оптимальные способы учета частотности отдельных слов для характеристики целого текста. Целью данной статьи является заполнение этих лакун. Исследование проводилось на выборке 53 детей младшего школьного возраста. Материалом для эксперимента выступили 6 текстов, отличающихся по параметрам классической формулы читабельности Флеша и данным о частотности слов в текстах. В качестве источников данных о частотности слов использованы как стандартный частотный словарь на материале Национального корпуса русского языка, так и корпус литературы, адресованной детям, ДетКорпус. В качестве меры сложности текста использовался параметр скорости чтения текста вслух в словах в минуту, усредненный по классам. Для более детального анализа были произведены подсчеты корреляции параметров частотности конкретных словоформ и их лемм с тремя параметрами глазодвигательной активности: средней относительной скорости чтения слова, средней длительности фиксаций и средним количеством фиксаций. На пословном уровне анализа наивысший коэффициент корреляции с относительным временем чтения продемонстрировали данные частотности леммы по корпусу детской литературы. 
На уровне анализа текстов наиболее высокую корреляцию со средним временем чтения фрагмента показал параметр процента покрытия текста списком 5 000 самых частотных слов, при этом данные по разным источникам показали близкие значения. Приведенные результаты айтрекингового эксперимента подтверждают связь сложности текста и частотности входящих в него слов на материале для младших школьников, а также обозначают оптимальную методику и источники подсчета частотности для данной задачи.</p></trans-abstract><kwd-group xml:lang="en"><kwd>text complexity</kwd><kwd>text readability</kwd><kwd>word frequency</kwd><kwd>eye tracking</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>сложность текста</kwd><kwd>читабельность текста</kwd><kwd>частотность слова</kwd><kwd>айтрекинг</kwd></kwd-group><funding-group><funding-statement xml:lang="ru">Работа выполнена с использованием средств государственного бюджета по госзаданию на 2020–2024 годы (проект FZNM-2020-0005).</funding-statement></funding-group></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Иомдин Б.Л., Морозов Д.А. Кто поймет «Незнайку»? Автоматическое определение сложности текстов для детей // Русская речь. 2021. № 5. С. 55-68. [Iomdin, Boris L. &amp; Dmitry A. Morozov. 2021. Who can understand “Dunno”? Automatic assessment of text complexity in children’s literature. Russian Speech 5. 55-68 (In Russ.)]. https://doi.org/10.31857/S013161170017239-1</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Корнеев А.А., Ахутина Т.В., Матвеева Е.Ю. Особенности чтения третьеклассников с разным уровнем развития навыка: анализ движений глаз // Вестник Московского университета. Серия 14. Психология. 2019. № 2. С. 64-87. [Korneev, Aleksei A., Tatiana V. Akhutina &amp; Ekaterina Yu. Matveeva. 2019. Reading in third graders with different state of the skill: An eye-tracking study. Vestnik Moskovskogo Universiteta. Seriya 14. Psikhologiya 2. 64-87. (In Russ.)]. 
https://doi.org/10.11621/vsp.2019.02.64</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Криони Н.К., Никин А.Д., Филиппова А.В. Автоматизированная система анализа сложности учебных текстов // Вестник Уфимского государственного авиационного технического университета. 2008. № 11 (1). С. 101-107. [Krioni, Nikolai K., Aleksei D. Nikin &amp; Anastasia V. Filippova. 2008. Automated system for analyzing the complexity of educational texts. Bulletin of the Ufa State Aviation Technical University 11(1). 101-107. (In Russ.)].</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Лапошина А.Н., Веселовская Т.С., Лебедева М.Ю., Купрещенко О.Ф. Лексический состав текстов учебников русского языка для младшей школы: корпусное исследование // Компьютерная лингвистика и интеллектуальные технологии: по материалам международной конференции «Диалог 2019». 2019. Т. 18 (25). С. 351-363. [Laposhina, Antonina N., Tatiana S. Veselovskaya, Maria U. Lebedeva &amp; Olga F. Kupreshchenko. 2019. Lexical analysis of the Russian language textbooks for primary school: Corpus study. Computational Linguistics and Intellectual Technologies: Proceedings of the International Conference “Dialogue 2019” 18. 351-363. (In Russ.)].</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Мартынова Е.В., Солнышкина М.И., Мерзлякова А.Ф., Гизатулина Д.Ю. Лексические параметры учебного текста (на материале текстов учебного корпуса русского языка) // Филология и культура. 2020. № 3 (61). С. 72-80. [Martynova, Ekaterina V., Marina I. Solnyshkina, Amina F. Merzlyakova &amp; Diana Yu. Gizatulina. 2020. Lexical parameters of the academic text (based on the texts of the academic corpus of the Russian language). Philology and Culture 3. 72-80. (In Russ.)]. https://doi.org/10.26907/2074-0239-2020-61-3-72-80</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Мизернов И.Ю., Гращенко Л.А. Анализ методов оценки сложности текста.
// Новые информационные технологии в автоматизированных системах. 2015. № 18. С. 572-581. [Mizernov, I. Yu. &amp; L. A. Grashchenko. 2015. Analysis of methods for assessing text complexity. New Information Technologies in Automated Systems 18. 572-581. (In Russ.)].</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Микк Я.А. О факторах понятности учебного текста: автореф. дис. … канд. пед. наук. Тарту, 1970. 22 с. [Mikk, Ya.A. 1970. Factors of educational text clarity. Abstract of Pedagogy Cand. Diss. Tartu. (In Russ.)].</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Оборнева И.В. Автоматизированная оценка сложности учебных текстов на основе статистических параметров: дис... канд. пед. наук: 13.00.02. М., 2006. 165 с. [Oborneva, Irina V. 2006. Automated estimation of complexity of educational texts on the basis of statistical parameters. Pedagogy Cand. Diss. Moscow. (In Russ.)].</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Солнышкина М.И., Кисельников А.С. Сложность текста: этапы изучения в отечественном прикладном языкознании. // Вестник Томского государственного университета. Филология. 2015. № 6 (38). С. 86-99. [Solnyshkina, Marina I. &amp; Alexander S. Kiselnikov. 2015. Text complexity: Study phases in Russian linguistics. Tomsk State University Journal of Philology 6. 86-99. (In Russ.)]. https://doi.org/10.17223/19986645/38/7</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Шпаковский Ю.Ф. Разработка количественной методики оценки трудности восприятия учебных текстов для высшей школы // Научно-технический вестник информационных технологий, механики и оптики. 2008. № 1 (83). С. 110-117. [Shpakovsky, Yury F. 2008. Development of a quantitative methodology for assessing the difficulty of perceiving educational texts for higher education. Scientific and Technical Bulletin of Information Technologies, Mechanics and Optics 1(83). 110-117. 
(In Russ.)].</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Chall, Jeanne S. &amp; Edgar Dale. 1995. Readability Revisited: The New Dale-Chall Readability Formula. Cambridge, MA: Brookline Books.</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Chen, Xiaobin &amp; Detmar Meurers. 2016. Characterizing text difficulty with word frequencies. In Joel Tetreault, Jill Burstein, Claudia Leacock &amp; Helen Yannakoudakis (eds.), Proceedings of the 11th workshop on innovative use of NLP for building educational applications, 84-94. San Diego: Association for Computational Linguistics.</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Clifton, Jr. Charles, Adrian Staub &amp; Keith Rayner. 2007. Eye movements in reading words and sentences. In Roger P. G. van Gompel, Martin H. Fischer, Wayne S. Murray &amp; Robin L. Hill (eds.), Eye movements: A window on mind and brain, 341-371. Elsevier. https://doi.org/10.1016/B978-008044980-7/50017-3</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Dorofeeva, Svetlana V., Victoria Reshetnikova, Margarita Serebryakova, Daria Goranskaya, Tatiana V. Akhutina &amp; Olga Dragoy. 2019. Assessing the validity of the standardized assessment of reading skills in Russian and verifying the relevance of available normative data. The Russian Journal of Cognitive Science 6(1). 4-24.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>DuBay, William H. 2007. Smart Language: Readers, Readability, and the Grading of Text. Costa Mesa, California: Impact Information.</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Farris-Trimble, Ashley &amp; Bob McMurray. 2018. Morpho-phonological regularities influence the dynamics of real-time word recognition: Evidence from artificial language learning. Laboratory Phonology 9(1). 1-34.
https://doi.org/10.5334/labphon.41</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Francois, Thomas &amp; Cedrick Fairon. 2012. An ‘AI readability’ formula for French as a foreign language. Proceedings of the EMNLP and CoNLL 2012, Jeju Island, Korea, 12-14 July 2012. 466-477.</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Glazkova, Anna, Yury Egorov &amp; Maxim Glazkov. 2021. A comparative study of feature types for age-based text classification. In Analysis of Images, Social Networks and Texts. AIST 2020. Lecture Notes in Computer Science 12602. 120-134.</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Graesser, Arthur C., Danielle S. McNamara, Zhiqiang Cai, Mark Conley, Haiying Li &amp; James Pennebaker. 2014. Coh-Metrix measures text characteristics at multiple levels of language and discourse. The Elementary School Journal 115. 210-229.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Griffin, Zenzi M. &amp; Daniel H. Spieler. 2006. Observing the what and when of language production for different age groups by monitoring speakers’ eye movements. Brain and Language 99(3). 272-288.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Henderson, John M., Aleksander Pollatsek &amp; Keith Rayner. 1989. Covert visual attention and extrafoveal information use during object identification. Perception &amp; Psychophysics 45. 196-208. https://doi.org/10.3758/BF03210697</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Jian, Yu-Cin &amp; Hwawei Ko. 2017. Influences of text difficulty and reading ability on learning illustrated science texts for children: An eye movement study. Computers &amp; Education 113. 263-279.</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Lexile. 2007. The Lexile Framework for Reading: Theoretical Framework and Development. Technical Report.
MetaMetrics, Inc., Durham, NC.</mixed-citation></ref><ref id="B24"><label>24.</label><mixed-citation>Luke, Steven G., John M. Henderson &amp; Fernanda Ferreira. 2015. Children’s eye-movements during reading reflect the quality of lexical representations: An individual differences approach. Journal of Experimental Psychology: Learning, Memory, and Cognition 41(6). 1675-1683. https://doi.org/10.1037/xlm0000133</mixed-citation></ref><ref id="B25"><label>25.</label><mixed-citation>Raney, Gary E. &amp; Keith Rayner. 1995. Word frequency effects and eye movements during two readings of a text. Canadian Journal of Experimental Psychology 49. 151-172.</mixed-citation></ref><ref id="B26"><label>26.</label><mixed-citation>Rau, Anne K., Kristina Moll &amp; Karin Landerl. 2014. The transition from sublexical to lexical processing in a consistent orthography: An eye-tracking study. Scientific Studies of Reading 18. 224-233. https://doi.org/10.1080/10888438.2013.857673</mixed-citation></ref><ref id="B27"><label>27.</label><mixed-citation>Rau, Anne K., Kristina Moll, Margaret J. Snowling &amp; Karin Landerl. 2015. Effects of orthographic consistency on eye movement behavior: German and English children and adults process the same words differently. Journal of Experimental Child Psychology 130. 92-105. https://doi.org/10.1016/j.jecp.2014.09.012</mixed-citation></ref><ref id="B28"><label>28.</label><mixed-citation>Rayner, Keith. 1998. Eye movements in reading and information processing: 20 years of research. Psychological Bulletin 124. 372-422. https://doi.org/10.1037/0033-2909.124.3.372</mixed-citation></ref><ref id="B29"><label>29.</label><mixed-citation>Rayner, Keith, Timothy J. Slattery, Denis Drieghe &amp; Simon P. Liversedge. 2011. Eye movements and word skipping during reading: Effects of word length and predictability. Journal of Experimental Psychology: Human Perception and Performance 37(2).
514-528.</mixed-citation></ref><ref id="B30"><label>30.</label><mixed-citation>Rello, Luz, Ricardo Baeza-Yates, Laura Dempere-Marco &amp; Horacio Saggion. 2013. Frequent words improve readability and short words improve understandability for people with dyslexia. In Paula Kotzé &amp; Gary Marsden (eds.), Human-Computer interaction - INTERACT 2013. Lecture notes in computer science vol 8120, 203-219. Berlin/Heidelberg: Springer. https://doi.org/10.1007/978-3-642-40498-6_15</mixed-citation></ref><ref id="B31"><label>31.</label><mixed-citation>Reynolds, Robert. 2016. Insights from Russian second language readability classification: Complexity-dependent training requirements, and feature evaluation of multiple categories. Proceedings of the 11th Workshop on the Innovative Use of NLP for Building Educational Applications, San Diego, CA 2016. 289-300.</mixed-citation></ref><ref id="B32"><label>32.</label><mixed-citation>Sato, Satoshi. 2014. Text Readability and Word Distribution in Japanese. Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14) 2014. 2811-2815.</mixed-citation></ref><ref id="B33"><label>33.</label><mixed-citation>Schwarm, Sarah E. &amp; Mari Ostendorf. 2005. Reading level assessment using support vector machines and statistical language models. Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics (ACL ’05), USA, 2005. 523-530.</mixed-citation></ref><ref id="B34"><label>34.</label><mixed-citation>Solovyev, Valery, Vladimir Ivanov &amp; Marina Solnyshkina. 2018. Assessment of reading difficulty levels in Russian academic texts: Approaches and metrics. Journal of Intelligent &amp; Fuzzy Systems 34. 3049-3058.</mixed-citation></ref><ref id="B35"><label>35.</label><mixed-citation>Tiffin-Richards, Simon P. &amp; Sasha Schroeder. 2015. Children's and adults' parafoveal processes in German: Phonological and orthographic effects. Journal of Cognitive Psychology 27. 531-548. 
https://doi.org/10.1080/20445911.2014.999076</mixed-citation></ref><ref id="B36"><label>36.</label><mixed-citation>White, Sarah J., Denis Drieghe, Simon P. Liversedge &amp; Adrian Staub. 2018. The word frequency effect during sentence reading: A linear or nonlinear effect of log frequency? Quarterly Journal of Experimental Psychology 71(1). 46-55. https://doi.org/10.1080/17470218.2016.1240813</mixed-citation></ref><ref id="B37"><label>37.</label><mixed-citation>Ляшевская О.Н., Шаров С.А. Частотный словарь современного русского языка (на материалах Национального корпуса русского языка). М.: Азбуковник. 2009. [Lyashevskaya, Olga N. &amp; Sergey A. Sharoff. 2009. Modern Russian Frequency Dictionary (based on the data from the Russian National Corpus). Moscow: Azbukovnik. (In Russ.)].</mixed-citation></ref></ref-list></back></article>
