<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Russian Journal of Linguistics</journal-id><journal-title-group><journal-title xml:lang="en">Russian Journal of Linguistics</journal-title><trans-title-group xml:lang="ru"><trans-title>Russian Journal of Linguistics</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2687-0088</issn><issn publication-format="electronic">2686-8024</issn><publisher><publisher-name xml:lang="en">Peoples’ Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">31328</article-id><article-id pub-id-type="doi">10.22363/2687-0088-30145</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Articles</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Статьи</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="zh"><subject>Articles</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">ReaderBench: Multilevel analysis of Russian text characteristics</article-title><trans-title-group xml:lang="ru"><trans-title>ReaderBench: многоуровневый анализ характеристик текста на русском языке</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7994-9950</contrib-id><name-alternatives><name xml:lang="en"><surname>Corlatescu</surname><given-names>Dragos</given-names></name><name 
xml:lang="ru"><surname>Корлатеску</surname><given-names>Драгош</given-names></name></name-alternatives><bio xml:lang="en"><p>Teaching Assistant and a PhD student</p></bio><bio xml:lang="ru"><p>ассистент и аспирант</p></bio><email>dragos.corlatescu@upb.ro</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-0380-6814</contrib-id><name-alternatives><name xml:lang="en"><surname>Ruseti</surname><given-names>Ștefan</given-names></name><name xml:lang="ru"><surname>Русети</surname><given-names>Штефан</given-names></name></name-alternatives><bio xml:lang="en"><p>Lecturer</p></bio><bio xml:lang="ru"><p>преподаватель</p></bio><email>stefan.ruseti@upb.ro</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-4815-9227</contrib-id><name-alternatives><name xml:lang="en"><surname>Dascalu</surname><given-names>Mihai</given-names></name><name xml:lang="ru"><surname>Даскалу</surname><given-names>Михай</given-names></name></name-alternatives><bio xml:lang="en"><p>Full Professor</p></bio><bio xml:lang="ru"><p>профессор</p></bio><email>mihai.dascalu@upb.ro</email><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">University Politehnica of Bucharest</institution></aff><aff><institution xml:lang="ru">Политехнический университет Бухареста</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Academy of Romanian Scientists</institution></aff><aff><institution xml:lang="ru">Академия румынских ученых</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2022-06-29" publication-format="electronic"><day>29</day><month>06</month><year>2022</year></pub-date><volume>26</volume><issue>2</issue><issue-title xml:lang="en">Computational Linguistics and 
Discourse Complexology</issue-title><issue-title xml:lang="ru">Компьютерная лингвистика и дискурсивная комплексология</issue-title><fpage>342</fpage><lpage>370</lpage><history><date date-type="received" iso-8601-date="2022-06-29"><day>29</day><month>06</month><year>2022</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2022, Corlatescu D., Ruseti Ș., Dascalu M.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2022, Корлатеску Д., Русети Ш., Даскалу М.</copyright-statement><copyright-statement xml:lang="zh">Copyright © 2022, Corlatescu D., Ruseti Ș., Dascalu M.</copyright-statement><copyright-year>2022</copyright-year><copyright-holder xml:lang="en">Corlatescu D., Ruseti Ș., Dascalu M.</copyright-holder><copyright-holder xml:lang="ru">Корлатеску Д., Русети Ш., Даскалу М.</copyright-holder><copyright-holder xml:lang="zh">Corlatescu D., Ruseti Ș., Dascalu M.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/linguistics/article/view/31328">https://journals.rudn.ru/linguistics/article/view/31328</self-uri><abstract xml:lang="en"><p style="text-align: justify;">This paper introduces an adaptation of the open source ReaderBench framework that now supports Russian multilevel analyses of text characteristics, while integrating both textual complexity indices and state-of-the-art language models, namely Bidirectional Encoder Representations from Transformers (BERT). The evaluation of the proposed processing pipeline was conducted on a dataset containing Russian texts from two language levels for foreign learners (A - Basic user and B - Independent user). 
Our experiments showed that the ReaderBench complexity indices are statistically significant in differentiating between the two classes of language level, both from: a) a statistical perspective, where a Kruskal-Wallis analysis was performed and features such as the “nmod” dependency tag or the number of nouns at the sentence level proved to be the most predictive; and b) a neural network perspective, where our model combining textual complexity indices and contextualized embeddings obtained an accuracy of 92.36% in a leave-one-text-out cross-validation, outperforming the BERT baseline. ReaderBench can be employed by designers and developers of educational materials to evaluate and rank materials based on their difficulty, as well as by a larger audience for assessing text complexity in different domains, including law, science, or politics.</p></abstract><trans-abstract xml:lang="ru"><p style="text-align: justify;">В статье представлена новая версия платформы ReaderBench с открытым исходным кодом. В настоящее время ReaderBench поддерживает многоуровневый анализ параметров текстов на русском языке, интегрируя при этом как индексы текстовой сложности, так и современные языковые модели, в частности, BERT. Оценка предлагаемого алгоритма обработки проводилась на корпусе русских текстов двух языковых уровней, используемых при обучении русскому языку как иностранному (A - базовый пользователь и B - независимый пользователь). Наши эксперименты показали, что (а) индексы сложности текстов различных уровней по Общеевропейской шкале, рассчитываемые при помощи ReaderBench, статистически значимы (по критерию Краскела-Уоллиса), при этом количество существительных на уровне предложения оказалось наилучшим предиктором сложности; б) а наша нейронная модель, сочетающая индексы сложности текста и контекстуализированные вложения, при перекрестной валидации достигла точности 92,36 % и превзошла базовый уровень BERT. 
ReaderBench может использоваться разработчиками учебных материалов для оценки и ранжирования текстов в зависимости от их сложности, а также более широкой аудиторией для оценки сложности восприятия текста в различных областях, включая юриспруденцию, естествознание или политику.</p></trans-abstract><kwd-group xml:lang="en"><kwd>ReaderBench framework</kwd><kwd>text complexity indices</kwd><kwd>language model</kwd><kwd>neural architecture</kwd><kwd>multilevel text analysis</kwd><kwd>assessing text difficulty</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>фреймворк ReaderBench</kwd><kwd>индексы сложности текста</kwd><kwd>языковая модель</kwd><kwd>нейронная архитектура</kwd><kwd>многоуровневый анализ текста</kwd><kwd>оценка сложности текста</kwd></kwd-group><funding-group><funding-statement xml:lang="en">This research was supported by a grant of the Romanian National Authority for Scientific Research and Innovation, CNCS – UEFISCDI, project number TE 70 PN-III-P1-1.1-TE-2019-2209, ATES – “Automated Text Evaluation and Simplification”.</funding-statement></funding-group></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Abadi, Martin. 2016. Tensorflow: A system for large-scale machine learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). Savannah, GA, USA: USENIX Association. 265-283.</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Akhtiamov, Raouf B. 2019. Dictionary of abstract and concrete words of the Russian language: A methodology for creation and application. Journal of Research in Applied Linguistics. Saint Petersburg, Russia: Springer. 218-230.</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Bansal, S. 2014. Textstat. Retrieved September 1st, 2021. URL: https://github.com/shivam5992/textstat (accessed 26.05.2022).</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Blei, David M., Andrew Y. Ng &amp; Michael I. Jordan. 
2003. Latent Dirichlet Allocation. Journal of Machine Learning Research 3(4-5). 993-1022.</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>BNC Consortium. 2007. British national corpus. Oxford Text Archive Core Collection.</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Boguslavsky, Igor, Leonid Iomdin &amp; Victor Sizov. 2004. Multilinguality in ETAP-3: Reuse of lexical resources. In Proceedings of the Workshop on Multilingual Linguistic Resources. Geneva, Switzerland: COLING. 1-8.</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Brysbaert, Marc, Boris New &amp; Emmanuel Keuleers. 2012. Adding part-of-speech information to the SUBTLEX-US word frequencies. Behavior Research Methods 44(4). 991-997.</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Brysbaert, Marc, Amy Beth Warriner &amp; Victor Kuperman. 2014. Concreteness ratings for 40 thousand generally known English word lemmas. Behavior Research Methods 46(3). 904-911.</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Choi, Joon Suh &amp; Scott A. Crossley. 2020. ARTE: Automatic Readability Tool for English. NLP Tools for the Social Sciences. linguisticanalysistools.org. Retrieved September 1st, 2021. URL: https://www.linguisticanalysistools.org/arte.html (accessed 26.05.2022).</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Churunina, Anna A., Ehl'zara Gizzatullina-Gafiyatova, Artem Zaikin &amp; Marina I. Solnyshkina. 2020. Lexical Features of Text Complexity: The case of Russian academic texts. In SHS Web of Conferences. Nizhny Novgorod, Russia: EDP Sciences.</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Coltheart, Max. 1981. The MRC psycholinguistic database. The Quarterly Journal of Experimental Psychology Section A 33(4). 
497-505.</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Conneau, Alexis, Guillaume Lample, Marc’Aurelio Ranzato, Ludovic Denoyer &amp; Hervé Jégou. 2018. Word translation without parallel data. In 6th International Conference on Learning Representations. Vancouver, BC, Canada: OpenReview.net.</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Crossley, Scott A., Franklin Bradfield &amp; Analynn Bustamante. 2019. Using human judgments to examine the validity of automated grammar, syntax, and mechanical errors in writing. Journal of Writing Research 11(2). 251-270.</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Crossley, Scott A., Kristopher Kyle, Jodi Davenport &amp; Danielle S. McNamara. 2016. Automatic assessment of constructed response data in a Chemistry Tutor. In International Conference on Educational Data Mining. Raleigh, North Carolina, USA: International Educational Data Mining Society. 336-340.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Dale, Edgar &amp; Jeanne S. Chall. 1948. A formula for predicting readability: Instructions. Educational Research Bulletin 27(1). 37-54.</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Dascalu, Mihai. 2014. Analyzing Discourse and Text Complexity for Learning and Collaborating, Studies in Computational Intelligence (534). Switzerland: Springer.</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Dascalu, Mihai, Philippe Dessus, Stefan Trausan-Matu &amp; Maryse Bianco. 2013. ReaderBench, an environment for analyzing text complexity and reading strategies. In H. Chad Lane, Kalina Yacef, Jack Mostow &amp; Philip Pavlik (eds.), 16th Int. Conf. on Artificial Intelligence in Education (AIED 2013), 379-388. Memphis, TN, USA: Springer.</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Dascalu, Mihai, Danielle S. McNamara, Stefan Trausan-Matu &amp; Laura K. Allen. 2018. 
Cohesion Network Analysis of CSCL Participation. Behavior Research Methods 50(2). 604-619. https://doi.org/10.3758/s13428-017-0888-4</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Dascalu, Mihai, Lucia Larise Stavarache, Stefan Trausan-Matu &amp; Philippe Dessus. 2014. Reflecting comprehension through French textual complexity factors. In 26th Int. Conf. on Tools with Artificial Intelligence (ICTAI 2014). 615-619. Limassol, Cyprus: IEEE.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Dascalu, Mihai, Wim Westera, Stefan Ruseti, Stefan Trausan-Matu &amp; Hub J. Kurvers. 2017. ReaderBench learns Dutch: Building a comprehensive automated essay scoring system for Dutch. In Anne E. Baker, Xiangen Hu, Ma. Mercedes T. Rodrigo, Benedict du Boulay, Ryan Baker (eds.), 18th Int. Conf. on Artificial Intelligence in Education (AIED 2017), 52-63. Wuhan, China: Springer.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Davies, Mark. 2010. The corpus of contemporary American English as the first reliable monitor corpus of English. Literary and Linguistic Computing 25(4). 447-464.</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Devlin, Jacob, Ming-Wei Chang, Kenton Lee &amp; Kristina Toutanova. 2019. BERT: Pre-training of deep bidirectional transformers for language understanding. In Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Minneapolis, MN, USA: Association for Computational Linguistics. 4171-4186.</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Flesch, Rudolf F. 1949. Art of Readable Writing.</mixed-citation></ref><ref id="B24"><label>24.</label><mixed-citation>Gabitov, Azat, Marina Solnyshkina, Liliya Shayakhmetova, Liliya Ilyasova &amp; Saida Adobarova. 2017. Text complexity in Russian textbooks on social studies. Revista Publicando 4(13 (2)). 
597-606.</mixed-citation></ref><ref id="B25"><label>25.</label><mixed-citation>Gifu, Daniela, Mihai Dascalu, Stefan Trausan-Matu &amp; Laura K. Allen. 2016. Time evolution of writing styles in Romanian language. In 28th Int. Conf. on Tools with Artificial Intelligence (ICTAI 2016). San Jose, CA: IEEE. 1048-1054.</mixed-citation></ref><ref id="B26"><label>26.</label><mixed-citation>Graesser, Arthur C., Danielle S. McNamara, Max M. Louwerse &amp; Zhiqiang Cai. 2004. Coh-Metrix: Analysis of text on cohesion and language. Behavior Research Methods, Instruments, &amp; Computers 36(2). 193-202.</mixed-citation></ref><ref id="B27"><label>27.</label><mixed-citation>Guryanov, Igor, Iskander Yarmakeev, Aleksandr Kiselnikov &amp; Iena Harkova. 2017. Text complexity: Periods of study in Russian linguistics. Revista Publicando 4(13 (2)). 616-625.</mixed-citation></ref><ref id="B28"><label>28.</label><mixed-citation>Gutu-Robu, Gabriel, Maria-Dorinela Sirbu, Ionut S Cristian Paraschiv, Mihai Dascălu, Philippe Dessus &amp; Stefan Trausan-Matu. 2018. Liftoff - ReaderBench introduces new online functionalities. Romanian Journal of Human - Computer Interaction 11(1). 76-91.</mixed-citation></ref><ref id="B29"><label>29.</label><mixed-citation>Honnibal, Matthew &amp; Ines Montani. 2017. spaCy 2: Natural language understanding with Bloom embeddings. Convolutional Neural Networks and Incremental Parsing 7(1).</mixed-citation></ref><ref id="B30"><label>30.</label><mixed-citation>Hopkins, Kenneth D. &amp; Douglas L. Weeks. 1990. Tests for normality and measures of skewness and kurtosis: Their place in research reporting. Educational and Psychological Measurement 50(4). 717-729.</mixed-citation></ref><ref id="B31"><label>31.</label><mixed-citation>Kincaid, J. Peter, Robert P. Fishburne Jr., Richard L. Rogers &amp; Brad S. Chissom. 1975. Derivation of New Readability Formulas: (Automated Readability Index, Fog Count and Flesch Reading Ease Formula) for Navy Enlisted Personnel. 
Naval Air Station Memphis: Chief of Naval Technical Training.</mixed-citation></ref><ref id="B32"><label>32.</label><mixed-citation>Kozea. 2016. Pyphen. Retrieved September 1st, 2021. URL: https://pyphen.org/ (accessed 20.05.2022).</mixed-citation></ref><ref id="B33"><label>33.</label><mixed-citation>Kruskal, William H. &amp; Allen W. Wallis. 1952. Use of ranks in one-criterion variance analysis. Journal of the American Statistical Association 47(260). 583-621.</mixed-citation></ref><ref id="B34"><label>34.</label><mixed-citation>Kuperman, Victor, Hans Stadthagen-Gonzalez &amp; Marc Brysbaert. 2012. Age-of-acquisition ratings for 30,000 English words. Behavior Research Methods 44(4). 978-990.</mixed-citation></ref><ref id="B35"><label>35.</label><mixed-citation>Kuratov, Yuri &amp; Mikhail Arkhipov. 2019. Adaptation of deep bidirectional multilingual transformers for Russian language. arXiv preprint arXiv:1905.07213.</mixed-citation></ref><ref id="B36"><label>36.</label><mixed-citation>Kyle, Kristopher. 2016. Measuring Syntactic Development in L2 Writing: Fine Grained Indices of Syntactic Complexity and Usage-based Indices of Syntactic Sophistication.</mixed-citation></ref><ref id="B37"><label>37.</label><mixed-citation>Kyle, Kristopher, Scott A. Crossley &amp; Cynthia Berger. 2018. The tool for the automatic analysis of lexical sophistication (TAALES): Version 2.0. Behavior Research Methods 50(3). 1030-1046.</mixed-citation></ref><ref id="B38"><label>38.</label><mixed-citation>Kyle, Kristopher, Scott A. Crossley &amp; Scott Jarvis. 2021. Assessing the validity of lexical diversity indices using direct judgements. Language Assessment Quarterly 18(2). 154-170.</mixed-citation></ref><ref id="B39"><label>39.</label><mixed-citation>Kyle, Kristopher, Scott A. Crossley &amp; Youjin J. Kim. 2015. Native language identification and writing proficiency. International Journal of Learner Corpus Research 1(2). 
187-209.</mixed-citation></ref><ref id="B40"><label>40.</label><mixed-citation>Landauer, Thomas K., Peter W. Foltz &amp; Darrell Laham. 1998. An introduction to Latent Semantic Analysis. Discourse Processes 25(2/3). 259-284.</mixed-citation></ref><ref id="B41"><label>41.</label><mixed-citation>LanguageTool. 2021. Language Tool. Retrieved September 1st, 2021. URL: https://languagetool.org/ (accessed 20.05.2022).</mixed-citation></ref><ref id="B42"><label>42.</label><mixed-citation>Loukachevitch, Natalia V., G. Lashevich, Anastasia A. Gerasimova, Vyacheslav V. Ivanov &amp; Boris V. Dobrov. 2016. Creating Russian wordnet by conversion. In Computational Linguistics and Intellectual Technologies: Annual conference Dialogue 2016. Moscow, Russia. 405-415.</mixed-citation></ref><ref id="B43"><label>43.</label><mixed-citation>McLaughlin, G. H. 1969. SMOG grading - a new readability formula. Journal of Reading 12(8). 639-646.</mixed-citation></ref><ref id="B44"><label>44.</label><mixed-citation>McCarthy, Kathryn S., Danielle S. McNamara, Marina I. Solnyshkina, Fanuza Kh. Tarasova &amp; Roman V. Kupriyanov. 2019. The Russian language test: Towards assessing text comprehension. Vestnik Volgogradskogo Gosudarstvennogo Universiteta. Seriya 2: Yazykoznanie 18(4). 231-247.</mixed-citation></ref><ref id="B45"><label>45.</label><mixed-citation>Mikolov, Tomas, Kai Chen, Greg Corrado &amp; Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. In Workshop at ICLR. Scottsdale, AZ.</mixed-citation></ref><ref id="B46"><label>46.</label><mixed-citation>Myint. 2014. language-check. Retrieved September 1st, 2021. URL: https://github.com/myint/language-check (accessed 23.05.2022).</mixed-citation></ref><ref id="B47"><label>47.</label><mixed-citation>Pearson, Karl. 1895. VII. Note on regression and inheritance in the case of two parents. Proceedings of the Royal Society of London 58. 
240-242.</mixed-citation></ref><ref id="B48"><label>48.</label><mixed-citation>Pedregosa, Fabian, Gael Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, Jake Vanderplas, Alexandre Passos, David Cournapeau, Matthieu Brucher, Matthieu Perrot &amp; Edouard Duchesnay. 2011. Scikit-learn: Machine learning in Python. The Journal of Machine Learning Research 12. 2825-2830.</mixed-citation></ref><ref id="B49"><label>49.</label><mixed-citation>Quispesaravia, Andre, Walter Perez, Marco Sobrevilla Cabezudo &amp; Fernando Alva-Manchego. 2016. Coh-Metrix-Esp: A complexity analysis tool for documents written in Spanish. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16). 4694-4698.</mixed-citation></ref><ref id="B50"><label>50.</label><mixed-citation>Rehurek, Radim &amp; Petr Sojka. 2010. Software framework for topic modelling with large corpora. In Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks. Valletta, Malta: ELRA. 45-50.</mixed-citation></ref><ref id="B51"><label>51.</label><mixed-citation>Roscoe, Rod, Laura K. Allen, Jennifer L. Weston &amp; Scott A. Crossley. 2014. The Writing Pal intelligent tutoring system: Usability testing and development. Computers and Composition 34. 39-59.</mixed-citation></ref><ref id="B52"><label>52.</label><mixed-citation>Sadoski, Mark, Ernest T. Goetz &amp; Maximo Rodriguez. 2000. Engaging texts: Effects of concreteness on comprehensibility, interest, and recall in four text types. Journal of Educational Psychology 92(1). 85.</mixed-citation></ref><ref id="B53"><label>53.</label><mixed-citation>Sakhovskiy, Andrey, Valery D. Solovyev &amp; Marina Solnyshkina. 2020. Topic modeling for assessment of text complexity in Russian textbooks. In 2020 Ivannikov Ispras Open Conference (ISPRAS). Moscow, Russia: IEEE. 
102-108.</mixed-citation></ref><ref id="B54"><label>54.</label><mixed-citation>Schmid, Helmut, Marco Baroni, Erika Zanchetta &amp; Achim Stein. 2007. Il sistema ‘tree-tagger arricchito’-The enriched TreeTagger system. IA Contributi Scientifici 4(2). 22-23.</mixed-citation></ref><ref id="B55"><label>55.</label><mixed-citation>Senter, R.J. &amp; E.A. Smith. 1967. Automated readability index: CINCINNATI UNIV OH.</mixed-citation></ref><ref id="B56"><label>56.</label><mixed-citation>Shannon, Claude E. 1948. A mathematical theory of communication. The Bell System Technical Journal 27(3). 379-423.</mixed-citation></ref><ref id="B57"><label>57.</label><mixed-citation>Shapiro, S.S. &amp; M.B. Wilk. 1965. An analysis of variance test for normality (complete samples). Biometrika 52(3/4). 591-611.</mixed-citation></ref><ref id="B58"><label>58.</label><mixed-citation>Sharoff, Serge, Elena Umanskaya &amp; James Wilson. 2014. A Frequency Dictionary of Russian: Core Vocabulary for Learners. Routledge.</mixed-citation></ref><ref id="B59"><label>59.</label><mixed-citation>Solnyshkina, Marina I., Valery Solovyev, Vladimir Ivanov &amp; Andrey Danilov. 2018. Studying text complexity in Russian academic corpus with Multi-Level Annotation. CEUR WORKSHOP PROCEEDINGS. Proceedings of Computational Models in Language and Speech Workshop, co-located with the 15th TEL International Conference on Computational and Cognitive Linguistics, TEL 2018.</mixed-citation></ref><ref id="B60"><label>60.</label><mixed-citation>Solovyev, Valery, Marina Solnyshkina, Mariia Andreeva, Andrey Danilov &amp; Radif Zamaletdinov. 2020. Text complexity and abstractness: Tools for the Russian language. In International Conference "Internet and Modern Society" (IMS-2020). St. Petersburg, Russia: CEUR Proceedings. 75-87.</mixed-citation></ref><ref id="B61"><label>61.</label><mixed-citation>Solovyev, Valery, Marina I. Solnyshkina &amp; Vladimir Ivanov. 2018. 
Complexity of Russian academic texts as the function of syntactic parameters. In 19th International Conference on Computational Linguistics and Intelligent Text Processing, CICLing. Hanoi, Vietnam: Springer Lecture Notes in Computer Science.</mixed-citation></ref><ref id="B62"><label>62.</label><mixed-citation>Spearman, Carl. 1987. The proof and measurement of association between two things. The American Journal of Psychology 100(3/4). 441-471.</mixed-citation></ref><ref id="B63"><label>63.</label><mixed-citation>Vaswani, Ashish, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser &amp; Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing Systems. Long Beach, CA, USA: Curran Associates, Inc. 5998-6008.</mixed-citation></ref><ref id="B64"><label>64.</label><mixed-citation>Vorontsov, Konstantin &amp; Anna Potapenko. 2015. Additive regularization of topic models. Machine Learning 101(1). 303-323.</mixed-citation></ref></ref-list></back></article>
