<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">RUDN Journal of Engineering Research</journal-id><journal-title-group><journal-title xml:lang="en">RUDN Journal of Engineering Research</journal-title><trans-title-group xml:lang="ru"><trans-title>Вестник Российского университета дружбы народов. Серия: Инженерные исследования</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2312-8143</issn><issn publication-format="electronic">2312-8151</issn><publisher><publisher-name xml:lang="en">Peoples’ Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">47081</article-id><article-id pub-id-type="doi">10.22363/2312-8143-2025-26-3-310-322</article-id><article-id pub-id-type="edn">AAMJLK</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Articles</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Статьи</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Prediction of Breast Cancer Using Machine Learning</article-title><trans-title-group xml:lang="ru"><trans-title>Прогнозирование рака груди с помощью машинного обучения</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0006-8425-2425</contrib-id><name-alternatives><name xml:lang="en"><surname>Uwingabiye</surname><given-names>Florence</given-names></name><name 
xml:lang="ru"><surname>Увингабийе</surname><given-names>Флоренс</given-names></name></name-alternatives><bio xml:lang="en"><p>Master student of the Department of Mechanics and Control Processes, Academy of Engineering</p></bio><bio xml:lang="ru"><p>магистрант кафедры механики и процессов управления, инженерная академия</p></bio><email>cyizashem@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0006-9831-042X</contrib-id><name-alternatives><name xml:lang="en"><surname>Kimenyi</surname><given-names>Thadee</given-names></name><name xml:lang="ru"><surname>Кимений</surname><given-names>Тади</given-names></name></name-alternatives><bio xml:lang="en"><p>Master student of the Department of Mechanics and Control Processes, Academy of Engineering</p></bio><bio xml:lang="ru"><p>магистрант кафедры механики и процессов управления, инженерная академия</p></bio><email>ki.thadee@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0003-6885-6235</contrib-id><name-alternatives><name xml:lang="en"><surname>Kimenyi</surname><given-names>Asaph</given-names></name><name xml:lang="ru"><surname>Кимений</surname><given-names>Асаф</given-names></name></name-alternatives><bio xml:lang="en"><p>Master student of the Department of Mechanics and Control Processes, Academy of Engineering</p></bio><bio xml:lang="ru"><p>магистрант кафедры механики и процессов управления, инженерная академия</p></bio><email>asaph.rw@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-8824-1241</contrib-id><contrib-id contrib-id-type="spin">2920-9463</contrib-id><name-alternatives><name xml:lang="en"><surname>Kruglova</surname><given-names>Larisa V.</given-names></name><name 
xml:lang="ru"><surname>Круглова</surname><given-names>Лариса Владимировна</given-names></name></name-alternatives><bio xml:lang="en"><p>PhD in Technical Sciences, Associate Professor of the Department of Mechanics and Control Processes, Academy of Engineering</p></bio><bio xml:lang="ru"><p>кандидат технических наук, доцент кафедры механики и процессов управления, инженерная академия</p></bio><email>kruglova-lv@rudn.ru</email><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">RUDN University</institution></aff><aff><institution xml:lang="ru">Российский университет дружбы народов</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2025-11-11" publication-format="electronic"><day>11</day><month>11</month><year>2025</year></pub-date><volume>26</volume><issue>3</issue><issue-title xml:lang="en">VOL 26, NO3 (2025)</issue-title><issue-title xml:lang="ru">ТОМ 26, №3 (2025)</issue-title><fpage>310</fpage><lpage>322</lpage><history><date date-type="received" iso-8601-date="2025-11-11"><day>11</day><month>11</month><year>2025</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2025, Uwingabiye F., Kimenyi T., Kimenyi A., Kruglova L.V.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2025, Увингабийе Ф., Кимений Т., Кимений А., Круглова Л.В.</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="en">Uwingabiye F., Kimenyi T., Kimenyi A., Kruglova L.V.</copyright-holder><copyright-holder xml:lang="ru">Увингабийе Ф., Кимений Т., Кимений А., Круглова Л.В.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri 
xlink:href="https://journals.rudn.ru/engineering-researches/article/view/47081">https://journals.rudn.ru/engineering-researches/article/view/47081</self-uri><abstract xml:lang="en"><p>Breast cancer remains one of the leading causes of morbidity and mortality among women worldwide. Despite the global emphasis on early detection, breast cancer continues to pose a significant public health challenge. The objective of this study is to predict breast cancer risk using various machine-learning approaches based on demographic, laboratory, and mammographic data. It employed a quantitative research design to assess the potential of machine learning (ML) in predicting breast cancer. It integrated supervised ML algorithms, including Support Vector Machines (SVM), Decision Trees, Random Forests, and Deep Learning models, to evaluate their accuracy, efficiency, and applicability in medical diagnostics. The dataset revealed significant variability in tumor features such as mean radius, mean texture, mean perimeter, and mean area. The target variable demonstrated a class imbalance, with 62% benign and 38% malignant cases. Among the evaluated models, Random Forest outperformed others with the highest accuracy, precision, recall, F1-score, and ROC-AUC, indicating superior predictive capability. The Logistic Regression and Support Vector Machine models showed competitive performance, particularly in precision and recall, while the Decision Tree model exhibited the lowest overall performance across metrics.</p></abstract><trans-abstract xml:lang="ru"><p>Рак молочной железы остается одной из основных причин заболеваемости и смертности среди женщин во всем мире. Несмотря на значительные усилия, направленные на раннее выявление болезни, рак молочной железы по-прежнему представляет собой серьезную проблему для здоровья населения. 
Цель исследования — прогнозирование риска рака молочной железы с использованием различных подходов машинного обучения, основанных на демографических, лабораторных и маммографических данных. Использована модель количественных оценок методов машинного обучения в прогнозировании рака молочной железы. Модель интегрирует алгоритмы машинного обучения, включая метод опорных векторов, деревья решений, случайные леса и модели глубокого обучения, для оценки их точности, эффективности и применимости в медицинской диагностике. Набор данных выявил значительную изменчивость в параметрах опухоли, таких как средний радиус, средняя текстура, средний периметр и средняя площадь. Целевая переменная продемонстрировала дисбаланс классов, с 62 % доброкачественных и 38 % злокачественных случаев. Среди оцененных моделей Random Forest превзошла другие по наибольшей точности, чувствительности, полноте, F1-мере и площади под кривой операционных характеристик, указывая на наилучшую способность прогнозирования. Модели логистической регрессии и метода опорных векторов показали конкурентоспособность, особенно по чувствительности и полноте, в то время как модель дерева решений продемонстрировала самую низкую общую эффективность по всем показателям.</p></trans-abstract><kwd-group xml:lang="en"><kwd>early detection</kwd><kwd>public health</kwd><kwd>tumor</kwd><kwd>mammography</kwd><kwd>medical diagnostics</kwd><kwd>machine-learning algorithms</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>ранняя диагностика</kwd><kwd>общественное здравоохранение</kwd><kwd>опухоль</kwd><kwd>маммография</kwd><kwd>медицинская диагностика</kwd><kwd>алгоритмы машинного обучения</kwd></kwd-group><funding-group/></article-meta><fn-group/></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Sung H, Siegel RL, Jemal A, Ferlay J, Laversanne M, Soerjomataram I, Bray F. Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries. 
CA: A Cancer Journal for Clinicians. 2021;71(3):209-249. https://doi.org/10.3322/caac.21660 EDN: MRLXRI</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Bray F, Laversanne M, Sung H, Soerjomataram I, Siegel RL, Jemal A. Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries. CA: A Cancer Journal for Clinicians. 2024;74(3):229-263. https://doi.org/10.3322/caac.21834</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Khalid A, Mehmood A, Alabrah A, Alkhamees BF, Amin F, AlSalman H, Choi GS. Breast cancer detection and prevention using machine learning. Diagnostics. 2023;13(19):3113. https://doi.org/10.3390/diagnostics13193113</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Davis FD. Perceived usefulness, perceived ease of use, and user acceptance of information technology. MIS Quarterly. 1989;13(3):319-340. https://doi.org/10.2307/249008</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Venkatesh V, Davis FD. A theoretical extension of the Technology Acceptance Model: Four longitudinal field studies. Management Science. 2000;46(2):186-204. https://doi.org/10.1287/mnsc.46.2.186.11926 EDN: FNVBJN</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Heaton JIG, Bengio Y, Courville A. Deep learning. Genet Program Evolvable. 2018;19:305-307. https://doi.org/10.1007/s10710-017-9314-z</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Wolberg W, Mangasarian O, Street N, Street W. Breast cancer wisconsin (Diagnostic). UCI Machine Learning Repository. 1993. https://doi.org/10.24432/C5DW2B</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Chen T, Guestrin C. XGBoost: A Scalable Tree Boosting System. Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2016:785-794. 
https://doi.org/10.1145/2939672.2939785</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Gupta V, Choudhary S. Multicollinearity and its impact on model accuracy. Journal of Data Science and Analytics. 2022;14(1):12-24.</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Hunter JD. Matplotlib: A 2D Graphics Environment. Computing in Science &amp; Engineering. 2007;9(3):90-95. https://doi.org/10.1109/MCSE.2007.55</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Shivakumar M, Kokila R, Likitha BS, Tharun N, Adishesha R. Breast cancer prediction. International Journal of Creative Research Thoughts. 2024;12(5):600-605. Available from: https://ijcrt.org/papers/IJCRTAB02087.pdf (accessed: 15.03.2025).</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Vlachas C, Damianos L, Gousetis N, Mouratidis I, Kelepouris D, Kollias K-F, Asimopoulos N, Fragulis GF. Random forest classification algorithm for medical industry data. The 4th ETLTC International Conference on ICT Integration in Technical Education (ETLTC2022). 2022;139:03008. https://doi.org/10.1051/shsconf/202213903008</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Tiwari A, Mishra S, Kuo TR. Current AI technologies in cancer diagnostics and treatment. Mol Cancer. 2025;24:159. https://doi.org/10.1186/s12943-025-02369-9</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Lopez-Miguel ID. Survey on preprocessing techniques for big data projects. Engineering Proceedings. 2021;7(1):14. https://doi.org/10.3390/engproc2021007014</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>IBM Research. Parallel processing in Random Forest models. IBM Technical Journal. 2023;58(3):125-140. https://doi.org/10.33022/ijcs.v13i2.3803</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Ljubic B, Pavlovski M, Gillespie A, Obradovic Z. 
Systematic review of supervised machine learning models in prediction of medical conditions. Medrxiv. 2022. https://doi.org/10.1101/2022.04.22.22274183</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Bell R, Martinez G. Machine learning for predictive healthcare: Techniques and applications. Journal of Artificial Intelligence in Medicine. 2018;50(3):19-26. https://doi.org/10.1016/j.artmed.2018.03.003</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Kotsiantis SB, Kanellopoulos D, Pintelas PE. Data preprocessing for supervised learning. International Journal of Computer Science. 2006;1(1):111-117.</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>LeCun Y, Bengio Y, Hinton G. Deep learning. Nature. 2015;521(7553):436-444. https://doi.org/10.1038/nature14539</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Waskom ML, Botvinnik O, O'Kane D, Hobson P, Lukauskas S, Seaborn BM. Statistical data visualization. Journal of Open Source Software. 2020;5(52):2186. Available from: https://ui.adsabs.harvard.edu/abs/2020ascl.soft12015W/abstract (accessed: 15.03.2025).</mixed-citation></ref></ref-list></back></article>
