From 773a5e5e8ade8f5a7e103131fdf1af88fbd8c72d Mon Sep 17 00:00:00 2001 From: Christian Torbidone Date: Thu, 1 Aug 2024 15:20:55 +0200 Subject: [PATCH] First Commit --- .idea/vcs.xml | 6 ++ settings.gradle.kts | 2 +- src/main/kotlin/Main.kt | 134 +++------------------------- src/main/kotlin/SitemapGenerator.kt | 128 ++++++++++++++++++++++++++ src/main/kotlin/models/UrlEntry.kt | 8 ++ 5 files changed, 153 insertions(+), 125 deletions(-) create mode 100644 .idea/vcs.xml create mode 100644 src/main/kotlin/SitemapGenerator.kt create mode 100644 src/main/kotlin/models/UrlEntry.kt diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index 2ec3a28..ca1bc65 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1,5 +1,5 @@ plugins { id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0" } -rootProject.name = "sitemap_utils" +rootProject.name = "sitemap" diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt index 72b2f96..dec628d 100644 --- a/src/main/kotlin/Main.kt +++ b/src/main/kotlin/Main.kt @@ -1,128 +1,14 @@ -package org.thundernetwork +package org.thundernetwork.sitemap -import java.io.File -import java.time.LocalDateTime -import java.time.format.DateTimeFormatter -import javax.xml.parsers.DocumentBuilderFactory -import javax.xml.transform.OutputKeys -import javax.xml.transform.TransformerFactory -import javax.xml.transform.dom.DOMSource -import javax.xml.transform.stream.StreamResult +import org.thundernetwork.sitemap.models.UrlEntry -class SitemapGenerator(private val directoryPath: String) { - private val docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder() - private val transformer = TransformerFactory.newInstance().newTransformer().apply { - setOutputProperty(OutputKeys.INDENT, "yes") - setOutputProperty("{https://xml.apache.org/xslt}indent-amount", "2") - } - private val maxUrlsPerSitemap = 50000 - private var sitemapCount = 0 - private val sitemapIndexFile = File("$directoryPath/sitemap_index.xml") +fun main() { + val urlEntries = listOf( + UrlEntry("https://www.example.com/", "2024-07-01", "monthly", 1.0), + UrlEntry("https://www.example.com/about", "2024-07-01", "monthly", 0.8), + UrlEntry("https://www.example.com/contact", "2024-07-01", "monthly", 0.8) + ) - init { - if (!sitemapIndexFile.exists()) { - val doc = docBuilder.newDocument() - val rootElement = doc.createElement("sitemapindex") - rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9") - doc.appendChild(rootElement) - saveDocument(doc, sitemapIndexFile) - } else { - val doc = docBuilder.parse(sitemapIndexFile) - sitemapCount = doc.getElementsByTagName("sitemap").length - } - } - - private fun getSitemapFile(index: Int): File { - return File("$directoryPath/sitemap_$index.xml") - } - - private fun getDocument(file: File): org.w3c.dom.Document { - return if (file.exists()) { - docBuilder.parse(file) - } else { - val doc = docBuilder.newDocument() - val rootElement = doc.createElement("urlset") - rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9") - doc.appendChild(rootElement) - doc - } - } - - fun addUrl(url: String, lastmod: String? = null, changefreq: String? = null, priority: String? = null) { - val currentSitemapFile = getSitemapFile(sitemapCount) - var doc = getDocument(currentSitemapFile) - var root = doc.documentElement - - if (root.getElementsByTagName("url").length >= maxUrlsPerSitemap) { - sitemapCount++ - doc = getDocument(getSitemapFile(sitemapCount)) - root = doc.documentElement - updateSitemapIndex() - } - - val urlElement = doc.createElement("url") - val locElement = doc.createElement("loc") - locElement.textContent = url - urlElement.appendChild(locElement) - - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX") - val now = LocalDateTime.now().format(formatter) - val lastmodElement = doc.createElement("lastmod") - lastmodElement.textContent = lastmod ?: now - urlElement.appendChild(lastmodElement) - - changefreq?.let { - val changefreqElement = doc.createElement("changefreq") - changefreqElement.textContent = it - urlElement.appendChild(changefreqElement) - } - - priority?.let { - val priorityElement = doc.createElement("priority") - priorityElement.textContent = it - urlElement.appendChild(priorityElement) - } - - root.appendChild(urlElement) - saveDocument(doc, currentSitemapFile) - } - - fun removeUrl(url: String) { - for (i in 0..sitemapCount) { - val sitemapFile = getSitemapFile(i) - if (sitemapFile.exists()) { - val doc = getDocument(sitemapFile) - val root = doc.documentElement - val urlNodes = root.getElementsByTagName("url") - for (j in 0 until urlNodes.length) { - val urlNode = urlNodes.item(j) - val locNode = urlNode.childNodes.item(1) - if (locNode.textContent == url) { - root.removeChild(urlNode) - saveDocument(doc, sitemapFile) - return - } - } - } - } - } - - private fun updateSitemapIndex() { - val doc = docBuilder.parse(sitemapIndexFile) - val root = doc.documentElement - - val sitemapElement = doc.createElement("sitemap") - val locElement = doc.createElement("loc") - locElement.textContent = "sitemap_$sitemapCount.xml" - sitemapElement.appendChild(locElement) - - root.appendChild(sitemapElement) - saveDocument(doc, sitemapIndexFile) - } - - private fun saveDocument(doc: org.w3c.dom.Document, file: File) { - val source = DOMSource(doc) - val result = StreamResult(file) - transformer.transform(source, result) - } + val generator = SitemapGenerator(urlEntries) + generator.generateSitemap("sitemap.xml") } diff --git a/src/main/kotlin/SitemapGenerator.kt b/src/main/kotlin/SitemapGenerator.kt new file mode 100644 index 0000000..9c8e53e --- /dev/null +++ b/src/main/kotlin/SitemapGenerator.kt @@ -0,0 +1,128 @@ +package org.thundernetwork.sitemap + +import org.thundernetwork.sitemap.models.UrlEntry +import org.w3c.dom.Document +import org.w3c.dom.Element +import java.io.File +import javax.xml.parsers.DocumentBuilderFactory +import javax.xml.transform.OutputKeys +import javax.xml.transform.TransformerFactory +import javax.xml.transform.dom.DOMSource +import javax.xml.transform.stream.StreamResult + +class SitemapGenerator(private val large: Boolean, private val path: String) { + + private val urlEntries = mutableListOf() + + fun loadSitemap(filePath: String) { + val docFactory = DocumentBuilderFactory.newInstance().newDocumentBuilder() + val doc: Document = docFactory.parse(File(filePath)) + val urlNodes = doc.getElementsByTagName("url") + + for (i in 0 until urlNodes.length) { + val urlElement = urlNodes.item(i) as Element + + val loc = urlElement.getElementsByTagName("loc").item(0).textContent + val lastmod = urlElement.getElementsByTagName("lastmod")?.item(0)?.textContent + val changefreq = urlElement.getElementsByTagName("changefreq")?.item(0)?.textContent + val priority = urlElement.getElementsByTagName("priority")?.item(0)?.textContent?.toDouble() + + urlEntries.add(UrlEntry(loc, lastmod, changefreq, priority)) + } + } + + fun addUrl(urlEntry: UrlEntry) { + urlEntries.add(urlEntry) + } + + fun removeUrl(loc: String) { + urlEntries.removeAll { it.loc == loc } + } + + fun generateSitemap() { + if (large && urlEntries.size > 10000) { + generateLargeSitemap() + } else { + generateSingleSitemap(path) + } + } + + private fun generateSingleSitemap(filePath: String) { + val docFactory = DocumentBuilderFactory.newInstance().newDocumentBuilder() + val doc: Document = docFactory.newDocument() + + // Root element + val urlset: Element = doc.createElement("urlset") + urlset.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9") + doc.appendChild(urlset) + + for (urlEntry in urlEntries) { + val url: Element = doc.createElement("url") + + val loc: Element = doc.createElement("loc") + loc.appendChild(doc.createTextNode(urlEntry.loc)) + url.appendChild(loc) + + urlEntry.lastmod?.let { + val lastmod: Element = doc.createElement("lastmod") + lastmod.appendChild(doc.createTextNode(it)) + url.appendChild(lastmod) + } + + urlEntry.changefreq?.let { + val changefreq: Element = doc.createElement("changefreq") + changefreq.appendChild(doc.createTextNode(it)) + url.appendChild(changefreq) + } + + urlEntry.priority?.let { + val priority: Element = doc.createElement("priority") + priority.appendChild(doc.createTextNode(it.toString())) + url.appendChild(priority) + } + + urlset.appendChild(url) + } + + // Write the content into XML file + val transformerFactory = TransformerFactory.newInstance() + val transformer = transformerFactory.newTransformer() + transformer.setOutputProperty(OutputKeys.INDENT, "yes") + val source = DOMSource(doc) + val result = StreamResult(File(filePath)) + transformer.transform(source, result) + } + + private fun generateLargeSitemap() { + val dir = File(path) + if (!dir.exists()) { + dir.mkdirs() + } + + val numSitemaps = (urlEntries.size + 9999) / 10000 + val sitemapIndexDocFactory = DocumentBuilderFactory.newInstance().newDocumentBuilder() + val sitemapIndexDoc: Document = sitemapIndexDocFactory.newDocument() + val sitemapIndex: Element = sitemapIndexDoc.createElement("sitemapindex") + sitemapIndex.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9") + sitemapIndexDoc.appendChild(sitemapIndex) + + urlEntries.chunked(10000).forEachIndexed { index, chunk -> + val sitemapFilePath = "$path/sitemap-$index.xml" + generateSingleSitemap(sitemapFilePath) + + val sitemap: Element = sitemapIndexDoc.createElement("sitemap") + val loc: Element = sitemapIndexDoc.createElement("loc") + loc.appendChild(sitemapIndexDoc.createTextNode("$path/sitemap-$index.xml")) + sitemap.appendChild(loc) + sitemapIndex.appendChild(sitemap) + } + + val indexFilePath = "$path.xml" + val transformerFactory = TransformerFactory.newInstance() + val transformer = transformerFactory.newTransformer() + transformer.setOutputProperty(OutputKeys.INDENT, "yes") + val source = DOMSource(sitemapIndexDoc) + val result = StreamResult(File(indexFilePath)) + transformer.transform(source, result) + } +} diff --git a/src/main/kotlin/models/UrlEntry.kt b/src/main/kotlin/models/UrlEntry.kt new file mode 100644 index 0000000..9f03577 --- /dev/null +++ b/src/main/kotlin/models/UrlEntry.kt @@ -0,0 +1,8 @@ +package org.thundernetwork.sitemap.models + +data class UrlEntry( + val loc: String, + val lastmod: String? = null, + val changefreq: String? = null, + val priority: Double? = null +) \ No newline at end of file