diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
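+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>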
\ No newline at end of file
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 2ec3a28..ca1bc65 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -1,5 +1,5 @@
plugins {
id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0"
}
-rootProject.name = "sitemap_utils"
+rootProject.name = "sitemap"
diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt
index 72b2f96..dec628d 100644
--- a/src/main/kotlin/Main.kt
+++ b/src/main/kotlin/Main.kt
@@ -1,128 +1,14 @@
-package org.thundernetwork
+package org.thundernetwork.sitemap
-import java.io.File
-import java.time.LocalDateTime
-import java.time.format.DateTimeFormatter
-import javax.xml.parsers.DocumentBuilderFactory
-import javax.xml.transform.OutputKeys
-import javax.xml.transform.TransformerFactory
-import javax.xml.transform.dom.DOMSource
-import javax.xml.transform.stream.StreamResult
+import org.thundernetwork.sitemap.models.UrlEntry
-class SitemapGenerator(private val directoryPath: String) {
- private val docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder()
- private val transformer = TransformerFactory.newInstance().newTransformer().apply {
- setOutputProperty(OutputKeys.INDENT, "yes")
- setOutputProperty("{https://xml.apache.org/xslt}indent-amount", "2")
- }
- private val maxUrlsPerSitemap = 50000
- private var sitemapCount = 0
- private val sitemapIndexFile = File("$directoryPath/sitemap_index.xml")
+fun main() {
+ val urlEntries = listOf(
+ UrlEntry("https://www.example.com/", "2024-07-01", "monthly", 1.0),
+ UrlEntry("https://www.example.com/about", "2024-07-01", "monthly", 0.8),
+ UrlEntry("https://www.example.com/contact", "2024-07-01", "monthly", 0.8)
+ )
- init {
- if (!sitemapIndexFile.exists()) {
- val doc = docBuilder.newDocument()
- val rootElement = doc.createElement("sitemapindex")
- rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
- doc.appendChild(rootElement)
- saveDocument(doc, sitemapIndexFile)
- } else {
- val doc = docBuilder.parse(sitemapIndexFile)
- sitemapCount = doc.getElementsByTagName("sitemap").length
- }
- }
-
- private fun getSitemapFile(index: Int): File {
- return File("$directoryPath/sitemap_$index.xml")
- }
-
- private fun getDocument(file: File): org.w3c.dom.Document {
- return if (file.exists()) {
- docBuilder.parse(file)
- } else {
- val doc = docBuilder.newDocument()
- val rootElement = doc.createElement("urlset")
- rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
- doc.appendChild(rootElement)
- doc
- }
- }
-
- fun addUrl(url: String, lastmod: String? = null, changefreq: String? = null, priority: String? = null) {
- val currentSitemapFile = getSitemapFile(sitemapCount)
- var doc = getDocument(currentSitemapFile)
- var root = doc.documentElement
-
- if (root.getElementsByTagName("url").length >= maxUrlsPerSitemap) {
- sitemapCount++
- doc = getDocument(getSitemapFile(sitemapCount))
- root = doc.documentElement
- updateSitemapIndex()
- }
-
- val urlElement = doc.createElement("url")
- val locElement = doc.createElement("loc")
- locElement.textContent = url
- urlElement.appendChild(locElement)
-
- val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")
- val now = LocalDateTime.now().format(formatter)
- val lastmodElement = doc.createElement("lastmod")
- lastmodElement.textContent = lastmod ?: now
- urlElement.appendChild(lastmodElement)
-
- changefreq?.let {
- val changefreqElement = doc.createElement("changefreq")
- changefreqElement.textContent = it
- urlElement.appendChild(changefreqElement)
- }
-
- priority?.let {
- val priorityElement = doc.createElement("priority")
- priorityElement.textContent = it
- urlElement.appendChild(priorityElement)
- }
-
- root.appendChild(urlElement)
- saveDocument(doc, currentSitemapFile)
- }
-
- fun removeUrl(url: String) {
- for (i in 0..sitemapCount) {
- val sitemapFile = getSitemapFile(i)
- if (sitemapFile.exists()) {
- val doc = getDocument(sitemapFile)
- val root = doc.documentElement
- val urlNodes = root.getElementsByTagName("url")
- for (j in 0 until urlNodes.length) {
- val urlNode = urlNodes.item(j)
- val locNode = urlNode.childNodes.item(1)
- if (locNode.textContent == url) {
- root.removeChild(urlNode)
- saveDocument(doc, sitemapFile)
- return
- }
- }
- }
- }
- }
-
- private fun updateSitemapIndex() {
- val doc = docBuilder.parse(sitemapIndexFile)
- val root = doc.documentElement
-
- val sitemapElement = doc.createElement("sitemap")
- val locElement = doc.createElement("loc")
- locElement.textContent = "sitemap_$sitemapCount.xml"
- sitemapElement.appendChild(locElement)
-
- root.appendChild(sitemapElement)
- saveDocument(doc, sitemapIndexFile)
- }
-
- private fun saveDocument(doc: org.w3c.dom.Document, file: File) {
- val source = DOMSource(doc)
- val result = StreamResult(file)
- transformer.transform(source, result)
- }
+    val generator = SitemapGenerator(large = false, path = "sitemap.xml").also { urlEntries.forEach(it::addUrl) }
+    generator.generateSitemap() // output path comes from the constructor; generateSitemap() takes no arguments
}
diff --git a/src/main/kotlin/SitemapGenerator.kt b/src/main/kotlin/SitemapGenerator.kt
new file mode 100644
index 0000000..9c8e53e
--- /dev/null
+++ b/src/main/kotlin/SitemapGenerator.kt
@@ -0,0 +1,146 @@
+package org.thundernetwork.sitemap
+
+import org.thundernetwork.sitemap.models.UrlEntry
+import org.w3c.dom.Document
+import org.w3c.dom.Element
+import java.io.File
+import javax.xml.parsers.DocumentBuilderFactory
+import javax.xml.transform.OutputKeys
+import javax.xml.transform.TransformerFactory
+import javax.xml.transform.dom.DOMSource
+import javax.xml.transform.stream.StreamResult
+
+/**
+ * Collects [UrlEntry] items in memory and writes them out as sitemap XML.
+ *
+ * @param large when `true`, [generateSitemap] splits the output into several sitemap files plus
+ *   a sitemap index once more than 10000 entries have been collected.
+ * @param path output file for a single sitemap; in split mode, the directory that receives the
+ *   `sitemap-N.xml` files, with the index written to `<path>.xml`.
+ */
+class SitemapGenerator(private val large: Boolean, private val path: String) {
+
+    private val urlEntries = mutableListOf<UrlEntry>()
+
+    /**
+     * Parses the sitemap file at [filePath] and appends each of its `<url>` elements to the
+     * collected entries.
+     *
+     * A `<url>` without a `<loc>` child is skipped, and a `<priority>` that is not a valid
+     * number is treated as absent instead of aborting the load.
+     */
+    fun loadSitemap(filePath: String) {
+        val doc: Document = newDocumentBuilder().parse(File(filePath))
+        val urlNodes = doc.getElementsByTagName("url")
+
+        for (i in 0 until urlNodes.length) {
+            val urlElement = urlNodes.item(i) as Element
+            // Without <loc> there is nothing to build a UrlEntry from.
+            val loc = urlElement.childText("loc") ?: continue
+
+            urlEntries.add(
+                UrlEntry(
+                    loc = loc,
+                    lastmod = urlElement.childText("lastmod"),
+                    changefreq = urlElement.childText("changefreq"),
+                    priority = urlElement.childText("priority")?.trim()?.toDoubleOrNull()
+                )
+            )
+        }
+    }
+
+    /** Appends [urlEntry] to the collected entries. */
+    fun addUrl(urlEntry: UrlEntry) {
+        urlEntries.add(urlEntry)
+    }
+
+    /** Removes every collected entry whose `loc` equals [loc]. */
+    fun removeUrl(loc: String) {
+        urlEntries.removeAll { it.loc == loc }
+    }
+
+    /**
+     * Writes the collected entries to disk: split files plus an index when `large` is set and
+     * there are more than [MAX_URLS_PER_SITEMAP] entries, otherwise one sitemap at `path`.
+     */
+    fun generateSitemap() {
+        if (large && urlEntries.size > MAX_URLS_PER_SITEMAP) {
+            generateLargeSitemap()
+        } else {
+            generateSingleSitemap(path, urlEntries)
+        }
+    }
+
+    /** Writes [entries] as one `<urlset>` document to [filePath]; null fields are omitted. */
+    private fun generateSingleSitemap(filePath: String, entries: List<UrlEntry>) {
+        val doc: Document = newDocumentBuilder().newDocument()
+
+        val urlset: Element = doc.createElement("urlset")
+        urlset.setAttribute("xmlns", SITEMAP_NAMESPACE)
+        doc.appendChild(urlset)
+
+        for (entry in entries) {
+            val url: Element = doc.createElement("url")
+            url.appendTextElement("loc", entry.loc)
+            entry.lastmod?.let { url.appendTextElement("lastmod", it) }
+            entry.changefreq?.let { url.appendTextElement("changefreq", it) }
+            entry.priority?.let { url.appendTextElement("priority", it.toString()) }
+            urlset.appendChild(url)
+        }
+
+        writeDocument(doc, File(filePath))
+    }
+
+    /**
+     * Splits the entries into chunks of [MAX_URLS_PER_SITEMAP], writes each chunk to
+     * `<path>/sitemap-<index>.xml`, and writes a `<sitemapindex>` listing them to `<path>.xml`.
+     */
+    private fun generateLargeSitemap() {
+        val dir = File(path)
+        if (!dir.exists()) {
+            dir.mkdirs()
+        }
+
+        val indexDoc: Document = newDocumentBuilder().newDocument()
+        val sitemapIndex: Element = indexDoc.createElement("sitemapindex")
+        sitemapIndex.setAttribute("xmlns", SITEMAP_NAMESPACE)
+        indexDoc.appendChild(sitemapIndex)
+
+        urlEntries.chunked(MAX_URLS_PER_SITEMAP).forEachIndexed { index, chunk ->
+            val sitemapFilePath = "$path/sitemap-$index.xml"
+            // Pass the chunk itself: writing the full list would repeat every URL in every file.
+            generateSingleSitemap(sitemapFilePath, chunk)
+
+            val sitemap: Element = indexDoc.createElement("sitemap")
+            sitemap.appendTextElement("loc", sitemapFilePath)
+            sitemapIndex.appendChild(sitemap)
+        }
+
+        writeDocument(indexDoc, File("$path.xml"))
+    }
+
+    /** Serializes [doc] to [target] as indented XML. */
+    private fun writeDocument(doc: Document, target: File) {
+        val transformer = TransformerFactory.newInstance().newTransformer()
+        transformer.setOutputProperty(OutputKeys.INDENT, "yes")
+        transformer.transform(DOMSource(doc), StreamResult(target))
+    }
+
+    private fun newDocumentBuilder() = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+
+    /** Text of the first descendant element named [tag], or `null` when there is none. */
+    private fun Element.childText(tag: String): String? =
+        getElementsByTagName(tag).item(0)?.textContent
+
+    /** Appends a new child element named [tag] holding [text]. */
+    private fun Element.appendTextElement(tag: String, text: String) {
+        val child = ownerDocument.createElement(tag)
+        child.appendChild(ownerDocument.createTextNode(text))
+        appendChild(child)
+    }
+
+    private companion object {
+        const val MAX_URLS_PER_SITEMAP = 10000
+        const val SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
+    }
+}
diff --git a/src/main/kotlin/models/UrlEntry.kt b/src/main/kotlin/models/UrlEntry.kt
new file mode 100644
index 0000000..9f03577
--- /dev/null
+++ b/src/main/kotlin/models/UrlEntry.kt
@@ -0,0 +1,8 @@
+package org.thundernetwork.sitemap.models
+
+data class UrlEntry( // one <url> record, as read and written by SitemapGenerator
+ val loc: String, // text of the <loc> element; the only field without a default
+ val lastmod: String? = null, // emitted as <lastmod> only when non-null
+ val changefreq: String? = null, // emitted as <changefreq> only when non-null
+ val priority: Double? = null // emitted as <priority> (via toString()) only when non-null
+)
\ No newline at end of file